Merge "ART: Add HADDPS/HADDPD/SHUFPS/SHUFPD instruction generation"
diff --git a/Android.mk b/Android.mk
index 7d31382..aef731f 100644
--- a/Android.mk
+++ b/Android.mk
@@ -40,8 +40,6 @@
 
 .PHONY: clean-oat-host
 clean-oat-host:
-	rm -rf $(ART_NATIVETEST_OUT)
-	rm -rf $(ART_TEST_OUT)
 	rm -f $(HOST_CORE_IMG_OUT)
 	rm -f $(HOST_CORE_OAT_OUT)
 	rm -f $(HOST_OUT_JAVA_LIBRARIES)/$(ART_HOST_ARCH)/*.odex
@@ -58,7 +56,10 @@
 endif
 	rm -rf $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.odex
-	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.oat
+	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*/*.oat
+	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*/*.art
+	rm -f $(TARGET_OUT)/framework/*/*.oat
+	rm -f $(TARGET_OUT)/framework/*/*.art
 	rm -f $(TARGET_OUT_APPS)/*.odex
 	rm -f $(TARGET_OUT_INTERMEDIATES)/JAVA_LIBRARIES/*_intermediates/javalib.odex
 	rm -f $(TARGET_OUT_INTERMEDIATES)/APPS/*_intermediates/*.odex
@@ -108,7 +109,7 @@
 ART_HOST_DEPENDENCIES := \
 	$(ART_HOST_EXECUTABLES) \
 	$(HOST_OUT_JAVA_LIBRARIES)/core-libart-hostdex.jar \
-	$(HOST_LIBRARY_PATH)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
+	$(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
 ART_TARGET_DEPENDENCIES := \
 	$(ART_TARGET_EXECUTABLES) \
 	$(TARGET_OUT_JAVA_LIBRARIES)/core-libart.jar \
@@ -133,7 +134,6 @@
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
 	adb remount
 	adb sync
-	adb shell mkdir -p $(ART_TARGET_TEST_DIR)
 
 # Undefine variable now it's served its purpose.
 TEST_ART_TARGET_SYNC_DEPS :=
@@ -355,18 +355,18 @@
 build-art: build-art-host build-art-target
 
 .PHONY: build-art-host
-build-art-host:   $(ART_HOST_EXECUTABLES)   $(ART_HOST_GTEST_EXECUTABLES)   $(HOST_CORE_IMG_OUT)   $(HOST_OUT)/lib/libjavacore.so
+build-art-host:   $(ART_HOST_EXECUTABLES)   $(ART_HOST_GTEST_EXECUTABLES)   $(HOST_CORE_IMG_OUT)   $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION)
 
 .PHONY: build-art-target
-build-art-target: $(ART_TARGET_EXECUTABLES) $(ART_TARGET_GTEST_EXECUTABLES) $(TARGET_CORE_IMG_OUT) $(TARGET_OUT)/lib/libjavacore.so
+build-art-target: $(ART_TARGET_EXECUTABLES) $(ART_TARGET_GTEST_EXECUTABLES) $(TARGET_CORE_IMG_OUT) $(TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
 
 ########################################################################
 # "m art-host" for just building the files needed to run the art script
 .PHONY: art-host
 ifeq ($(HOST_PREFER_32_BIT),true)
-art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm32 $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_CORE_IMG_OUT) $(HOST_OUT)/lib/libjavacore.so
+art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm32 $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_CORE_IMG_OUT) $(HOST_OUT)/lib/libjavacore.so $(HOST_OUT)/bin/dalvikvm
 else
-art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm64 $(HOST_OUT)/bin/dalvikvm32 $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_CORE_IMG_OUT) $(HOST_OUT)/lib/libjavacore.so
+art-host:   $(HOST_OUT_EXECUTABLES)/art $(HOST_OUT)/bin/dalvikvm64 $(HOST_OUT)/bin/dalvikvm32 $(HOST_OUT)/lib/libart.so $(HOST_OUT)/bin/dex2oat $(HOST_CORE_IMG_OUT) $(HOST_OUT)/lib/libjavacore.so $(HOST_OUT)/lib64/libjavacore.so $(HOST_OUT)/bin/dalvikvm
 endif
 
 .PHONY: art-host-debug
@@ -400,7 +400,7 @@
 use-art-full:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
+	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags ""
 	adb shell setprop dalvik.vm.image-dex2oat-flags ""
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
@@ -410,7 +410,7 @@
 use-artd-full:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
+	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags ""
 	adb shell setprop dalvik.vm.image-dex2oat-flags ""
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
@@ -420,7 +420,7 @@
 use-art-smart:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
+	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-flags ""
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
@@ -430,17 +430,27 @@
 use-art-interpret-only:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
+	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop dalvik.vm.image-dex2oat-flags "--compiler-filter=interpret-only"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
 	adb shell start
 
+.PHONY: use-artd-interpret-only
+use-artd-interpret-only:
+	adb root && sleep 3
+	adb shell stop
+	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
+	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=interpret-only"
+	adb shell setprop dalvik.vm.image-dex2oat-flags "--compiler-filter=interpret-only"
+	adb shell setprop persist.sys.dalvik.vm.lib.2 libartd.so
+	adb shell start
+
 .PHONY: use-art-verify-none
 use-art-verify-none:
 	adb root && sleep 3
 	adb shell stop
-	adb shell rm -rf $(ART_DALVIK_CACHE_DIR)/*
+	adb shell rm -rf $(ART_TARGET_DALVIK_CACHE_DIR)/*
 	adb shell setprop dalvik.vm.dex2oat-flags "--compiler-filter=verify-none"
 	adb shell setprop dalvik.vm.image-dex2oat-flags "--compiler-filter=verify-none"
 	adb shell setprop persist.sys.dalvik.vm.lib.2 libart.so
diff --git a/NOTICE b/NOTICE
index faed58a..d27f6a6 100644
--- a/NOTICE
+++ b/NOTICE
@@ -188,3 +188,79 @@
 
    END OF TERMS AND CONDITIONS
 
+-------------------------------------------------------------------
+
+For art/runtime/elf.h derived from external/llvm/include/llvm/Support/ELF.h
+
+==============================================================================
+LLVM Release License
+==============================================================================
+University of Illinois/NCSA
+Open Source License
+
+Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
+All rights reserved.
+
+Developed by:
+
+    LLVM Team
+
+    University of Illinois at Urbana-Champaign
+
+    http://llvm.org
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal with
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimers.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimers in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the names of the LLVM Team, University of Illinois at
+      Urbana-Champaign, nor the names of its contributors may be used to
+      endorse or promote products derived from this Software without specific
+      prior written permission.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+SOFTWARE.
+
+==============================================================================
+Copyrights and Licenses for Third Party Software Distributed with LLVM:
+==============================================================================
+The LLVM software contains code written by third parties.  Such software will
+have its own individual LICENSE.TXT file in the directory in which it appears.
+This file will describe the copyrights, license, and restrictions which apply
+to that code.
+
+The disclaimer of warranty in the University of Illinois Open Source License
+applies to all code in the LLVM Distribution, and nothing in any of the
+other licenses gives permission to use the names of the LLVM Team or the
+University of Illinois to endorse or promote products derived from this
+Software.
+
+The following pieces of software have additional or alternate copyrights,
+licenses, and/or restrictions:
+
+Program             Directory
+-------             ---------
+Autoconf            llvm/autoconf
+                    llvm/projects/ModuleMaker/autoconf
+Google Test         llvm/utils/unittest/googletest
+OpenBSD regex       llvm/lib/Support/{reg*, COPYRIGHT.regex}
+pyyaml tests        llvm/test/YAMLParser/{*.data, LICENSE.TXT}
+ARM contributions   llvm/lib/Target/ARM/LICENSE.TXT
+md5 contributions   llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h
+
+-------------------------------------------------------------------
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 150b404..39a734d 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -59,7 +59,6 @@
   2ND_ART_HOST_ARCH :=
   2ND_HOST_ARCH :=
   ART_HOST_LIBRARY_PATH := $(HOST_LIBRARY_PATH)
-  2ND_ART_HOST_LIBRARY_PATH :=
   ART_HOST_OUT_SHARED_LIBRARIES := $(2ND_HOST_OUT_SHARED_LIBRARIES)
   2ND_ART_HOST_OUT_SHARED_LIBRARIES :=
 else
@@ -71,7 +70,6 @@
   2ND_ART_HOST_ARCH := x86
   2ND_HOST_ARCH := x86
   ART_HOST_LIBRARY_PATH := $(HOST_LIBRARY_PATH)
-  2ND_ART_HOST_LIBRARY_PATH := $(HOST_LIBRARY_PATH)32
   ART_HOST_OUT_SHARED_LIBRARIES := $(HOST_OUT_SHARED_LIBRARIES)
   2ND_ART_HOST_OUT_SHARED_LIBRARIES := $(2ND_HOST_OUT_SHARED_LIBRARIES)
 endif
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 21a8931..542e888 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -21,11 +21,13 @@
 
 # List of known broken tests that we won't attempt to execute. The test name must be the full
 # rule name such as test-art-host-oat-optimizing-HelloWorld64.
-ART_TEST_KNOWN_BROKEN :=
+ART_TEST_KNOWN_BROKEN := \
+  test-art-host-oat-optimizing-SignalTest64 \
+  test-art-host-oat-optimizing-SignalTest32
 
-# List of known failing tests that when executed won't cause test execution to finish. The test name
-# must be the full rule name such as test-art-host-oat-optimizing-HelloWorld64.
-ART_TEST_KNOWN_FAILING := $(ART_TEST_KNOWN_BROKEN)
+# List of known failing tests that, when executed, still allow test execution to finish.
+# The test name must be the full rule name such as test-art-host-oat-optimizing-HelloWorld64.
+ART_TEST_KNOWN_FAILING :=
 
 # Keep going after encountering a test failure?
 ART_TEST_KEEP_GOING ?= false
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index 7abf863..79f8f5e 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -18,7 +18,7 @@
 
 ART_CPPLINT := art/tools/cpplint.py
 ART_CPPLINT_FILTER := --filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf
-ART_CPPLINT_SRC := $(shell find art -name "*.h" -o -name "*$(ART_CPP_EXTENSION)" | grep -v art/compiler/llvm/generated/)
+ART_CPPLINT_SRC := $(shell find art -name "*.h" -o -name "*$(ART_CPP_EXTENSION)" | grep -v art/compiler/llvm/generated/ | grep -v art/runtime/elf\.h)
 
 # "mm cpplint-art" to verify we aren't regressing
 .PHONY: cpplint-art
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 69d6c5a..e467656 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -45,27 +45,28 @@
   $(ART_TARGET_NATIVETEST_OUT),art/build/Android.gtest.mk,ART_GTEST_$(dir)_DEX)))
 
 # Dex file dependencies for each gtest.
-ART_GTEST_class_linker_test_DEPS := Interfaces MyClass Nested Statics StaticsFromCode
-ART_GTEST_compiler_driver_test_DEPS := AbstractMethod
-ART_GTEST_dex_file_test_DEPS := GetMethodSignature
-ART_GTEST_exception_test_DEPS := ExceptionHandle
-ART_GTEST_jni_compiler_test_DEPS := MyClassNatives
-ART_GTEST_jni_internal_test_DEPS := AllFields StaticLeafMethods
-ART_GTEST_object_test_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
-ART_GTEST_proxy_test_DEPS := Interfaces
-ART_GTEST_reflection_test_DEPS := Main NonStaticLeafMethods StaticLeafMethods
-ART_GTEST_stub_test_DEPS := AllFields
-ART_GTEST_transaction_test_DEPS := Transaction
+ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MyClass Nested Statics StaticsFromCode
+ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod
+ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature
+ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
+ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
+ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
+ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
+ART_GTEST_proxy_test_DEX_DEPS := Interfaces
+ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
+ART_GTEST_stub_test_DEX_DEPS := AllFields
+ART_GTEST_transaction_test_DEX_DEPS := Transaction
 
 # The elf writer test has dependencies on core.oat.
-ART_GTEST_elf_writer_test_DEPS := $(HOST_CORE_OAT_OUT) $(2ND_HOST_CORE_OAT_OUT) \
-  $(TARGET_CORE_OAT_OUT) $(2ND_TARGET_CORE_OAT_OUT)
+ART_GTEST_elf_writer_test_HOST_DEPS := $(HOST_CORE_OAT_OUT) $(2ND_HOST_CORE_OAT_OUT)
+ART_GTEST_elf_writer_test_TARGET_DEPS := $(TARGET_CORE_OAT_OUT) $(2ND_TARGET_CORE_OAT_OUT)
 
 # The path that all the source files are relative to; not actually the current directory.
 LOCAL_PATH := art
 
 RUNTIME_GTEST_COMMON_SRC_FILES := \
   runtime/arch/arch_test.cc \
+  runtime/arch/memcmp16_test.cc \
   runtime/arch/stub_test.cc \
   runtime/barrier_test.cc \
   runtime/base/bit_field_test.cc \
@@ -194,7 +195,8 @@
   # Add the test dependencies to test-art-target-sync, which will be a prerequisite for the test
   # to ensure files are pushed to the device.
   TEST_ART_TARGET_SYNC_DEPS += \
-    $(foreach file,$(ART_GTEST_$(1)_DEPS),$(ART_GTEST_$(file)_DEX)) \
+    $$(ART_GTEST_$(1)_TARGET_DEPS) \
+    $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_GTEST_$(file)_DEX)) \
     $$(ART_TARGET_NATIVETEST_OUT)/$$(TARGET_$(2)ARCH)/$(1) \
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so
 
@@ -204,7 +206,7 @@
 	$(hide) adb shell rm $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID
 	$(hide) adb shell chmod 755 $(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
-	  (adb shell sh -c "$(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1) && touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID" \
+	  (adb shell "$(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1) && touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID" \
 	  && (adb pull $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID /tmp/ \
 	      && $$(call ART_TEST_PASSED,$$@)) \
 	  || $$(call ART_TEST_FAILED,$$@))
@@ -226,11 +228,11 @@
   gtest_exe := $$(HOST_OUT_EXECUTABLES)/$(1)$$($(2)ART_PHONY_TEST_HOST_SUFFIX)
   # Dependencies for all host gtests.
   gtest_deps := $$(HOST_CORE_DEX_LOCATIONS) \
-    $$($(2)ART_HOST_LIBRARY_PATH)/libjavacore$$(ART_HOST_SHLIB_EXTENSION)
+    $$($(2)ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$$(ART_HOST_SHLIB_EXTENSION)
 
 
 .PHONY: $$(gtest_rule)
-$$(gtest_rule): $$(gtest_exe) $(foreach file,$(ART_GTEST_$(1)_DEPS),$(ART_GTEST_$(file)_DEX-host)) $$(gtest_deps)
+$$(gtest_rule): $$(gtest_exe) $$(ART_GTEST_$(1)_HOST_DEPS) $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_GTEST_$(file)_DEX-host)) $$(gtest_deps)
 	$(hide) ($$(call ART_TEST_SKIP,$$@) && $$< && $$(call ART_TEST_PASSED,$$@)) \
 	  || $$(call ART_TEST_FAILED,$$@)
 
@@ -239,7 +241,7 @@
   ART_TEST_HOST_GTEST_$(1)_RULES += $$(gtest_rule)
 
 .PHONY: valgrind-$$(gtest_rule)
-valgrind-$$(gtest_rule): $$(gtest_exe) $$(ART_GTEST_$(1)_DEPS) $$(gtest_deps)
+valgrind-$$(gtest_rule): $$(gtest_exe) $$(ART_GTEST_$(1)_HOST_DEPS) $(foreach file,$(ART_GTEST_$(1)_DEX_DEPS),$(ART_GTEST_$(file)_DEX-host)) $$(gtest_deps)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  valgrind --leak-check=full --error-exitcode=1 $$< && $$(call ART_TEST_PASSED,$$@) \
 	    || $$(call ART_TEST_FAILED,$$@)
@@ -444,17 +446,19 @@
 ART_TEST_TARGET_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST_RULES :=
-ART_GTEST_class_linker_test_DEPS :=
-ART_GTEST_compiler_driver_test_DEPS :=
-ART_GTEST_dex_file_test_DEPS :=
-ART_GTEST_exception_test_DEPS :=
-ART_GTEST_jni_compiler_test_DEPS :=
-ART_GTEST_jni_internal_test_DEPS :=
-ART_GTEST_object_test_DEPS :=
-ART_GTEST_proxy_test_DEPS :=
-ART_GTEST_reflection_test_DEPS :=
-ART_GTEST_stub_test_DEPS :=
-ART_GTEST_transaction_test_DEPS :=
+ART_GTEST_class_linker_test_DEX_DEPS :=
+ART_GTEST_compiler_driver_test_DEX_DEPS :=
+ART_GTEST_dex_file_test_DEX_DEPS :=
+ART_GTEST_exception_test_DEX_DEPS :=
+ART_GTEST_elf_writer_test_HOST_DEPS :=
+ART_GTEST_elf_writer_test_TARGET_DEPS :=
+ART_GTEST_jni_compiler_test_DEX_DEPS :=
+ART_GTEST_jni_internal_test_DEX_DEPS :=
+ART_GTEST_object_test_DEX_DEPS :=
+ART_GTEST_proxy_test_DEX_DEPS :=
+ART_GTEST_reflection_test_DEX_DEPS :=
+ART_GTEST_stub_test_DEX_DEPS :=
+ART_GTEST_transaction_test_DEX_DEPS :=
 $(foreach dir,$(GTEST_DEX_DIRECTORIES), $(eval ART_GTEST_TEST_$(dir)_DEX :=))
 GTEST_DEX_DIRECTORIES :=
 LOCAL_PATH :=
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 3117f71..916fd58 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -26,7 +26,7 @@
 # Use dex2oat debug version for better error reporting
 # $(1): 2ND_ or undefined, 2ND_ for 32-bit host builds.
 define create-core-oat-host-rules
-$$($(1)HOST_CORE_IMG_OUT): $$($(1)HOST_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
+$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
 	@echo "host dex2oat: $$@ ($$?)"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $$(DEX2OATD) --runtime-arg -Xms16m --runtime-arg -Xmx16m \
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 76838d7..f940b54 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -108,10 +108,9 @@
       mir_to_lir = MipsCodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     case kX86:
-      mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
-      break;
+      // Fall-through.
     case kX86_64:
-      mir_to_lir = X86_64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
+      mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     default:
       LOG(FATAL) << "Unexpected instruction set: " << cu->instruction_set;
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 2b097b5..6eccb0e 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -139,7 +139,7 @@
 class NullCheckEliminationAndTypeInference : public PassME {
  public:
   NullCheckEliminationAndTypeInference()
-    : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
+    : PassME("NCE_TypeInference", kRepeatingTopologicalSortTraversal, "4_post_nce_cfg") {
   }
 
   void Start(PassDataHolder* data) const {
@@ -169,7 +169,8 @@
 
 class ClassInitCheckElimination : public PassME {
  public:
-  ClassInitCheckElimination() : PassME("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
+  ClassInitCheckElimination()
+    : PassME("ClInitCheckElimination", kRepeatingTopologicalSortTraversal) {
   }
 
   bool Gate(const PassDataHolder* data) const {
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 62973af..66c524f 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -337,16 +337,10 @@
        * @param mir_graph The MIRGraph considered.
        */
       explicit TopologicalSortIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
-            mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()) {
         // Extra setup for TopologicalSortIterator.
         idx_ = start_idx_;
         block_id_list_ = mir_graph->GetTopologicalSortOrder();
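+        // The topological sort order must already have been computed (e.g. by the
+        // TopologicalSortOrders post-opt pass); GetTopologicalSortOrder() DCHECKs that it is set.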
-
-        if (mir_graph->GetTopologicalSortOrder() == nullptr) {
-          /* Compute the topological order */
-          mir_graph->ComputeTopologicalSortOrder();
-        }
       }
 
       /**
@@ -375,16 +369,10 @@
       * @param mir_graph The MIRGraph considered.
       */
      explicit RepeatingTopologicalSortIterator(MIRGraph* mir_graph)
-         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
-           mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder()->Size()) {
        // Extra setup for RepeatingTopologicalSortIterator.
        idx_ = start_idx_;
        block_id_list_ = mir_graph->GetTopologicalSortOrder();
-
-       if (mir_graph->GetTopologicalSortOrder() == nullptr) {
-         /* Compute the topological order */
-         mir_graph->ComputeTopologicalSortOrder();
-       }
      }
 
      /**
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 72990b4..dc6043d 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -865,12 +865,12 @@
         (1 << kPromoteCompilerTemps));
   } else if (cu.instruction_set == kX86_64) {
     // TODO(X86_64): enable optimizations once backend is mature enough.
-    cu.disable_opt |= (
-        (1 << kLoadStoreElimination) |
-        (1 << kPromoteRegs));
+    cu.disable_opt |= (1 << kLoadStoreElimination);
   } else if (cu.instruction_set == kArm64) {
     // TODO(Arm64): enable optimizations once backend is mature enough.
-    cu.disable_opt = ~(uint32_t)0;
+    cu.disable_opt = ~((1 << kSuppressMethodInlining) |
+                       (1 << kNullCheckElimination) |
+                       (1 << kPromoteRegs));
   }
 
   cu.StartTimingSplit("BuildMIRGraph");
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 63a5570..baa46d6 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -27,6 +27,7 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "leb128.h"
 #include "pass_driver_me_post_opt.h"
+#include "utils/scoped_arena_containers.h"
 
 namespace art {
 
@@ -1437,22 +1438,24 @@
 }
 
 void MIRGraph::ComputeTopologicalSortOrder() {
-  std::queue<BasicBlock*> q;
-  std::map<int, int> visited_cnt_values;
-
   // Clear the nodes.
   ClearAllVisitedFlags();
 
   // Create the topological order if need be.
-  if (topological_order_ != nullptr) {
-    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, 0);
+  if (topological_order_ == nullptr) {
+    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks());
   }
   topological_order_->Reset();
 
+  ScopedArenaAllocator allocator(&cu_->arena_stack);
+  ScopedArenaQueue<BasicBlock*> q(allocator.Adapter());
+  ScopedArenaVector<size_t> visited_cnt_values(GetNumBlocks(), 0u, allocator.Adapter());
+
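+  // This is essentially Kahn's algorithm: count each block's unvisited, non-hidden, forward
+  // (non-back-edge) predecessors, queue the blocks whose count is zero, and as each block is
+  // visited decrement the counts of its successors, queueing them when they reach zero. Blocks
+  // whose count never reaches zero (cycles) are picked up by the fallback in the main loop below.
+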
   // Set up visitedCntValues map for all BB. The default value for these counters in the map is zero.
   // Also fill the initial queue.
   GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
 
+  size_t num_blocks = 0u;
   while (true) {
     BasicBlock* bb = iterator.Next();
 
@@ -1464,7 +1467,8 @@
       continue;
     }
 
-    visited_cnt_values[bb->id] = bb->predecessors->Size();
+    num_blocks += 1u;
+    size_t unvisited_predecessor_count = bb->predecessors->Size();
 
     GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors);
     // To process loops we should not wait for dominators.
@@ -1475,53 +1479,75 @@
         break;
       }
 
-      if (pred_bb->dominators == nullptr || pred_bb->hidden == true) {
-        continue;
-      }
-
-      // Skip the backward branch.
-      if (pred_bb->dominators->IsBitSet(bb->id) != 0) {
-        visited_cnt_values[bb->id]--;
+      // Skip the backward branch or hidden predecessor.
+      if (pred_bb->hidden ||
+          (pred_bb->dominators != nullptr && pred_bb->dominators->IsBitSet(bb->id))) {
+        unvisited_predecessor_count -= 1u;
       }
     }
 
+    visited_cnt_values[bb->id] = unvisited_predecessor_count;
+
     // Add entry block to queue.
-    if (visited_cnt_values[bb->id] == 0) {
+    if (unvisited_predecessor_count == 0) {
       q.push(bb);
     }
   }
 
-  while (q.size() > 0) {
-    // Get top.
-    BasicBlock* bb = q.front();
-    q.pop();
-
-    DCHECK_EQ(bb->hidden, false);
-
-    if (bb->IsExceptionBlock() == true) {
-      continue;
+  // We can theoretically get a cycle where none of the blocks dominates the others. Therefore,
+  // don't stop when the queue is empty; continue until we've processed all the blocks.
+  // (In practice, we've seen this for a monitor-exit catch handler that erroneously tries to
+  // handle its own exceptions and is broken into two blocks by a jump to the monitor-exit
+  // from another catch handler. http://b/15745363.)
+  AllNodesIterator candidate_iter(this);  // For the empty queue case.
+  while (num_blocks != 0u) {
+    num_blocks -= 1u;
+    BasicBlock* bb = nullptr;
+    if (!q.empty()) {
+      // Get top.
+      bb = q.front();
+      q.pop();
+    } else {
+      // Find some block we didn't visit yet that has at least one visited predecessor.
+      while (bb == nullptr) {
+        BasicBlock* candidate = candidate_iter.Next();
+        DCHECK(candidate != nullptr);
+        if (candidate->visited || candidate->hidden) {
+          continue;
+        }
+        GrowableArray<BasicBlockId>::Iterator iter(candidate->predecessors);
+        for (BasicBlock* pred_bb = GetBasicBlock(iter.Next()); pred_bb != nullptr;
+            pred_bb = GetBasicBlock(iter.Next())) {
+          if (!pred_bb->hidden && pred_bb->visited) {
+            bb = candidate;
+            break;
+          }
+        }
+      }
     }
 
+    DCHECK_EQ(bb->hidden, false);
+    DCHECK_EQ(bb->visited, false);
+
     // We've visited all the predecessors. So, we can visit bb.
-    if (bb->visited == false) {
-      bb->visited = true;
+    bb->visited = true;
 
-      // Now add the basic block.
-      topological_order_->Insert(bb->id);
+    // Now add the basic block.
+    topological_order_->Insert(bb->id);
 
-      // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero.
-      ChildBlockIterator succIter(bb, this);
-      BasicBlock* successor = succIter.Next();
-      while (successor != nullptr) {
-        // one more predecessor was visited.
-        visited_cnt_values[successor->id]--;
+    // Reduce visitedCnt for all the successors and add to the queue those whose visitedCnt reaches zero.
+    ChildBlockIterator succIter(bb, this);
+    BasicBlock* successor = succIter.Next();
+    for ( ; successor != nullptr; successor = succIter.Next()) {
+      if (successor->visited || successor->hidden) {
+        continue;
+      }
 
-        if (visited_cnt_values[successor->id] <= 0 && successor->visited == false && successor->hidden == false) {
-          q.push(successor);
-        }
-
-        // Take next successor.
-        successor = succIter.Next();
+      // one more predecessor was visited.
+      DCHECK_NE(visited_cnt_values[successor->id], 0u);
+      visited_cnt_values[successor->id] -= 1u;
+      if (visited_cnt_values[successor->id] == 0u) {
+        q.push(successor);
       }
     }
   }
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 0ff340e..398c7f6 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -27,6 +27,7 @@
 #include "mir_method_info.h"
 #include "utils/arena_bit_vector.h"
 #include "utils/growable_array.h"
+#include "reg_location.h"
 #include "reg_storage.h"
 
 namespace art {
@@ -492,39 +493,6 @@
 };
 
 /*
- * Whereas a SSA name describes a definition of a Dalvik vreg, the RegLocation describes
- * the type of an SSA name (and, can also be used by code generators to record where the
- * value is located (i.e. - physical register, frame, spill, etc.).  For each SSA name (SReg)
- * there is a RegLocation.
- * A note on SSA names:
- *   o SSA names for Dalvik vRegs v0..vN will be assigned 0..N.  These represent the "vN_0"
- *     names.  Negative SSA names represent special values not present in the Dalvik byte code.
- *     For example, SSA name -1 represents an invalid SSA name, and SSA name -2 represents the
- *     the Method pointer.  SSA names < -2 are reserved for future use.
- *   o The vN_0 names for non-argument Dalvik should in practice never be used (as they would
- *     represent the read of an undefined local variable).  The first definition of the
- *     underlying Dalvik vReg will result in a vN_1 name.
- *
- * FIXME: The orig_sreg field was added as a workaround for llvm bitcode generation.  With
- * the latest restructuring, we should be able to remove it and rely on s_reg_low throughout.
- */
-struct RegLocation {
-  RegLocationType location:3;
-  unsigned wide:1;
-  unsigned defined:1;   // Do we know the type?
-  unsigned is_const:1;  // Constant, value in mir_graph->constant_values[].
-  unsigned fp:1;        // Floating point?
-  unsigned core:1;      // Non-floating point?
-  unsigned ref:1;       // Something GC cares about.
-  unsigned high_word:1;  // High word of pair?
-  unsigned home:1;      // Does this represent the home location?
-  RegStorage reg;       // Encoded physical registers.
-  int16_t s_reg_low;    // SSA name for low Dalvik word.
-  int16_t orig_sreg;    // TODO: remove after Bitcode gen complete
-                        // and consolidate usage w/ s_reg_low.
-};
-
-/*
  * Collection of information describing an invoke, and the destination of
  * the subsequent MOVE_RESULT (if applicable).  Collected as a unit to enable
  * more efficient invoke code generation.
@@ -701,6 +669,7 @@
   void BasicBlockOptimization();
 
   GrowableArray<BasicBlockId>* GetTopologicalSortOrder() {
+    DCHECK(topological_order_ != nullptr);
     return topological_order_;
   }
 
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 29c353a..9b2e798 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -190,10 +190,12 @@
   void PerformClassInitCheckElimination() {
     cu_.mir_graph->SSATransformationStart();
     cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
     cu_.mir_graph->SSATransformationEnd();
     bool gate_result = cu_.mir_graph->EliminateClassInitChecksGate();
     ASSERT_TRUE(gate_result);
-    RepeatingPreOrderDfsIterator iterator(cu_.mir_graph.get());
+    RepeatingTopologicalSortIterator iterator(cu_.mir_graph.get());
     bool change = false;
     for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
       change = cu_.mir_graph->EliminateClassInitChecks(bb);
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
index 7d76fb8..031c5cf 100644
--- a/compiler/dex/pass_driver_me.h
+++ b/compiler/dex/pass_driver_me.h
@@ -62,6 +62,12 @@
       case kPostOrderDOMTraversal:
         DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass);
         break;
+      case kTopologicalSortTraversal:
+        DoWalkBasicBlocks<TopologicalSortIterator>(&pass_me_data_holder_, me_pass);
+        break;
+      case kRepeatingTopologicalSortTraversal:
+        DoWalkBasicBlocks<RepeatingTopologicalSortIterator>(&pass_me_data_holder_, me_pass);
+        break;
       case kAllNodes:
         DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass);
         break;
diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc
index cb63f41..14108af 100644
--- a/compiler/dex/pass_driver_me_post_opt.cc
+++ b/compiler/dex/pass_driver_me_post_opt.cc
@@ -36,6 +36,7 @@
   GetPassInstance<CalculatePredecessors>(),
   GetPassInstance<DFSOrders>(),
   GetPassInstance<BuildDomination>(),
+  GetPassInstance<TopologicalSortOrders>(),
   GetPassInstance<DefBlockMatrix>(),
   GetPassInstance<CreatePhiNodes>(),
   GetPassInstance<ClearVisitedFlag>(),
diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h
index 445c46d..a1b0df4 100644
--- a/compiler/dex/post_opt_passes.h
+++ b/compiler/dex/post_opt_passes.h
@@ -127,6 +127,23 @@
 };
 
 /**
+ * @class TopologicalSortOrders
+ * @brief Compute the topological sort order of the MIR graph
+ */
+class TopologicalSortOrders : public PassME {
+ public:
+  TopologicalSortOrders() : PassME("TopologicalSortOrders") {
+  }
+
+  void Start(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    c_unit->mir_graph.get()->ComputeTopologicalSortOrder();
+  }
+};
+
+/**
  * @class DefBlockMatrix
  * @brief Calculate the matrix of definition per basic block
  */
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 70dce7f..43db24c 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -67,7 +67,6 @@
     void MarkPreservedSingle(int v_reg, RegStorage reg);
     void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
-    RegStorage AllocPreservedDouble(int s_reg);
 
     // Required for target - miscellaneous.
     void AssembleLIR();
@@ -114,7 +113,7 @@
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
     bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+    bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
@@ -196,6 +195,8 @@
     bool InexpensiveConstantFloat(int32_t value);
     bool InexpensiveConstantLong(int64_t value);
     bool InexpensiveConstantDouble(int64_t value);
+    RegStorage AllocPreservedDouble(int s_reg);
+    RegStorage AllocPreservedSingle(int s_reg);
 
   private:
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index e34d944..95071d9 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -19,6 +19,7 @@
 #include "arm_lir.h"
 #include "codegen_arm.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
 
@@ -687,8 +688,11 @@
   return rl_result;
 }
 
-bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+bool ArmMir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
   DCHECK_EQ(cu_->instruction_set, kThumb2);
+  if (is_long) {
+    return false;
+  }
   RegLocation rl_src1 = info->args[0];
   RegLocation rl_src2 = info->args[1];
   rl_src1 = LoadValue(rl_src1, kCoreReg);
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index e1e2d5b..ef94bbc 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -771,7 +771,7 @@
   int p_map_idx = SRegToPMap(s_reg);
   if (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg) {
     // Upper reg is already allocated.  Can we fit?
-    int high_reg = promotion_map_[p_map_idx+1].FpReg;
+    int high_reg = promotion_map_[p_map_idx+1].fp_reg;
     if ((high_reg & 1) == 0) {
       // High reg is even - fail.
       return res;  // Invalid.
@@ -805,13 +805,32 @@
   if (res.Valid()) {
     RegisterInfo* info = GetRegInfo(res);
     promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx].FpReg =
+    promotion_map_[p_map_idx].fp_reg =
         info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg().GetReg();
     promotion_map_[p_map_idx+1].fp_location = kLocPhysReg;
-    promotion_map_[p_map_idx+1].FpReg =
+    promotion_map_[p_map_idx+1].fp_reg =
         info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg().GetReg();
   }
   return res;
 }
 
+// Reserve a callee-save sp single register.
+RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) {
+  RegStorage res;
+  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
+  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
+    if (!info->IsTemp() && !info->InUse()) {
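+      // Claim the first single-precision register that is neither a temp nor in use: mark it
+      // in use and record the promotion so s_reg is treated as homed in this register.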
+      res = info->GetReg();
+      int p_map_idx = SRegToPMap(s_reg);
+      int v_reg = mir_graph_->SRegToVReg(s_reg);
+      GetRegInfo(res)->MarkInUse();
+      MarkPreservedSingle(v_reg, res);
+      promotion_map_[p_map_idx].fp_location = kLocPhysReg;
+      promotion_map_[p_map_idx].fp_reg = res.GetReg();
+      break;
+    }
+  }
+  return res;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index bc8f95b..2d5e291 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -17,6 +17,7 @@
 #include "arm_lir.h"
 #include "codegen_arm.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 
 namespace art {
 
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 1f1a252..5077d11 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -136,23 +136,23 @@
   A64_REGISTER_CODE_LIST(A64_DEFINE_REGISTERS)
 #undef A64_DEFINE_REGISTERS
 
-  rwzr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0x3f,
   rxzr = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 0x3f,
-  rwsp = rw31,
+  rwzr = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0x3f,
   rsp = rx31,
-  rA64_SUSPEND = rx19,
-  rA64_SELF = rx18,
-  rA64_SP = rx31,
-  rA64_LR = rx30,
+  rwsp = rw31,
+
+  // Aliases which are not defined in "ARM Architecture Reference, register names".
+  rxSUSPEND = rx19,
+  rxSELF = rx18,
+  rxLR = rx30,
   /*
    * FIXME: It's a bit awkward to define both 32 and 64-bit views of these - we'll only ever use
    * the 64-bit view. However, for now we'll define a 32-bit view to keep these from being
    * allocated as 32-bit temp registers.
    */
-  rA32_SUSPEND = rw19,
-  rA32_SELF = rw18,
-  rA32_SP = rw31,
-  rA32_LR = rw30
+  rwSUSPEND = rw19,
+  rwSELF = rw18,
+  rwLR = rw30,
 };
 
 #define A64_DEFINE_REGSTORAGES(nr) \
@@ -163,27 +163,30 @@
 A64_REGISTER_CODE_LIST(A64_DEFINE_REGSTORAGES)
 #undef A64_DEFINE_REGSTORAGES
 
-constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
 constexpr RegStorage rs_xzr(RegStorage::kValid | rxzr);
-constexpr RegStorage rs_rA64_SUSPEND(RegStorage::kValid | rA64_SUSPEND);
-constexpr RegStorage rs_rA64_SELF(RegStorage::kValid | rA64_SELF);
-constexpr RegStorage rs_rA64_SP(RegStorage::kValid | rA64_SP);
-constexpr RegStorage rs_rA64_LR(RegStorage::kValid | rA64_LR);
+constexpr RegStorage rs_wzr(RegStorage::kValid | rwzr);
+// Reserved registers.
+constexpr RegStorage rs_xSUSPEND(RegStorage::kValid | rxSUSPEND);
+constexpr RegStorage rs_xSELF(RegStorage::kValid | rxSELF);
+constexpr RegStorage rs_sp(RegStorage::kValid | rsp);
+constexpr RegStorage rs_xLR(RegStorage::kValid | rxLR);
 // TODO: eliminate the need for these.
-constexpr RegStorage rs_rA32_SUSPEND(RegStorage::kValid | rA32_SUSPEND);
-constexpr RegStorage rs_rA32_SELF(RegStorage::kValid | rA32_SELF);
-constexpr RegStorage rs_rA32_SP(RegStorage::kValid | rA32_SP);
-constexpr RegStorage rs_rA32_LR(RegStorage::kValid | rA32_LR);
+constexpr RegStorage rs_wSUSPEND(RegStorage::kValid | rwSUSPEND);
+constexpr RegStorage rs_wSELF(RegStorage::kValid | rwSELF);
+constexpr RegStorage rs_wsp(RegStorage::kValid | rwsp);
+constexpr RegStorage rs_wLR(RegStorage::kValid | rwLR);
 
 // RegisterLocation templates return values (following the hard-float calling convention).
 const RegLocation arm_loc_c_return =
     {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_w0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_ref =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
 const RegLocation arm_loc_c_return_wide =
     {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_x0, INVALID_SREG, INVALID_SREG};
 const RegLocation arm_loc_c_return_float =
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG};
+    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, rs_f0, INVALID_SREG, INVALID_SREG};
 const RegLocation arm_loc_c_return_double =
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG};
+    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, rs_d0, INVALID_SREG, INVALID_SREG};
 
 /**
  * @brief Shift-type to be applied to a register via EncodeShift().
@@ -256,6 +259,8 @@
   kA64Fcvt2Ss,       // fcvt   [0001111000100010110000] rn[9-5] rd[4-0].
   kA64Fcvt2sS,       // fcvt   [0001111001100010010000] rn[9-5] rd[4-0].
   kA64Fdiv3fff,      // fdiv[000111100s1] rm[20-16] [000110] rn[9-5] rd[4-0].
+  kA64Fmax3fff,      // fmax[000111100s1] rm[20-16] [010010] rn[9-5] rd[4-0].
+  kA64Fmin3fff,      // fmin[000111100s1] rm[20-16] [010110] rn[9-5] rd[4-0].
   kA64Fmov2ff,       // fmov[000111100s100000010000] rn[9-5] rd[4-0].
   kA64Fmov2fI,       // fmov[000111100s1] imm_8[20-13] [10000000] rd[4-0].
   kA64Fmov2sw,       // fmov[0001111000100111000000] rn[9-5] rd[4-0].
@@ -303,6 +308,7 @@
   kA64Orr3Rrl,       // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
   kA64Orr4rrro,      // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
   kA64Ret,           // ret [11010110010111110000001111000000].
+  kA64Rbit2rr,       // rbit [s101101011000000000000] rn[9-5] rd[4-0].
   kA64Rev2rr,        // rev [s10110101100000000001x] rn[9-5] rd[4-0].
   kA64Rev162rr,      // rev16[s101101011000000000001] rn[9-5] rd[4-0].
   kA64Ror3rrr,       // ror [s0011010110] rm[20-16] [001011] rn[9-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index bee64f1..e10f7cf 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -170,7 +170,7 @@
     ENCODING_MAP(WIDE(kA64Cbz2rt), SF_VARIANTS(0x34000000),
                  kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_BINARY_OP | REG_USE0 | IS_BRANCH  | NEEDS_FIXUP,
+                 IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP,
                  "cbz", "!0r, !1t", kFixupCBxZ),
     ENCODING_MAP(WIDE(kA64Cmn3rro), SF_VARIANTS(0x2b00001f),
                  kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
@@ -260,6 +260,14 @@
                  kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
                  "fdiv", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmax3fff), FLOAT_VARIANTS(0x1e204800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmax", "!0f, !1f, !2f", kFixupNone),
+    ENCODING_MAP(FWIDE(kA64Fmin3fff), FLOAT_VARIANTS(0x1e205800),
+                 kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtRegF, 20, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmin", "!0f, !1f, !2f", kFixupNone),
     ENCODING_MAP(FWIDE(kA64Fmov2ff), FLOAT_VARIANTS(0x1e204000),
                  kFmtRegF, 4, 0, kFmtRegF, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
@@ -280,7 +288,7 @@
                  kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "fmov", "!0w, !1s", kFixupNone),
-    ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e6e0000),
+    ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e660000),
                  kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
                  "fmov", "!0x, !1S", kFixupNone),
@@ -450,6 +458,10 @@
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
                  "ret", "", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Rbit2rr), SF_VARIANTS(0x5ac00000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "rbit", "!0r, !1r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Rev2rr), CUSTOM_VARIANTS(0x5ac00800, 0xdac00c00),
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
@@ -484,7 +496,7 @@
                  "sdiv", "!0r, !1r, !2r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000),
                  kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
-                 kFmtRegX, -1, -1, IS_QUAD_OP | REG_DEF0_USE123,
+                 kFmtRegX, 14, 10, IS_QUAD_OP | REG_DEF0_USE123,
                  "smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
     ENCODING_MAP(WIDE(kA64Stp4ffXD), CUSTOM_VARIANTS(0x2d000000, 0x6d000000),
                  kFmtRegF, 4, 0, kFmtRegF, 14, 10, kFmtRegXOrSp, 9, 5,
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index c3f4711..56dcbe5 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -24,14 +24,6 @@
 
 namespace art {
 
-bool Arm64Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
-                                  const InlineMethod& special) {
-  // TODO(Arm64): re-enable this, once hard-float ABI is implemented.
-  //   (this currently does not work, as GetArgMappingToPhysicalReg returns InvalidReg()).
-  // return Mir2Lir::GenSpecialCase(bb, mir, special);
-  return false;
-}
-
 /*
  * The sparse table in the literal pool is an array of <key,displacement>
  * pairs.  For each set, we'll load them as a pair using ldp.
@@ -140,7 +132,7 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32);
+  LoadBaseIndexed(table_base, As64BitReg(key_reg), disp_reg, 2, k32);
 
   // Get base branch address.
   RegStorage branch_reg = AllocTempWide();
@@ -182,12 +174,12 @@
   // Making a call - use explicit registers
   FlushAllRegs();   /* Everything to home location */
   LoadValueDirectFixed(rl_src, rs_x0);
-  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData).Int32Value(),
-               rs_rA64_LR);
+  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData).Int32Value(),
+               rs_xLR);
   // Materialize a pointer to the fill data image
   NewLIR3(kA64Adr2xd, rx1, 0, WrapPointer(tab_rec));
   ClobberCallerSave();
-  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
   MarkSafepointPC(call_inst);
 }
 
@@ -203,7 +195,7 @@
   // TUNING: How much performance we get when we inline this?
   // Since we've already flush all register.
   FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_w0);
+  LoadValueDirectFixed(rl_src, rs_x0);  // = TargetRefReg(kArg0)
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
   if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -214,7 +206,7 @@
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
-  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
+  Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
   OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
   NewLIR2(kA64Ldxr2rX, rw3, rx2);
   MarkPossibleNullPointerException(opt_flags);
@@ -229,9 +221,9 @@
   }
   // TODO: move to a slow path.
   // Go expensive route - artLockObjectFromCode(obj);
-  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
+  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_xLR);
   ClobberCallerSave();
-  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
   MarkSafepointPC(call_inst);
 
   LIR* success_target = NewLIR0(kPseudoTargetLabel);
@@ -251,7 +243,7 @@
   // TUNING: How much performance we get when we inline this?
   // Since we've already flush all register.
   FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_w0);  // Get obj
+  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
   if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
@@ -262,7 +254,7 @@
       null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
     }
   }
-  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
+  Load32Disp(rs_xSELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
   Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
   MarkPossibleNullPointerException(opt_flags);
   LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
@@ -277,9 +269,9 @@
   }
   // TODO: move to a slow path.
   // Go expensive route - artUnlockObjectFromCode(obj);
-  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
+  LoadWordDisp(rs_xSELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_xLR);
   ClobberCallerSave();
-  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  LIR* call_inst = OpReg(kOpBlx, rs_xLR);
   MarkSafepointPC(call_inst);
 
   LIR* success_target = NewLIR0(kPseudoTargetLabel);
@@ -289,8 +281,8 @@
 void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
-  LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg, kNotVolatile);
-  StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr, kNotVolatile);
+  LoadRefDisp(rs_xSELF, ex_offset, rl_result.reg, kNotVolatile);
+  StoreRefDisp(rs_xSELF, ex_offset, rs_xzr, kNotVolatile);
   StoreValue(rl_dest, rl_result);
 }
 
@@ -299,12 +291,12 @@
  */
 void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
   RegStorage reg_card_base = AllocTempWide();
-  RegStorage reg_card_no = AllocTemp();
+  RegStorage reg_card_no = AllocTempWide();  // Needs to be wide as addr is ref=64b
   LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
-  LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
+  LoadWordDisp(rs_xSELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
-  StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base),
+  StoreBaseIndexed(reg_card_base, reg_card_no, As32BitReg(reg_card_base),
                    0, kUnsignedByte);
   LIR* target = NewLIR0(kPseudoTargetLabel);
   branch_over->target = target;
@@ -349,33 +341,33 @@
     if (cu_->compiler_driver->GetCompilerOptions().GetExplicitStackOverflowChecks()) {
       if (!large_frame) {
         // Load stack limit
-        LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
+        LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
       }
     } else {
       // TODO(Arm64) Implement implicit checks.
       // Implicit stack overflow check.
       // Generate a load from [sp, #-framesize].  If this is in the stack
       // redzone we will get a segmentation fault.
-      // Load32Disp(rs_rA64_SP, -Thread::kStackOverflowReservedBytes, rs_wzr);
+      // Load32Disp(rs_wSP, -Thread::kStackOverflowReservedBytes, rs_wzr);
       // MarkPossibleStackOverflowException();
       LOG(FATAL) << "Implicit stack overflow checks not implemented.";
     }
   }
 
   if (frame_size_ > 0) {
-    OpRegImm64(kOpSub, rs_rA64_SP, spill_size);
+    OpRegImm64(kOpSub, rs_sp, spill_size);
   }
 
   /* Need to spill any FP regs? */
   if (fp_spill_mask_) {
     int spill_offset = spill_size - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
-    SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
+    SpillFPRegs(rs_sp, spill_offset, fp_spill_mask_);
   }
 
   /* Spill core callee saves. */
   if (core_spill_mask_) {
     int spill_offset = spill_size - kArm64PointerSize*num_core_spills_;
-    SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
+    SpillCoreRegs(rs_sp, spill_offset, core_spill_mask_);
   }
 
   if (!skip_overflow_check) {
@@ -391,11 +383,11 @@
           m2l_->ResetDefTracking();
           GenerateTargetLabel(kPseudoThrowTarget);
           // Unwinds stack.
-          m2l_->OpRegImm(kOpAdd, rs_rA64_SP, sp_displace_);
+          m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_);
           m2l_->ClobberCallerSave();
           ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
           m2l_->LockTemp(rs_x8);
-          m2l_->LoadWordDisp(rs_rA64_SELF, func_offset.Int32Value(), rs_x8);
+          m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_x8);
           m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg());
           m2l_->FreeTemp(rs_x8);
         }
@@ -407,26 +399,26 @@
       if (large_frame) {
         // Compare Expected SP against bottom of stack.
         // Branch to throw target if there is not enough room.
-        OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
-        LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
+        OpRegRegImm(kOpSub, rs_x9, rs_sp, frame_size_without_spills);
+        LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
         LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
-        OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
+        OpRegCopy(rs_sp, rs_x9);  // Establish stack after checks.
       } else {
         /*
+         * If the frame is small enough, we are guaranteed to have enough remaining space to
+         * handle signals on the user stack.
          * Establishes stack before checks.
          */
-        OpRegRegImm(kOpSub, rs_rA64_SP, rs_rA64_SP, frame_size_without_spills);
-        LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x9, nullptr);
+        OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size_without_spills);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_sp, rs_x9, nullptr);
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_));
       }
     } else {
-      OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills);
+      OpRegImm(kOpSub, rs_sp, frame_size_without_spills);
     }
   } else {
-    OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills);
+    OpRegImm(kOpSub, rs_sp, frame_size_without_spills);
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -453,17 +445,59 @@
 
   NewLIR0(kPseudoMethodExit);
 
-  /* Need to restore any FP callee saves? */
-  if (fp_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
-    UnSpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
-  }
-  if (core_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
-    UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
+  // Restore saves and drop stack frame.
+  // Two versions:
+  //
+  // 1. (Original): Try to address the saves directly, then drop the whole frame.
+  //                Limitation: the ldp offset is a 7-bit signed immediate, so this only works
+  //                for small frames (there should have been a DCHECK!).
+  //
+  // 2. (New): Drop the non-save part first. Then restore as in the original, which is now
+  //           guaranteed to be in range. Then drop the rest.
+  //
+  // TODO: In methods with few spills but huge frame, it would be better to do non-immediate loads
+  //       in variant 1.
+
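+  // Worked example (hypothetical sizes): with frame_size_ = 496, two core spills and no FP
+  // spills, variant 1 unspills at offset 496 - 2*8 = 480 and drops the whole 496 in one add.
+  // With frame_size_ = 1024 and the same spills, variant 2 first drops
+  // RoundDown(1024 - 16, 16) = 1008, unspills at offset 0, then drops the remaining 16.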
+  if (frame_size_ <= 504) {
+    // "Magic" constant, 63 (max signed 7b) * 8. Do variant 1.
+    // Could be tighter, as the last load is below frame_size_ offset.
+    if (fp_spill_mask_) {
+      int spill_offset = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+      UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_);
+    }
+    if (core_spill_mask_) {
+      int spill_offset = frame_size_ - kArm64PointerSize * num_core_spills_;
+      UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_);
+    }
+
+    OpRegImm64(kOpAdd, rs_sp, frame_size_);
+  } else {
+    // Second variant. Drop the non-save part of the frame first.
+    int drop = 0;
+    // TODO: Always use the first formula, as num_fp_spills would be zero?
+    if (fp_spill_mask_) {
+      drop = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+    } else {
+      drop = frame_size_ - kArm64PointerSize * num_core_spills_;
+    }
+
+    // The drop needs to be 16-byte aligned, so that SP stays aligned.
+    drop = RoundDown(drop, 16);
+
+    OpRegImm64(kOpAdd, rs_sp, drop);
+
+    if (fp_spill_mask_) {
+      int offset = frame_size_ - drop - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+      UnSpillFPRegs(rs_sp, offset, fp_spill_mask_);
+    }
+    if (core_spill_mask_) {
+      int offset = frame_size_ - drop - kArm64PointerSize * num_core_spills_;
+      UnSpillCoreRegs(rs_sp, offset, core_spill_mask_);
+    }
+
+    OpRegImm64(kOpAdd, rs_sp, frame_size_ - drop);
   }
 
-  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_);
+  // Finally return.
   NewLIR0(kA64Ret);
 }
 
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 68fa6f4..7d75da9 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -24,17 +24,12 @@
 
 namespace art {
 
-class Arm64Mir2Lir : public Mir2Lir {
+class Arm64Mir2Lir FINAL : public Mir2Lir {
  protected:
-  // If we detect a size error, FATAL out.
-  static constexpr bool kFailOnSizeError = false && kIsDebugBuild;
-  // If we detect a size error, report to LOG.
-  static constexpr bool kReportSizeError = false && kIsDebugBuild;
-
   // TODO: consolidate 64-bit target support.
   class InToRegStorageMapper {
    public:
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0;
     virtual ~InToRegStorageMapper() {}
   };
 
@@ -42,7 +37,7 @@
    public:
     InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
     virtual ~InToRegStorageArm64Mapper() {}
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref);
    private:
     int cur_core_reg_;
     int cur_fp_reg_;
@@ -69,7 +64,9 @@
 
     // Required for target - codegen helpers.
     bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
-                            RegLocation rl_dest, int lit);
+                            RegLocation rl_dest, int lit) OVERRIDE;
+    bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
+                          RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
     RegStorage LoadHelper(ThreadOffset<4> offset) OVERRIDE;
@@ -81,7 +78,8 @@
         OVERRIDE;
     LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
                          OpSize size) OVERRIDE;
-    LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) OVERRIDE;
+    LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale)
+        OVERRIDE;
     LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                              RegStorage r_dest, OpSize size) OVERRIDE;
     LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
@@ -92,7 +90,8 @@
                       VolatileKind is_volatile) OVERRIDE;
     LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
                           OpSize size) OVERRIDE;
-    LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) OVERRIDE;
+    LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale)
+        OVERRIDE;
     LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                               RegStorage r_src, OpSize size) OVERRIDE;
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE;
@@ -100,7 +99,23 @@
                            int offset, int check_value, LIR* target) OVERRIDE;
 
     // Required for target - register utilities.
-    RegStorage TargetReg(SpecialTargetRegister reg);
+    RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
+    RegStorage TargetReg(SpecialTargetRegister symbolic_reg, bool is_wide) OVERRIDE {
+      RegStorage reg = TargetReg(symbolic_reg);
+      if (is_wide) {
+        return (reg.Is64Bit()) ? reg : As64BitReg(reg);
+      } else {
+        return (reg.Is32Bit()) ? reg : As32BitReg(reg);
+      }
+    }
+    RegStorage TargetRefReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
+      RegStorage reg = TargetReg(symbolic_reg);
+      return (reg.Is64Bit() ? reg : As64BitReg(reg));
+    }
+    RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
+      RegStorage reg = TargetReg(symbolic_reg);
+      return (reg.Is64Bit() ? reg : As64BitReg(reg));
+    }
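+    // On arm64 both heap references and native pointers live in 64-bit core registers, which is
+    // why TargetRefReg and TargetPtrReg above perform the same widening.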
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
@@ -114,8 +129,6 @@
     void ClobberCallerSave();
     void FreeCallTemps();
     void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, RegStorage reg);
-    void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
@@ -165,8 +178,11 @@
     void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
+    bool GenInlinedReverseBits(CallInfo* info, OpSize size);
+    bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
     bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+    bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
+    bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
@@ -203,7 +219,6 @@
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
 
     uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2);
     void UnSpillCoreRegs(RegStorage base, int offset, uint32_t reg_mask);
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 9814cb4..6594c4b 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -323,6 +323,16 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
+  RegLocation rl_src = info->args[0];
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_dest = InlineTargetWide(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
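+  // ubfm with immr = 0, imms = 62 keeps bits [62:0] and clears bit 63, the sign bit of the raw
+  // double representation, i.e. abs() without using a floating-point instruction.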
+  NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62);
+  StoreValueWide(rl_dest, rl_result);
+  return true;
+}
+
 bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) {
   RegLocation rl_src = info->args[0];
   RegLocation rl_dest = InlineTargetWide(info);  // double place for result
@@ -333,4 +343,19 @@
   return true;
 }
 
+bool Arm64Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
+  DCHECK_EQ(cu_->instruction_set, kArm64);
+  int op = (is_min) ? kA64Fmin3fff : kA64Fmax3fff;
+  ArmOpcode wide = (is_double) ? FWIDE(0) : FUNWIDE(0);
+  RegLocation rl_src1 = info->args[0];
+  RegLocation rl_src2 = (is_double) ? info->args[2] : info->args[1];
+  rl_src1 = (is_double) ? LoadValueWide(rl_src1, kFPReg) : LoadValue(rl_src1, kFPReg);
+  rl_src2 = (is_double) ? LoadValueWide(rl_src2, kFPReg) : LoadValue(rl_src2, kFPReg);
+  RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
+  RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR3(op | wide, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg());
+  (is_double) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 1fdbe2d..e8f5cb9 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -19,6 +19,7 @@
 #include "arm64_lir.h"
 #include "codegen_arm64.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
 
@@ -181,6 +182,8 @@
 
   if (LIKELY(dest_is_fp == src_is_fp)) {
     if (LIKELY(!dest_is_fp)) {
+      DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
+
       // Core/core copy.
       // Copies involving the sp register require a different instruction.
       opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;
@@ -210,14 +213,14 @@
       if (r_dest.IsDouble()) {
         opcode = kA64Fmov2Sx;
       } else {
-        DCHECK(r_src.IsSingle());
+        r_src = Check32BitReg(r_src);
         opcode = kA64Fmov2sw;
       }
     } else {
       if (r_src.IsDouble()) {
         opcode = kA64Fmov2xS;
       } else {
-        DCHECK(r_dest.Is32Bit());
+        r_dest = Check32BitReg(r_dest);
         opcode = kA64Fmov2ws;
       }
     }
@@ -271,14 +274,8 @@
 
 // Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
 bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
-  // TODO(Arm64): fix this for Arm64. Note: may be worth revisiting the magic table.
-  //   It should be possible subtracting one from all its entries, and using smaddl
-  //   to counteract this. The advantage is that integers should then be easier to
-  //   encode as logical immediates (0x55555555 rather than 0x55555556).
-  UNIMPLEMENTED(FATAL);
-
-  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
+                                      RegLocation rl_src, RegLocation rl_dest, int lit) {
+  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
     return false;
   }
   DividePattern pattern = magic_table[lit].pattern;
@@ -294,21 +291,24 @@
   LoadConstant(r_magic, magic_table[lit].magic);
   rl_src = LoadValue(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage r_hi = AllocTemp();
-  RegStorage r_lo = AllocTemp();
-  NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
+  RegStorage r_long_mul = AllocTemp();
+  NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(),
+          r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
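+  // smaddl leaves the full 64-bit product magic * dividend in r_long_mul; the patterns below
+  // extract its high half with 64-bit shifts instead of using a second temp register.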
   switch (pattern) {
     case Divide3:
-      OpRegRegRegShift(kOpSub, rl_result.reg, r_hi, rl_src.reg, EncodeShift(kA64Asr, 31));
+      OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
       break;
     case Divide5:
-      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift));
+      OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
+                  32 + magic_table[lit].shift);
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
       break;
     case Divide7:
-      OpRegReg(kOpAdd, r_hi, rl_src.reg);
-      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi, EncodeShift(kA64Asr, magic_table[lit].shift));
+      OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
+                       As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
+      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
       break;
     default:
       LOG(FATAL) << "Unexpected pattern: " << pattern;
@@ -317,13 +317,59 @@
   return true;
 }
 
+// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
+// and store the result in 'rl_dest'.
+bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
+                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
+  if (lit < 2) {
+    return false;
+  }
+  if (!IsPowerOfTwo(lit)) {
+    return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, lit);
+  }
+  int k = LowestSetBit(lit);
+  if (k >= 30) {
+    // Avoid special cases.
+    return false;
+  }
+  rl_src = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
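+  // For signed division by 2^k, a negative dividend is biased by (2^k - 1) before the arithmetic
+  // shift so that the quotient rounds toward zero, e.g. (hypothetical values) -9 / 8:
+  // t = -9 + 7 = -2, then -2 >> 3 = -1.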
+  if (is_div) {
+    RegStorage t_reg = AllocTemp();
+    if (lit == 2) {
+      // Division by 2 is by far the most common division by constant.
+      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
+      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
+    } else {
+      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, 31);
+      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, EncodeShift(kA64Lsr, 32 - k));
+      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
+    }
+  } else {
+    RegStorage t_reg = AllocTemp();
+    if (lit == 2) {
+      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
+      OpRegRegImm(kOpAnd, t_reg, t_reg, lit - 1);
+      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, EncodeShift(kA64Lsr, 32 - k));
+    } else {
+      RegStorage t_reg2 = AllocTemp();
+      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, 31);
+      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, EncodeShift(kA64Lsr, 32 - k));
+      OpRegRegImm(kOpAnd, t_reg2, t_reg2, lit - 1);
+      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, EncodeShift(kA64Lsr, 32 - k));
+    }
+  }
+  StoreValue(rl_dest, rl_result);
+  return true;
+}
+
 bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
   LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
   return false;
 }
 
 RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation rl_src2, bool is_div, bool check_zero) {
+                                    RegLocation rl_src2, bool is_div, bool check_zero) {
   LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
   return rl_dest;
 }
@@ -388,26 +434,25 @@
   return true;
 }
 
-bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
   DCHECK_EQ(cu_->instruction_set, kArm64);
   RegLocation rl_src1 = info->args[0];
-  RegLocation rl_src2 = info->args[1];
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
-  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
+  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
+  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
+  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
-          rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
-  StoreValue(rl_dest, rl_result);
+  NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
+          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
+  (is_long) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
   return true;
 }
 
 bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
   RegLocation rl_src_address = info->args[0];  // long address
-  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1] ?
-  RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);   // kRefReg
+  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
+  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
 
   LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
@@ -422,9 +467,8 @@
 
 bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
   RegLocation rl_src_address = info->args[0];  // long address
-  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
   RegLocation rl_src_value = info->args[2];  // [size] value
-  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);   // kRefReg
+  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
 
   RegLocation rl_value;
   if (size == k64) {
@@ -451,11 +495,9 @@
 
 bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK_EQ(cu_->instruction_set, kArm64);
-  ArmOpcode wide = is_long ? WIDE(0) : UNWIDE(0);
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
   RegLocation rl_src_offset = info->args[2];  // long low
-  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3] //TODO: do we really need this
   RegLocation rl_src_expected = info->args[4];  // int, long or Object
   // If is_long, high half is in info->args[5]
   RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
@@ -464,7 +506,7 @@
 
   // Load Object and offset
   RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
-  RegLocation rl_offset = LoadValue(rl_src_offset, kRefReg);
+  RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
 
   RegLocation rl_new_value;
   RegLocation rl_expected;
@@ -496,28 +538,38 @@
   // result = tmp != 0;
 
   RegStorage r_tmp;
+  RegStorage r_tmp_stored;
+  RegStorage rl_new_value_stored = rl_new_value.reg;
+  ArmOpcode wide = UNWIDE(0);
   if (is_long) {
-    r_tmp = AllocTempWide();
+    r_tmp_stored = r_tmp = AllocTempWide();
+    wide = WIDE(0);
   } else if (is_object) {
+    // References use 64-bit registers, but are stored as compressed 32-bit values.
+    // This means r_tmp_stored != r_tmp.
     r_tmp = AllocTempRef();
+    r_tmp_stored = As32BitReg(r_tmp);
+    rl_new_value_stored = As32BitReg(rl_new_value_stored);
   } else {
-    r_tmp = AllocTemp();
+    r_tmp_stored = r_tmp = AllocTemp();
   }
 
+  RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
   LIR* loop = NewLIR0(kPseudoTargetLabel);
-  NewLIR2(kA64Ldaxr2rX | wide, r_tmp.GetReg(), r_ptr.GetReg());
+  NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
   OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
   LIR* early_exit = OpCondBranch(kCondNe, NULL);
-
-  NewLIR3(kA64Stlxr3wrX | wide, As32BitReg(r_tmp).GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg());
-  NewLIR3(kA64Cmp3RdT, As32BitReg(r_tmp).GetReg(), 0, ENCODE_NO_SHIFT);
+  NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
+  NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
   OpCondBranch(kCondNe, loop);
 
+  LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
+  early_exit->target = exit_loop;
+
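+  // Both the failure path (via early_exit) and the success path fall through to the csinc below,
+  // which materializes the boolean result from the current condition flags.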
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  LIR* exit =  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
-  early_exit->target = exit;
+  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
 
   FreeTemp(r_tmp);  // Now unneeded.
   FreeTemp(r_ptr);  // Now unneeded.
@@ -556,9 +608,7 @@
 
 // Test suspend flag, return target of taken suspend branch
 LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
-  // FIXME: Define rA64_SUSPEND as w19, when we do not need two copies of reserved register.
-  // Note: The opcode is not set as wide, so actually we are using the 32-bit version register.
-  NewLIR3(kA64Subs3rRd, rA64_SUSPEND, rA64_SUSPEND, 1);
+  NewLIR3(kA64Subs3rRd, rwSUSPEND, rwSUSPEND, 1);
   return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
 }
 
@@ -614,7 +664,7 @@
 
   rl_src = LoadValue(rl_src, kCoreReg);
   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 31);
+  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -773,7 +823,7 @@
       FreeTemp(reg_len);
     }
     if (rl_result.ref) {
-      LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg);
+      LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale);
     } else {
       LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size);
     }
@@ -870,7 +920,7 @@
       FreeTemp(reg_len);
     }
     if (rl_src.ref) {
-      StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg);
+      StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale);
     } else {
       StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size);
     }
@@ -1011,6 +1061,7 @@
     if (UNLIKELY(reg2 < 0)) {
       NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
     } else {
+      DCHECK_LE(offset, 63);
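+      // ldp encodes a 7-bit signed scaled immediate, so the offset operand must not exceed 63.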
       NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
               RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
     }
@@ -1063,4 +1114,15 @@
   }
 }
 
+bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
+  ArmOpcode wide = (size == k64) ? WIDE(0) : UNWIDE(0);
+  RegLocation rl_src_i = info->args[0];
+  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
+  NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
+  (size == k64) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index dfaa483..ef9dbdd 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -22,6 +22,7 @@
 
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 
 namespace art {
 
@@ -48,9 +49,9 @@
      rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
      rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
 static constexpr RegStorage reserved_regs_arr[] =
-    {rs_rA32_SUSPEND, rs_rA32_SELF, rs_rA32_SP, rs_rA32_LR, rs_wzr};
+    {rs_wSUSPEND, rs_wSELF, rs_wsp, rs_wLR, rs_wzr};
 static constexpr RegStorage reserved64_regs_arr[] =
-    {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR, rs_xzr};
+    {rs_xSUSPEND, rs_xSELF, rs_sp, rs_xLR, rs_xzr};
 // TUNING: Are there too many temp registers and too few promotion targets?
 // This definition needs to match runtime.cc, the quick entry assembly and the JNI compiler.
 // Note: we are not able to call a C function directly if it does not match the C ABI.
@@ -88,7 +89,7 @@
 }
 
 RegLocation Arm64Mir2Lir::LocCReturnRef() {
-  return arm_loc_c_return;
+  return arm_loc_c_return_ref;
 }
 
 RegLocation Arm64Mir2Lir::LocCReturnWide() {
@@ -107,11 +108,11 @@
 RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
   RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
-    case kSelf: res_reg = rs_rA64_SELF; break;
-    case kSuspend: res_reg = rs_rA64_SUSPEND; break;
-    case kLr: res_reg =  rs_rA64_LR; break;
+    case kSelf: res_reg = rs_xSELF; break;
+    case kSuspend: res_reg = rs_xSUSPEND; break;
+    case kLr: res_reg =  rs_xLR; break;
     case kPc: res_reg = RegStorage::InvalidReg(); break;
-    case kSp: res_reg =  rs_rA64_SP; break;
+    case kSp: res_reg =  rs_sp; break;
     case kArg0: res_reg = rs_x0; break;
     case kArg1: res_reg = rs_x1; break;
     case kArg2: res_reg = rs_x2; break;
@@ -130,7 +131,7 @@
     case kFArg7: res_reg = rs_f7; break;
     case kRet0: res_reg = rs_x0; break;
     case kRet1: res_reg = rs_x1; break;
-    case kInvokeTgt: res_reg = rs_rA64_LR; break;
+    case kInvokeTgt: res_reg = rs_xLR; break;
     case kHiddenArg: res_reg = rs_x12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
@@ -644,33 +645,10 @@
  */
 
 void Arm64Mir2Lir::AdjustSpillMask() {
-  core_spill_mask_ |= (1 << rs_rA64_LR.GetRegNum());
+  core_spill_mask_ |= (1 << rs_xLR.GetRegNum());
   num_core_spills_++;
 }
 
-/*
- * Mark a callee-save fp register as promoted.
- */
-void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
-  DCHECK(reg.IsFloat());
-  int adjusted_reg_num = reg.GetRegNum() - A64_FP_CALLEE_SAVE_BASE;
-  // Ensure fp_vmap_table is large enough
-  int table_size = fp_vmap_table_.size();
-  for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
-    fp_vmap_table_.push_back(INVALID_VREG);
-  }
-  // Add the current mapping
-  fp_vmap_table_[adjusted_reg_num] = v_reg;
-  // Size of fp_vmap_table is high-water mark, use to set mask
-  num_fp_spills_ = fp_vmap_table_.size();
-  fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << A64_FP_CALLEE_SAVE_BASE;
-}
-
-void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
-  DCHECK(reg.IsDouble());
-  MarkPreservedSingle(v_reg, reg);
-}
-
 /* Clobber all regs that might be used by an external C call */
 void Arm64Mir2Lir::ClobberCallerSave() {
   Clobber(rs_x0);
@@ -789,13 +767,13 @@
 RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
   // TODO(Arm64): use LoadWordDisp instead.
   //   e.g. LoadWordDisp(rs_xSELF, offset.Int32Value(), rs_xLR);
-  LoadBaseDisp(rs_rA64_SELF, offset.Int32Value(), rs_rA64_LR, k64, kNotVolatile);
-  return rs_rA64_LR;
+  LoadBaseDisp(rs_xSELF, offset.Int32Value(), rs_xLR, k64, kNotVolatile);
+  return rs_xLR;
 }
 
 LIR* Arm64Mir2Lir::CheckSuspendUsingLoad() {
   RegStorage tmp = rs_x0;
-  LoadWordDisp(rs_rA64_SELF, Thread::ThreadSuspendTriggerOffset<8>().Int32Value(), tmp);
+  LoadWordDisp(rs_xSELF, Thread::ThreadSuspendTriggerOffset<8>().Int32Value(), tmp);
   LIR* load2 = LoadWordDisp(tmp, 0, tmp);
   return load2;
 }
@@ -816,7 +794,8 @@
 }
 
 RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or_float,
-                                                               bool is_wide) {
+                                                               bool is_wide,
+                                                               bool is_ref) {
   const RegStorage coreArgMappingToPhysicalReg[] =
       {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7};
   const int coreArgMappingToPhysicalRegSize =
@@ -829,6 +808,7 @@
   RegStorage result = RegStorage::InvalidReg();
   if (is_double_or_float) {
     if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+      DCHECK(!is_ref);
       result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
       if (result.Valid()) {
         // TODO: switching between widths remains a bit ugly.  Better way?
@@ -842,7 +822,8 @@
       if (result.Valid()) {
         // TODO: switching between widths remains a bit ugly.  Better way?
         int res_reg = result.GetReg();
-        result = is_wide ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
+        DCHECK(!(is_wide && is_ref));
+        result = (is_wide || is_ref) ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
       }
     }
   }
@@ -861,14 +842,16 @@
   max_mapped_in_ = -1;
   is_there_stack_mapped_ = false;
   for (int in_position = 0; in_position < count; in_position++) {
-     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
+     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
+                                         arg_locs[in_position].wide,
+                                         arg_locs[in_position].ref);
      if (reg.Valid()) {
        mapping_[in_position] = reg;
-       max_mapped_in_ = std::max(max_mapped_in_, in_position);
-       if (reg.Is64BitSolo()) {
+       if (arg_locs[in_position].wide) {
          // We covered 2 args, so skip the next one
          in_position++;
        }
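+       // Update after the potential skip so that max_mapped_in_ also covers the high half of a
+       // wide argument.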
+       max_mapped_in_ = std::max(max_mapped_in_, in_position);
      } else {
        is_there_stack_mapped_ = true;
      }
@@ -899,7 +882,7 @@
     int n = *num_gpr_used;
     if (n < 8) {
       *num_gpr_used = n + 1;
-      if (loc->wide) {
+      if (loc->wide || loc->ref) {
         *op_size = k64;
         return RegStorage::Solo64(n);
       } else {
@@ -960,35 +943,64 @@
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
   for (int i = 0; i < cu_->num_ins; i++) {
-    PromotionMap* v_map = &promotion_map_[start_vreg + i];
     RegLocation* t_loc = &ArgLocs[i];
     OpSize op_size;
     RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
 
     if (reg.Valid()) {
-      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
-      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+      // If arriving in register.
+
+      // We have already updated the arg location with the promotion info,
+      // so we can rely on it here.
+      if (t_loc->location == kLocPhysReg) {
+        // Just copy it.
+        OpRegCopy(t_loc->reg, reg);
       } else {
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile);
-        if (reg.Is64Bit()) {
-          if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
-            LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots";
-          }
-          i += 1;
+        // Needs flush.
+        if (t_loc->ref) {
+          StoreRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile);
+        } else {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
+              kNotVolatile);
         }
       }
     } else {
-      // If arriving in frame & promoted
-      if (v_map->core_location == kLocPhysReg) {
-        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
-                     RegStorage::Solo32(v_map->core_reg));
-      }
-      if (v_map->fp_location == kLocPhysReg) {
-        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+      // If arriving in frame & promoted.
+      if (t_loc->location == kLocPhysReg) {
+        if (t_loc->ref) {
+          LoadRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
+        } else {
+          LoadBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg,
+                       t_loc->wide ? k64 : k32, kNotVolatile);
+        }
       }
     }
+    if (t_loc->wide) {
+      // Increment i to skip the next one.
+      i++;
+    }
+    //      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+    //        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+    //      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+    //        OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg);
+    //      } else {
+    //        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile);
+    //        if (reg.Is64Bit()) {
+    //          if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
+    //            LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots";
+    //          }
+    //          i += 1;
+    //        }
+    //      }
+    //    } else {
+    //      // If arriving in frame & promoted
+    //      if (v_map->core_location == kLocPhysReg) {
+    //        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
+    //                     RegStorage::Solo32(v_map->core_reg));
+    //      }
+    //      if (v_map->fp_location == kLocPhysReg) {
+    //        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg));
+    //      }
   }
 }
 
@@ -1042,16 +1054,14 @@
   InToRegStorageMapping in_to_reg_storage_mapping;
   in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
   const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
-  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
-          in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
-  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
+  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1);
 
-  // Fisrt of all, check whether it make sense to use bulk copying
-  // Optimization is aplicable only for range case
+  // First of all, check whether it makes sense to use bulk copying.
+  // Bulk copying is done only for the range case.
   // TODO: make a constant instead of 2
   if (info->is_range && regs_left_to_pass_via_stack >= 2) {
     // Scan the rest of the args - if in phys_reg flush to memory
-    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
+    for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) {
       RegLocation loc = info->args[next_arg];
       if (loc.wide) {
         loc = UpdateLocWide(loc);
@@ -1064,7 +1074,11 @@
         loc = UpdateLoc(loc);
         if (loc.location == kLocPhysReg) {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
+          if (loc.ref) {
+            StoreRefDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+          } else {
+            StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
+          }
         }
         next_arg++;
       }
@@ -1074,8 +1088,8 @@
     DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
 
     // The rest can be copied together
-    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped,
+    int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1,
                                                    cu_->instruction_set);
 
     int current_src_offset = start_offset;
@@ -1094,7 +1108,7 @@
 
       // Instead of allocating a new temp, simply reuse one of the registers being used
       // for argument passing.
-      RegStorage temp = TargetReg(kArg3);
+      RegStorage temp = TargetReg(kArg3, false);
 
       // Now load the argument VR and store to the outs.
       Load32Disp(TargetReg(kSp), current_src_offset, temp);
@@ -1127,7 +1141,6 @@
               LoadValueDirectWideFixed(rl_arg, regWide);
               StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64, kNotVolatile);
             }
-            i++;
           } else {
             if (rl_arg.location == kLocPhysReg) {
               if (rl_arg.ref) {
@@ -1149,6 +1162,9 @@
         call_state = next_call_insn(cu_, info, call_state, target_method,
                                     vtable_idx, direct_code, direct_method, type);
       }
+      if (rl_arg.wide) {
+        i++;
+      }
     }
   }
 
@@ -1160,12 +1176,14 @@
     if (reg.Valid()) {
       if (rl_arg.wide) {
         LoadValueDirectWideFixed(rl_arg, reg);
-        i++;
       } else {
         LoadValueDirectFixed(rl_arg, reg);
       }
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
+                                  direct_code, direct_method, type);
+    }
+    if (rl_arg.wide) {
+      i++;
     }
   }
 
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 12c2f41..22a4ec4 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -17,6 +17,7 @@
 #include "arm64_lir.h"
 #include "codegen_arm64.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 
 namespace art {
 
@@ -554,7 +555,7 @@
   /* RegReg operations with SP in first parameter need extended register instruction form.
    * Only CMN and CMP instructions are implemented.
    */
-  if (r_dest_src1 == rs_rA64_SP) {
+  if (r_dest_src1 == rs_sp) {
     return OpRegRegExtend(op, r_dest_src1, r_src2, ENCODE_NO_EXTEND);
   } else {
     return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
@@ -732,7 +733,7 @@
         return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
       } else {
         log_imm = -1;
-        alt_opcode = (neg) ? kA64Add4RRre : kA64Sub4RRre;
+        alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
         info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
       }
       break;
@@ -875,7 +876,7 @@
 }
 
 int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
-  return ((shift_type & 0x3) << 7) | (amount & 0x1f);
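+  // Keep all six bits of the amount: wide (64-bit) operations may shift by up to 63.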
+  return ((shift_type & 0x3) << 7) | (amount & 0x3f);
 }
 
 int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
@@ -891,9 +892,15 @@
   LIR* load;
   int expected_scale = 0;
   ArmOpcode opcode = kA64Brk1d;
-  DCHECK(r_base.Is64Bit());
-  // TODO: need a cleaner handling of index registers here and throughout.
-  r_index = Check32BitReg(r_index);
+  r_base = Check64BitReg(r_base);
+
+  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
+  //   register offset load (rather than doing the sign extension in a separate instruction).
+  if (r_index.Is32Bit()) {
+    // Assemble: ``sxtw xN, wN''.
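+    // (sxtw is the sbfm alias with immr = 0 and imms = 31, i.e. sign-extend the low word.)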
+    r_index = As64BitReg(r_index);
+    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
+  }
 
   if (r_dest.IsFloat()) {
     if (r_dest.IsDouble()) {
@@ -920,25 +927,29 @@
       opcode = WIDE(kA64Ldr4rXxG);
       expected_scale = 3;
       break;
-    case kSingle:
-    case k32:
+    case kSingle:     // Intentional fall-through.
+    case k32:         // Intentional fall-through.
     case kReference:
       r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldr4rXxG;
       expected_scale = 2;
       break;
     case kUnsignedHalf:
+      r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldrh4wXxd;
       expected_scale = 1;
       break;
     case kSignedHalf:
+      r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldrsh4rXxd;
       expected_scale = 1;
       break;
     case kUnsignedByte:
+      r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldrb3wXx;
       break;
     case kSignedByte:
+      r_dest = Check32BitReg(r_dest);
       opcode = kA64Ldrsb3rXx;
       break;
     default:
@@ -959,8 +970,9 @@
   return load;
 }
 
-LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) {
-  return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), 2, kReference);
+LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
+                                  int scale) {
+  return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference);
 }
 
 LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
@@ -968,9 +980,15 @@
   LIR* store;
   int expected_scale = 0;
   ArmOpcode opcode = kA64Brk1d;
-  DCHECK(r_base.Is64Bit());
-  // TODO: need a cleaner handling of index registers here and throughout.
-  r_index = Check32BitReg(r_index);
+  r_base = Check64BitReg(r_base);
+
+  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
+  //   register offset store (rather than doing the sign extension in a separate instruction).
+  if (r_index.Is32Bit()) {
+    // Assemble: ``sxtw xN, wN''.
+    r_index = As64BitReg(r_index);
+    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
+  }
 
   if (r_src.IsFloat()) {
     if (r_src.IsDouble()) {
@@ -1006,11 +1024,13 @@
       break;
     case kUnsignedHalf:
     case kSignedHalf:
+      r_src = Check32BitReg(r_src);
       opcode = kA64Strh4wXxd;
       expected_scale = 1;
       break;
     case kUnsignedByte:
     case kSignedByte:
+      r_src = Check32BitReg(r_src);
       opcode = kA64Strb3wXx;
       break;
     default:
@@ -1030,8 +1050,9 @@
   return store;
 }
 
-LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) {
-  return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), 2, kReference);
+LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
+                                   int scale) {
+  return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference);
 }
 
 /*
@@ -1110,7 +1131,7 @@
 
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK(r_base == rs_rA64_SP);
+    DCHECK(r_base == rs_sp);
     AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
   }
   return load;
@@ -1203,7 +1224,7 @@
 
   // TODO: In future, may need to differentiate Dalvik & spill accesses.
   if (mem_ref_type_ == ResourceMask::kDalvikReg) {
-    DCHECK(r_base == rs_rA64_SP);
+    DCHECK(r_base == rs_sp);
     AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
   }
   return store;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index f31b670..5870d22 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -272,7 +272,7 @@
     PromotionMap v_reg_map = promotion_map_[i];
     std::string buf;
     if (v_reg_map.fp_location == kLocPhysReg) {
-      StringAppendF(&buf, " : s%d", RegStorage::RegNum(v_reg_map.FpReg));
+      StringAppendF(&buf, " : s%d", RegStorage::RegNum(v_reg_map.fp_reg));
     }
 
     std::string buf3;
@@ -1184,7 +1184,8 @@
     // resolve these invokes to the same method, so we don't care which one we record here.
     data_target->operands[2] = type;
   }
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  // Loads a code pointer. Code from the oat file can be mapped anywhere.
+  LIR* load_pc_rel = OpPcRelLoad(TargetPtrReg(symbolic_reg), data_target);
   AppendLIR(load_pc_rel);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
 }
@@ -1200,7 +1201,8 @@
     // resolve these invokes to the same method, so we don't care which one we record here.
     data_target->operands[2] = type;
   }
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  // Loads an ArtMethod pointer, which is a reference as it lives in the heap.
+  LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target);
   AppendLIR(load_pc_rel);
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
 }
@@ -1211,7 +1213,8 @@
   if (data_target == nullptr) {
     data_target = AddWordData(&class_literal_list_, type_idx);
   }
-  LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg), data_target);
+  // Loads a Class pointer, which is a reference as it lives in the heap.
+  LIR* load_pc_rel = OpPcRelLoad(TargetRefReg(symbolic_reg), data_target);
   AppendLIR(load_pc_rel);
 }
 
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 3f9379c..b699bd3 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -87,6 +87,7 @@
 };
 
 const char* const DexFileMethodInliner::kNameCacheNames[] = {
+    "reverse",               // kNameCacheReverse
     "reverseBytes",          // kNameCacheReverseBytes
     "doubleToRawLongBits",   // kNameCacheDoubleToRawLongBits
     "longBitsToDouble",      // kNameCacheLongBitsToDouble
@@ -139,8 +140,12 @@
     { kClassCacheShort, 1, { kClassCacheShort } },
     // kProtoCacheD_D
     { kClassCacheDouble, 1, { kClassCacheDouble } },
+    // kProtoCacheDD_D
+    { kClassCacheDouble, 2, { kClassCacheDouble, kClassCacheDouble } },
     // kProtoCacheF_F
     { kClassCacheFloat, 1, { kClassCacheFloat } },
+    // kProtoCacheFF_F
+    { kClassCacheFloat, 2, { kClassCacheFloat, kClassCacheFloat } },
     // kProtoCacheD_J
     { kClassCacheLong, 1, { kClassCacheDouble } },
     // kProtoCacheJ_D
@@ -171,6 +176,8 @@
     { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheByte } },
     // kProtoCacheJI_V
     { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheInt } },
+    // kProtoCacheJJ_J
+    { kClassCacheLong, 2, { kClassCacheLong, kClassCacheLong } },
     // kProtoCacheJJ_V
     { kClassCacheVoid, 2, { kClassCacheLong, kClassCacheLong } },
     // kProtoCacheJS_V
@@ -211,6 +218,8 @@
     INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, k32),
     INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, k64),
     INTRINSIC(JavaLangShort, ReverseBytes, S_S, kIntrinsicReverseBytes, kSignedHalf),
+    INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
+    INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
 
     INTRINSIC(JavaLangMath,       Abs, I_I, kIntrinsicAbsInt, 0),
     INTRINSIC(JavaLangStrictMath, Abs, I_I, kIntrinsicAbsInt, 0),
@@ -224,6 +233,19 @@
     INTRINSIC(JavaLangStrictMath, Min, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMin),
     INTRINSIC(JavaLangMath,       Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
     INTRINSIC(JavaLangStrictMath, Max, II_I, kIntrinsicMinMaxInt, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangMath,       Min, JJ_J, kIntrinsicMinMaxLong, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, JJ_J, kIntrinsicMinMaxLong, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, JJ_J, kIntrinsicMinMaxLong, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, JJ_J, kIntrinsicMinMaxLong, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangMath,       Min, FF_F, kIntrinsicMinMaxFloat, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, FF_F, kIntrinsicMinMaxFloat, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, FF_F, kIntrinsicMinMaxFloat, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, FF_F, kIntrinsicMinMaxFloat, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangMath,       Min, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangStrictMath, Min, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMin),
+    INTRINSIC(JavaLangMath,       Max, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMax),
+    INTRINSIC(JavaLangStrictMath, Max, DD_D, kIntrinsicMinMaxDouble, kIntrinsicFlagMax),
+
     INTRINSIC(JavaLangMath,       Sqrt, D_D, kIntrinsicSqrt, 0),
     INTRINSIC(JavaLangStrictMath, Sqrt, D_D, kIntrinsicSqrt, 0),
 
@@ -319,6 +341,8 @@
       return backend->GenInlinedFloatCvt(info);
     case kIntrinsicReverseBytes:
       return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.d.data));
+    case kIntrinsicReverseBits:
+      return backend->GenInlinedReverseBits(info, static_cast<OpSize>(intrinsic.d.data));
     case kIntrinsicAbsInt:
       return backend->GenInlinedAbsInt(info);
     case kIntrinsicAbsLong:
@@ -328,7 +352,13 @@
     case kIntrinsicAbsDouble:
       return backend->GenInlinedAbsDouble(info);
     case kIntrinsicMinMaxInt:
-      return backend->GenInlinedMinMaxInt(info, intrinsic.d.data & kIntrinsicFlagMin);
+      return backend->GenInlinedMinMax(info, intrinsic.d.data & kIntrinsicFlagMin, false /* is_long */);
+    case kIntrinsicMinMaxLong:
+      return backend->GenInlinedMinMax(info, intrinsic.d.data & kIntrinsicFlagMin, true /* is_long */);
+    case kIntrinsicMinMaxFloat:
+      return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, false /* is_double */);
+    case kIntrinsicMinMaxDouble:
+      return backend->GenInlinedMinMaxFP(info, intrinsic.d.data & kIntrinsicFlagMin, true /* is_double */);
     case kIntrinsicSqrt:
       return backend->GenInlinedSqrt(info);
     case kIntrinsicCharAt:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 70693c2..c7a3b83 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -128,7 +128,8 @@
      */
     enum NameCacheIndex : uint8_t {  // uint8_t to save space, make larger if needed
       kNameCacheFirst = 0,
-      kNameCacheReverseBytes = kNameCacheFirst,
+      kNameCacheReverse = kNameCacheFirst,
+      kNameCacheReverseBytes,
       kNameCacheDoubleToRawLongBits,
       kNameCacheLongBitsToDouble,
       kNameCacheFloatToRawIntBits,
@@ -183,7 +184,9 @@
       kProtoCacheJ_J,
       kProtoCacheS_S,
       kProtoCacheD_D,
+      kProtoCacheDD_D,
       kProtoCacheF_F,
+      kProtoCacheFF_F,
       kProtoCacheD_J,
       kProtoCacheJ_D,
       kProtoCacheF_I,
@@ -199,6 +202,7 @@
       kProtoCacheJ_S,
       kProtoCacheJB_V,
       kProtoCacheJI_V,
+      kProtoCacheJJ_J,
       kProtoCacheJJ_V,
       kProtoCacheJS_V,
       kProtoCacheObjectJII_Z,
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index b00cbeb..dafefea 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -127,14 +127,17 @@
       m2l_->ResetDefTracking();
       GenerateTargetLabel(kPseudoThrowTarget);
 
-      m2l_->OpRegCopy(m2l_->TargetReg(kArg1), length_);
-      m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_);
+      RegStorage arg1_32 = m2l_->TargetReg(kArg1, false);
+      RegStorage arg0_32 = m2l_->TargetReg(kArg0, false);
+
+      m2l_->OpRegCopy(arg1_32, length_);
+      m2l_->LoadConstant(arg0_32, index_);
       if (m2l_->cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
-                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+                                      arg0_32, arg1_32, true);
       } else {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+                                      arg0_32, arg1_32, true);
       }
     }
 
@@ -365,7 +368,7 @@
       if (!use_direct_type_ptr) {
         mir_to_lir->LoadClassType(type_idx, kArg0);
         func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayResolved);
-        mir_to_lir->CallRuntimeHelperRegMethodRegLocation(func_offset, mir_to_lir->TargetReg(kArg0),
+        mir_to_lir->CallRuntimeHelperRegMethodRegLocation(func_offset, mir_to_lir->TargetReg(kArg0, false),
                                                           rl_src, true);
       } else {
         // Use the direct pointer.
@@ -428,8 +431,8 @@
   } else {
     GenFilledNewArrayCall<4>(this, cu_, elems, type_idx);
   }
-  FreeTemp(TargetReg(kArg2));
-  FreeTemp(TargetReg(kArg1));
+  FreeTemp(TargetReg(kArg2, false));
+  FreeTemp(TargetReg(kArg1, false));
   /*
    * NOTE: the implicit target for Instruction::FILLED_NEW_ARRAY is the
    * return region.  Because AllocFromCode placed the new array
@@ -437,7 +440,8 @@
    * added, it may be necessary to additionally copy all return
    * values to a home location in thread-local storage
    */
-  LockTemp(TargetReg(kRet0));
+  RegStorage ref_reg = TargetRefReg(kRet0);
+  LockTemp(ref_reg);
 
   // TODO: use the correct component size, currently all supported types
   // share array alignment with ints (see comment at head of function)
@@ -457,7 +461,7 @@
       RegLocation loc = UpdateLoc(info->args[i]);
       if (loc.location == kLocPhysReg) {
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+        Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
       }
     }
     /*
@@ -473,11 +477,11 @@
     switch (cu_->instruction_set) {
       case kThumb2:
       case kArm64:
-        r_val = TargetReg(kLr);
+        r_val = TargetReg(kLr, false);
         break;
       case kX86:
       case kX86_64:
-        FreeTemp(TargetReg(kRet0));
+        FreeTemp(ref_reg);
         r_val = AllocTemp();
         break;
       case kMips:
@@ -487,9 +491,9 @@
     }
     // Set up source pointer
     RegLocation rl_first = info->args[0];
-    OpRegRegImm(kOpAdd, r_src, TargetReg(kSp), SRegOffset(rl_first.s_reg_low));
+    OpRegRegImm(kOpAdd, r_src, TargetPtrReg(kSp), SRegOffset(rl_first.s_reg_low));
     // Set up the target pointer
-    OpRegRegImm(kOpAdd, r_dst, TargetReg(kRet0),
+    OpRegRegImm(kOpAdd, r_dst, ref_reg,
                 mirror::Array::DataOffset(component_size).Int32Value());
     // Set up the loop counter (known to be > 0)
     LoadConstant(r_idx, elems - 1);
@@ -507,14 +511,14 @@
     OpDecAndBranch(kCondGe, r_idx, target);
     if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
       // Restore the target pointer
-      OpRegRegImm(kOpAdd, TargetReg(kRet0), r_dst,
+      OpRegRegImm(kOpAdd, ref_reg, r_dst,
                   -mirror::Array::DataOffset(component_size).Int32Value());
     }
   } else if (!info->is_range) {
     // TUNING: interleave
     for (int i = 0; i < elems; i++) {
       RegLocation rl_arg = LoadValue(info->args[i], kCoreReg);
-      Store32Disp(TargetReg(kRet0),
+      Store32Disp(ref_reg,
                   mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg);
       // If the LoadValue caused a temp to be allocated, free it
       if (IsTemp(rl_arg.reg)) {
@@ -549,7 +553,7 @@
                                  storage_index_, true);
     }
     // Copy helper's result into r_base, a no-op on all but MIPS.
-    m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0));
+    m2l_->OpRegCopy(r_base_,  m2l_->TargetRefReg(kRet0));
 
     m2l_->OpUnconditionalBranch(cont_);
   }
@@ -597,10 +601,10 @@
       // May do runtime call so everything to home locations.
       FlushAllRegs();
       // Using fixed register to sync with possible call to runtime support.
-      RegStorage r_method = TargetReg(kArg1);
+      RegStorage r_method = TargetRefReg(kArg1);
       LockTemp(r_method);
       LoadCurrMethodDirect(r_method);
-      r_base = TargetReg(kArg0);
+      r_base = TargetRefReg(kArg0);
       LockTemp(r_base);
       LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
                   kNotVolatile);
@@ -614,7 +618,7 @@
         // The slow path is invoked if the r_base is NULL or the class pointed
         // to by it is not initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
-        RegStorage r_tmp = TargetReg(kArg2);
+        RegStorage r_tmp = TargetReg(kArg2, false);
         LockTemp(r_tmp);
         LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
@@ -690,10 +694,10 @@
       // May do runtime call so everything to home locations.
       FlushAllRegs();
       // Using fixed register to sync with possible call to runtime support.
-      RegStorage r_method = TargetReg(kArg1);
+      RegStorage r_method = TargetRefReg(kArg1);
       LockTemp(r_method);
       LoadCurrMethodDirect(r_method);
-      r_base = TargetReg(kArg0);
+      r_base = TargetRefReg(kArg0);
       LockTemp(r_base);
       LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
                   kNotVolatile);
@@ -707,7 +711,7 @@
         // The slow path is invoked if the r_base is NULL or the class pointed
         // to by it is not initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
-        RegStorage r_tmp = TargetReg(kArg2);
+        RegStorage r_tmp = TargetReg(kArg2, false);
         LockTemp(r_tmp);
         LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
                                           mirror::Class::StatusOffset().Int32Value(),
@@ -749,11 +753,12 @@
     } else {
       GenSgetCall<4>(this, is_long_or_double, is_object, &field_info);
     }
+    // FIXME: pGetXXStatic always returns an int or int64 regardless of rl_dest.fp.
     if (is_long_or_double) {
-      RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest));
+      RegLocation rl_result = GetReturnWide(kCoreReg);
       StoreValueWide(rl_dest, rl_result);
     } else {
-      RegLocation rl_result = GetReturn(LocToRegClass(rl_dest));
+      RegLocation rl_result = GetReturn(rl_dest.ref ? kRefReg : kCoreReg);
       StoreValue(rl_dest, rl_result);
     }
   }
@@ -900,12 +905,12 @@
 
 void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
   RegLocation rl_method = LoadCurrMethod();
-  DCHECK(!cu_->target64 || rl_method.reg.Is64Bit());
+  CheckRegLocation(rl_method);
   RegStorage res_reg = AllocTempRef();
   RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
   if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
-                                                   *cu_->dex_file,
-                                                   type_idx)) {
+                                                        *cu_->dex_file,
+                                                        type_idx)) {
     // Call out to helper which resolves type and verifies access.
     // Resolved type returned in kRet0.
     if (cu_->target64) {
@@ -950,7 +955,7 @@
             m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
                                                       rl_method_.reg, true);
           }
-          m2l_->OpRegCopy(rl_result_.reg,  m2l_->TargetReg(kRet0));
+          m2l_->OpRegCopy(rl_result_.reg,  m2l_->TargetRefReg(kRet0));
 
           m2l_->OpUnconditionalBranch(cont_);
         }
@@ -990,15 +995,15 @@
       DCHECK(!IsTemp(rl_method.reg));
       r_method = rl_method.reg;
     } else {
-      r_method = TargetReg(kArg2);
+      r_method = TargetRefReg(kArg2);
       LoadCurrMethodDirect(r_method);
     }
     LoadRefDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(),
-                TargetReg(kArg0), kNotVolatile);
+                TargetRefReg(kArg0), kNotVolatile);
 
     // Might call out to helper, which will return resolved string in kRet0
-    LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0), kNotVolatile);
-    LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL);
+    LoadRefDisp(TargetRefReg(kArg0), offset_of_string, TargetRefReg(kRet0), kNotVolatile);
+    LIR* fromfast = OpCmpImmBranch(kCondEq, TargetRefReg(kRet0), 0, NULL);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
 
     {
@@ -1067,10 +1072,10 @@
         mir_to_lir->LoadClassType(type_idx, kArg0);
         if (!is_type_initialized) {
           func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectResolved);
-          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetReg(kArg0), true);
+          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetRefReg(kArg0), true);
         } else {
           func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectInitialized);
-          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetReg(kArg0), true);
+          mir_to_lir->CallRuntimeHelperRegMethod(func_offset, mir_to_lir->TargetRefReg(kArg0), true);
         }
       } else {
         // Use the direct pointer.
@@ -1128,8 +1133,9 @@
   RegLocation object = LoadValue(rl_src, kRefReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
-  if (result_reg == object.reg) {
+  if (IsSameReg(result_reg, object.reg)) {
     result_reg = AllocTypedTemp(false, kCoreReg);
+    DCHECK(!IsSameReg(result_reg, object.reg));
   }
   LoadConstant(result_reg, 0);     // assume false
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
@@ -1188,8 +1194,9 @@
   FlushAllRegs();
   // May generate a call - use explicit registers
   LockCallTemps();
-  LoadCurrMethodDirect(TargetReg(kArg1));  // kArg1 <= current Method*
-  RegStorage class_reg = TargetReg(kArg2);  // kArg2 will hold the Class*
+  RegStorage method_reg = TargetRefReg(kArg1);
+  LoadCurrMethodDirect(method_reg);   // kArg1 <= current Method*
+  RegStorage class_reg = TargetRefReg(kArg2);  // kArg2 will hold the Class*
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kArg0
@@ -1200,16 +1207,16 @@
       CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                            type_idx, true);
     }
-    OpRegCopy(class_reg, TargetReg(kRet0));  // Align usage with fast path
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
+    OpRegCopy(class_reg, TargetRefReg(kRet0));  // Align usage with fast path
+    LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  // kArg0 <= ref
   } else if (use_declaring_class) {
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
-    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+    LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  // kArg0 <= ref
+    LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                 class_reg, kNotVolatile);
   } else {
     // Load dex cache entry into class_reg (kArg2)
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
-    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+    LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  // kArg0 <= ref
+    LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                 class_reg, kNotVolatile);
     int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
     LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
@@ -1223,38 +1230,38 @@
       } else {
         CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
       }
-      OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path
-      LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* reload Ref */
+      OpRegCopy(TargetRefReg(kArg2), TargetRefReg(kRet0));  // Align usage with fast path
+      LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  /* reload Ref */
       // Rejoin code paths
       LIR* hop_target = NewLIR0(kPseudoTargetLabel);
       hop_branch->target = hop_target;
     }
   }
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */
-  RegLocation rl_result = GetReturn(kRefReg);
+  RegLocation rl_result = GetReturn(kCoreReg);
   if (cu_->instruction_set == kMips) {
     // On MIPS rArg0 != rl_result, place false in result if branch is taken.
     LoadConstant(rl_result.reg, 0);
   }
-  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
+  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetRefReg(kArg0), 0, NULL);
 
   /* load object->klass_ */
   DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1),
+  LoadRefDisp(TargetRefReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetRefReg(kArg1),
               kNotVolatile);
   /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class */
   LIR* branchover = NULL;
   if (type_known_final) {
     // rl_result == ref == null == 0.
     if (cu_->instruction_set == kThumb2) {
-      OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2));  // Same?
+      OpRegReg(kOpCmp, TargetRefReg(kArg1), TargetRefReg(kArg2));  // Same?
       LIR* it = OpIT(kCondEq, "E");   // if-convert the test
       LoadConstant(rl_result.reg, 1);     // .eq case - load true
       LoadConstant(rl_result.reg, 0);     // .ne case - load false
       OpEndIT(it);
     } else {
       LoadConstant(rl_result.reg, 0);     // ne case - load false
-      branchover = OpCmpBranch(kCondNe, TargetReg(kArg1), TargetReg(kArg2), NULL);
+      branchover = OpCmpBranch(kCondNe, TargetRefReg(kArg1), TargetRefReg(kArg2), NULL);
       LoadConstant(rl_result.reg, 1);     // eq case - load true
     }
   } else {
@@ -1265,11 +1272,11 @@
       LIR* it = nullptr;
       if (!type_known_abstract) {
       /* Uses conditional nullification */
-        OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2));  // Same?
+        OpRegReg(kOpCmp, TargetRefReg(kArg1), TargetRefReg(kArg2));  // Same?
         it = OpIT(kCondEq, "EE");   // if-convert the test
-        LoadConstant(TargetReg(kArg0), 1);     // .eq case - load true
+        LoadConstant(TargetReg(kArg0, false), 1);     // .eq case - load true
       }
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));    // .ne case - arg0 <= class
+      OpRegCopy(TargetRefReg(kArg0), TargetRefReg(kArg2));    // .ne case - arg0 <= class
       OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
       if (it != nullptr) {
         OpEndIT(it);
@@ -1279,12 +1286,12 @@
       if (!type_known_abstract) {
         /* Uses branchovers */
         LoadConstant(rl_result.reg, 1);     // assume true
-        branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
+        branchover = OpCmpBranch(kCondEq, TargetRefReg(kArg1), TargetRefReg(kArg2), NULL);
       }
       RegStorage r_tgt = cu_->target64 ?
           LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) :
           LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));    // .ne case - arg0 <= class
+      OpRegCopy(TargetRefReg(kArg0), TargetRefReg(kArg2));    // .ne case - arg0 <= class
       OpReg(kOpBlx, r_tgt);    // .ne case: helper(class, ref->class)
       FreeTemp(r_tgt);
     }
@@ -1338,26 +1345,27 @@
   FlushAllRegs();
   // May generate a call - use explicit registers
   LockCallTemps();
-  LoadCurrMethodDirect(TargetReg(kArg1));  // kArg1 <= current Method*
-  RegStorage class_reg = TargetReg(kArg2);  // kArg2 will hold the Class*
+  RegStorage method_reg = TargetRefReg(kArg1);
+  LoadCurrMethodDirect(method_reg);  // kArg1 <= current Method*
+  RegStorage class_reg = TargetRefReg(kArg2);  // kArg2 will hold the Class*
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kRet0
     // InitializeTypeAndVerifyAccess(idx, method)
     if (cu_->target64) {
-      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
-                              type_idx, TargetReg(kArg1), true);
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
     } else {
-      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
-                              type_idx, TargetReg(kArg1), true);
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
     }
-    OpRegCopy(class_reg, TargetReg(kRet0));  // Align usage with fast path
+    OpRegCopy(class_reg, TargetRefReg(kRet0));  // Align usage with fast path
   } else if (use_declaring_class) {
-    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+    LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                 class_reg, kNotVolatile);
   } else {
     // Load dex cache entry into class_reg (kArg2)
-    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+    LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                 class_reg, kNotVolatile);
     int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
     LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
@@ -1382,12 +1390,12 @@
           // InitializeTypeFromCode(idx, method)
           if (m2l_->cu_->target64) {
             m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_,
-                                          m2l_->TargetReg(kArg1), true);
+                                          m2l_->TargetRefReg(kArg1), true);
           } else {
             m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx_,
-                                                      m2l_->TargetReg(kArg1), true);
+                                          m2l_->TargetRefReg(kArg1), true);
           }
-          m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0));  // Align usage with fast path
+          m2l_->OpRegCopy(class_reg_, m2l_->TargetRefReg(kRet0));  // Align usage with fast path
           m2l_->OpUnconditionalBranch(cont_);
         }
 
@@ -1400,7 +1408,7 @@
     }
   }
   // At this point, class_reg (kArg2) has class
-  LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
+  LoadValueDirectFixed(rl_src, TargetRefReg(kArg0));  // kArg0 <= ref
 
   // Slow path for the case where the classes are not equal.  In this case we need
   // to call a helper function to do the check.
@@ -1414,15 +1422,15 @@
       GenerateTargetLabel();
 
       if (load_) {
-        m2l_->LoadRefDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
-                          m2l_->TargetReg(kArg1), kNotVolatile);
+        m2l_->LoadRefDisp(m2l_->TargetRefReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                          m2l_->TargetRefReg(kArg1), kNotVolatile);
       }
       if (m2l_->cu_->target64) {
-        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pCheckCast), m2l_->TargetReg(kArg2),
-                                      m2l_->TargetReg(kArg1), true);
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pCheckCast), m2l_->TargetRefReg(kArg2),
+                                      m2l_->TargetRefReg(kArg1), true);
       } else {
-        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pCheckCast), m2l_->TargetReg(kArg2),
-                                              m2l_->TargetReg(kArg1), true);
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pCheckCast), m2l_->TargetRefReg(kArg2),
+                                              m2l_->TargetRefReg(kArg1), true);
       }
 
       m2l_->OpUnconditionalBranch(cont_);
@@ -1434,7 +1442,7 @@
 
   if (type_known_abstract) {
     // Easier case, run slow path if target is non-null (slow path will load from target)
-    LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kArg0), 0, NULL);
+    LIR* branch = OpCmpImmBranch(kCondNe, TargetRefReg(kArg0), 0, nullptr);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
     AddSlowPath(new (arena_) SlowPath(this, branch, cont, true));
   } else {
@@ -1443,13 +1451,13 @@
     // slow path if the classes are not equal.
 
     /* Null is OK - continue */
-    LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
+    LIR* branch1 = OpCmpImmBranch(kCondEq, TargetRefReg(kArg0), 0, nullptr);
     /* load object->klass_ */
     DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-    LoadRefDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1),
-                kNotVolatile);
+    LoadRefDisp(TargetRefReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                TargetRefReg(kArg1), kNotVolatile);
 
-    LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1), class_reg, NULL);
+    LIR* branch2 = OpCmpBranch(kCondNe, TargetRefReg(kArg1), class_reg, nullptr);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
 
     // Add the slow path that will not perform load since this is already done.
@@ -1665,13 +1673,13 @@
     // If we haven't already generated the code use the callout function.
     if (!done) {
       FlushAllRegs();   /* Send everything to home location */
-      LoadValueDirectFixed(rl_src2, TargetReg(kArg1));
+      LoadValueDirectFixed(rl_src2, TargetReg(kArg1, false));
       RegStorage r_tgt = cu_->target64 ?
           CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod)) :
           CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod));
-      LoadValueDirectFixed(rl_src1, TargetReg(kArg0));
+      LoadValueDirectFixed(rl_src1, TargetReg(kArg0, false));
       if (check_zero) {
-        GenDivZeroCheck(TargetReg(kArg1));
+        GenDivZeroCheck(TargetReg(kArg1, false));
       }
       // NOTE: callout here is not a safepoint.
       if (cu_->target64) {
@@ -1935,13 +1943,13 @@
 
       if (!done) {
         FlushAllRegs();   /* Everything to home location. */
-        LoadValueDirectFixed(rl_src, TargetReg(kArg0));
-        Clobber(TargetReg(kArg0));
+        LoadValueDirectFixed(rl_src, TargetReg(kArg0, false));
+        Clobber(TargetReg(kArg0, false));
         if (cu_->target64) {
-          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), TargetReg(kArg0), lit,
+          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), TargetReg(kArg0, false), lit,
                                   false);
         } else {
-          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), TargetReg(kArg0), lit,
+          CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), TargetReg(kArg0, false), lit,
                                   false);
         }
         if (is_div)
@@ -1975,7 +1983,7 @@
   bool call_out = false;
   bool check_zero = false;
   ThreadOffset<pointer_size> func_offset(-1);
-  int ret_reg = mir_to_lir->TargetReg(kRet0).GetReg();
+  int ret_reg = mir_to_lir->TargetReg(kRet0, false).GetReg();
 
   switch (opcode) {
     case Instruction::NOT_LONG:
@@ -2023,7 +2031,7 @@
         return;
       } else {
         call_out = true;
-        ret_reg = mir_to_lir->TargetReg(kRet0).GetReg();
+        ret_reg = mir_to_lir->TargetReg(kRet0, false).GetReg();
         func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLmul);
       }
       break;
@@ -2035,7 +2043,7 @@
       }
       call_out = true;
       check_zero = true;
-      ret_reg = mir_to_lir->TargetReg(kRet0).GetReg();
+      ret_reg = mir_to_lir->TargetReg(kRet0, false).GetReg();
       func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLdiv);
       break;
     case Instruction::REM_LONG:
@@ -2048,8 +2056,8 @@
       check_zero = true;
       func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLmod);
       /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */
-      ret_reg = (cu->instruction_set == kThumb2) ? mir_to_lir->TargetReg(kArg2).GetReg() :
-          mir_to_lir->TargetReg(kRet0).GetReg();
+      ret_reg = (cu->instruction_set == kThumb2) ? mir_to_lir->TargetReg(kArg2, false).GetReg() :
+          mir_to_lir->TargetReg(kRet0, false).GetReg();
       break;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
@@ -2092,14 +2100,11 @@
   } else {
     mir_to_lir->FlushAllRegs();   /* Send everything to home location */
     if (check_zero) {
-      RegStorage r_tmp1 = RegStorage::MakeRegPair(mir_to_lir->TargetReg(kArg0),
-                                                  mir_to_lir->TargetReg(kArg1));
-      RegStorage r_tmp2 = RegStorage::MakeRegPair(mir_to_lir->TargetReg(kArg2),
-                                                  mir_to_lir->TargetReg(kArg3));
+      RegStorage r_tmp1 = mir_to_lir->TargetReg(kArg0, kArg1);
+      RegStorage r_tmp2 = mir_to_lir->TargetReg(kArg2, kArg3);
       mir_to_lir->LoadValueDirectWideFixed(rl_src2, r_tmp2);
       RegStorage r_tgt = mir_to_lir->CallHelperSetup(func_offset);
-      mir_to_lir->GenDivZeroCheckWide(RegStorage::MakeRegPair(mir_to_lir->TargetReg(kArg2),
-                                                              mir_to_lir->TargetReg(kArg3)));
+      mir_to_lir->GenDivZeroCheckWide(mir_to_lir->TargetReg(kArg2, kArg3));
       mir_to_lir->LoadValueDirectWideFixed(rl_src1, r_tmp1);
       // NOTE: callout here is not a safepoint
       mir_to_lir->CallHelper(r_tgt, func_offset, false /* not safepoint */);
@@ -2107,7 +2112,7 @@
       mir_to_lir->CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
     }
     // Adjust return regs in to handle case of rem returning kArg2/kArg3
-    if (ret_reg == mir_to_lir->TargetReg(kRet0).GetReg())
+    if (ret_reg == mir_to_lir->TargetReg(kRet0, false).GetReg())
       rl_result = mir_to_lir->GetReturnWide(kCoreReg);
     else
       rl_result = mir_to_lir->GetReturnWideAlt();
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
old mode 100644
new mode 100755
index 008ebfb..4cc1375
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -132,7 +132,7 @@
 template <size_t pointer_size>
 void Mir2Lir::CallRuntimeHelperImm(ThreadOffset<pointer_size> helper_offset, int arg0, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadConstant(TargetReg(kArg0), arg0);
+  LoadConstant(TargetReg(kArg0, false), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -142,7 +142,7 @@
 void Mir2Lir::CallRuntimeHelperReg(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                    bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  OpRegCopy(TargetReg(kArg0), arg0);
+  OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -153,13 +153,13 @@
                                            RegLocation arg0, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   if (arg0.wide == 0) {
-    LoadValueDirectFixed(arg0, TargetReg(kArg0));
+    LoadValueDirectFixed(arg0, TargetReg(arg0.fp ? kFArg0 : kArg0, arg0));
   } else {
     RegStorage r_tmp;
     if (cu_->target64) {
-      r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+      r_tmp = TargetReg(kArg0, true);
     } else {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+      r_tmp = TargetReg(arg0.fp ? kFArg0 : kArg0, arg0.fp ? kFArg1 : kArg1);
     }
     LoadValueDirectWideFixed(arg0, r_tmp);
   }
@@ -172,8 +172,8 @@
 void Mir2Lir::CallRuntimeHelperImmImm(ThreadOffset<pointer_size> helper_offset, int arg0, int arg1,
                                       bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadConstant(TargetReg(kArg0), arg0);
-  LoadConstant(TargetReg(kArg1), arg1);
+  LoadConstant(TargetReg(kArg0, false), arg0);
+  LoadConstant(TargetReg(kArg1, false), arg1);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -184,17 +184,22 @@
                                               RegLocation arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   if (arg1.wide == 0) {
-    LoadValueDirectFixed(arg1, TargetReg(kArg1));
+    LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
   } else {
     RegStorage r_tmp;
     if (cu_->target64) {
-      r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+      r_tmp = TargetReg(kArg1, true);
     } else {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+      if (cu_->instruction_set == kMips) {
+        // Skip kArg1 for stack alignment.
+        r_tmp = TargetReg(kArg2, kArg3);
+      } else {
+        r_tmp = TargetReg(kArg1, kArg2);
+      }
     }
     LoadValueDirectWideFixed(arg1, r_tmp);
   }
-  LoadConstant(TargetReg(kArg0), arg0);
+  LoadConstant(TargetReg(kArg0, false), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -205,8 +210,9 @@
 void Mir2Lir::CallRuntimeHelperRegLocationImm(ThreadOffset<pointer_size> helper_offset,
                                               RegLocation arg0, int arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadValueDirectFixed(arg0, TargetReg(kArg0));
-  LoadConstant(TargetReg(kArg1), arg1);
+  DCHECK(!arg0.wide);
+  LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));
+  LoadConstant(TargetReg(kArg1, false), arg1);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -217,8 +223,8 @@
 void Mir2Lir::CallRuntimeHelperImmReg(ThreadOffset<pointer_size> helper_offset, int arg0,
                                       RegStorage arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  OpRegCopy(TargetReg(kArg1), arg1);
-  LoadConstant(TargetReg(kArg0), arg0);
+  OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1);
+  LoadConstant(TargetReg(kArg0, false), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -228,8 +234,8 @@
 void Mir2Lir::CallRuntimeHelperRegImm(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                       int arg1, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  OpRegCopy(TargetReg(kArg0), arg0);
-  LoadConstant(TargetReg(kArg1), arg1);
+  OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
+  LoadConstant(TargetReg(kArg1, false), arg1);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -239,8 +245,8 @@
 void Mir2Lir::CallRuntimeHelperImmMethod(ThreadOffset<pointer_size> helper_offset, int arg0,
                                          bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadCurrMethodDirect(TargetReg(kArg1));
-  LoadConstant(TargetReg(kArg0), arg0);
+  LoadCurrMethodDirect(TargetRefReg(kArg1));
+  LoadConstant(TargetReg(kArg0, false), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -250,11 +256,11 @@
 void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset<pointer_size> helper_offset, RegStorage arg0,
                                          bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK_NE(TargetReg(kArg1).GetReg(), arg0.GetReg());
-  if (TargetReg(kArg0) != arg0) {
-    OpRegCopy(TargetReg(kArg0), arg0);
+  DCHECK(!IsSameReg(TargetReg(kArg1, arg0.Is64Bit()), arg0));
+  if (TargetReg(kArg0, arg0.Is64Bit()).NotExactlyEquals(arg0)) {
+    OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
   }
-  LoadCurrMethodDirect(TargetReg(kArg1));
+  LoadCurrMethodDirect(TargetRefReg(kArg1));
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -265,12 +271,12 @@
                                                     RegStorage arg0, RegLocation arg2,
                                                     bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK_NE(TargetReg(kArg1).GetReg(), arg0.GetReg());
-  if (TargetReg(kArg0) != arg0) {
-    OpRegCopy(TargetReg(kArg0), arg0);
+  DCHECK(!IsSameReg(TargetReg(kArg1, arg0.Is64Bit()), arg0));
+  if (TargetReg(kArg0, arg0.Is64Bit()).NotExactlyEquals(arg0)) {
+    OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
   }
-  LoadCurrMethodDirect(TargetReg(kArg1));
-  LoadValueDirectFixed(arg2, TargetReg(kArg2));
+  LoadCurrMethodDirect(TargetRefReg(kArg1));
+  LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -282,79 +288,72 @@
                                                       RegLocation arg0, RegLocation arg1,
                                                       bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  if (arg0.wide == 0) {
-    LoadValueDirectFixed(arg0, arg0.fp ? TargetReg(kFArg0) : TargetReg(kArg0));
-    if (arg1.wide == 0) {
-      if (cu_->instruction_set == kMips) {
-        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1));
-      } else if (cu_->instruction_set == kArm64) {
-        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
-      } else if (cu_->instruction_set == kX86_64) {
-        if (arg0.fp) {
-          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg0));
-        } else {
-          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg0) : TargetReg(kArg1));
-        }
-      } else {
-        LoadValueDirectFixed(arg1, TargetReg(kArg1));
-      }
+  if (cu_->instruction_set == kArm64 || cu_->instruction_set == kX86_64) {
+    RegStorage arg0_reg = TargetReg((arg0.fp) ? kFArg0 : kArg0, arg0);
+
+    RegStorage arg1_reg;
+    if (arg1.fp == arg0.fp) {
+      arg1_reg = TargetReg((arg1.fp) ? kFArg1 : kArg1, arg1);
     } else {
-      if (cu_->instruction_set == kMips) {
-        RegStorage r_tmp;
-        if (arg1.fp) {
-          r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
-        } else {
-          r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
-        }
-        LoadValueDirectWideFixed(arg1, r_tmp);
-      } else {
-        RegStorage r_tmp;
-        if (cu_->target64) {
-          r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
-        } else {
-          r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
-        }
-        LoadValueDirectWideFixed(arg1, r_tmp);
-      }
+      arg1_reg = TargetReg((arg1.fp) ? kFArg0 : kArg0, arg1);
+    }
+
+    if (arg0.wide == 0) {
+      LoadValueDirectFixed(arg0, arg0_reg);
+    } else {
+      LoadValueDirectWideFixed(arg0, arg0_reg);
+    }
+
+    if (arg1.wide == 0) {
+      LoadValueDirectFixed(arg1, arg1_reg);
+    } else {
+      LoadValueDirectWideFixed(arg1, arg1_reg);
     }
   } else {
-    RegStorage r_tmp;
-    if (arg0.fp) {
-      if (cu_->target64) {
-        r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg());
+    DCHECK(!cu_->target64);
+    if (arg0.wide == 0) {
+      LoadValueDirectFixed(arg0, arg0.fp ? TargetReg(kFArg0, false) : TargetReg(kArg0, false));
+      if (arg1.wide == 0) {
+        if (cu_->instruction_set == kMips) {
+          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2, false) : TargetReg(kArg1, false));
+        } else {
+          LoadValueDirectFixed(arg1, TargetReg(kArg1, false));
+        }
       } else {
-        r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
-      }
-    } else {
-      if (cu_->target64) {
-        r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
-      } else {
-        r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
-      }
-    }
-    LoadValueDirectWideFixed(arg0, r_tmp);
-    if (arg1.wide == 0) {
-      if (cu_->target64) {
-        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
-      } else {
-        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+        if (cu_->instruction_set == kMips) {
+          RegStorage r_tmp;
+          if (arg1.fp) {
+            r_tmp = TargetReg(kFArg2, kFArg3);
+          } else {
+            // Skip kArg1 for stack alignment.
+            r_tmp = TargetReg(kArg2, kArg3);
+          }
+          LoadValueDirectWideFixed(arg1, r_tmp);
+        } else {
+          RegStorage r_tmp;
+          r_tmp = TargetReg(kArg1, kArg2);
+          LoadValueDirectWideFixed(arg1, r_tmp);
+        }
       }
     } else {
       RegStorage r_tmp;
-      if (arg1.fp) {
-        if (cu_->target64) {
-          r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg());
-        } else {
-          r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
-        }
+      if (arg0.fp) {
+        r_tmp = TargetReg(kFArg0, kFArg1);
       } else {
-        if (cu_->target64) {
-          r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
-        } else {
-          r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
-        }
+        r_tmp = TargetReg(kArg0, kArg1);
       }
-      LoadValueDirectWideFixed(arg1, r_tmp);
+      LoadValueDirectWideFixed(arg0, r_tmp);
+      if (arg1.wide == 0) {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2, false) : TargetReg(kArg2, false));
+      } else {
+        RegStorage r_tmp;
+        if (arg1.fp) {
+          r_tmp = TargetReg(kFArg2, kFArg3);
+        } else {
+          r_tmp = TargetReg(kArg2, kArg3);
+        }
+        LoadValueDirectWideFixed(arg1, r_tmp);
+      }
     }
   }
   ClobberCallerSave();
@@ -363,27 +362,20 @@
 INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationRegLocation, RegLocation arg0,
             RegLocation arg1, bool safepoint_pc)
 
-// TODO: This is a hack! Reshape the two macros into functions and move them to a better place.
-#define IsSameReg(r1, r2) \
-  (GetRegInfo(r1)->Master()->GetReg().GetReg() == GetRegInfo(r2)->Master()->GetReg().GetReg())
-#define TargetArgReg(arg, is_wide) \
-  (GetRegInfo(TargetReg(arg))->FindMatchingView( \
-     (is_wide) ? RegisterInfo::k64SoloStorageMask : RegisterInfo::k32SoloStorageMask)->GetReg())
-
 void Mir2Lir::CopyToArgumentRegs(RegStorage arg0, RegStorage arg1) {
-  if (IsSameReg(arg1, TargetReg(kArg0))) {
-    if (IsSameReg(arg0, TargetReg(kArg1))) {
+  if (IsSameReg(arg1, TargetReg(kArg0, arg1.Is64Bit()))) {
+    if (IsSameReg(arg0, TargetReg(kArg1, arg0.Is64Bit()))) {
       // Swap kArg0 and kArg1 with kArg2 as temp.
-      OpRegCopy(TargetArgReg(kArg2, arg1.Is64Bit()), arg1);
-      OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0);
-      OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), TargetReg(kArg2));
+      OpRegCopy(TargetReg(kArg2, arg1.Is64Bit()), arg1);
+      OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
+      OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), TargetReg(kArg2, arg1.Is64Bit()));
     } else {
-      OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), arg1);
-      OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0);
+      OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1);
+      OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
     }
   } else {
-    OpRegCopy(TargetArgReg(kArg0, arg0.Is64Bit()), arg0);
-    OpRegCopy(TargetArgReg(kArg1, arg1.Is64Bit()), arg1);
+    OpRegCopy(TargetReg(kArg0, arg0.Is64Bit()), arg0);
+    OpRegCopy(TargetReg(kArg1, arg1.Is64Bit()), arg1);
   }
 }
 
@@ -403,7 +395,7 @@
                                          RegStorage arg1, int arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   CopyToArgumentRegs(arg0, arg1);
-  LoadConstant(TargetReg(kArg2), arg2);
+  LoadConstant(TargetReg(kArg2, false), arg2);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -414,9 +406,9 @@
 void Mir2Lir::CallRuntimeHelperImmMethodRegLocation(ThreadOffset<pointer_size> helper_offset,
                                                     int arg0, RegLocation arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadValueDirectFixed(arg2, TargetReg(kArg2));
-  LoadCurrMethodDirect(TargetReg(kArg1));
-  LoadConstant(TargetReg(kArg0), arg0);
+  LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
+  LoadCurrMethodDirect(TargetRefReg(kArg1));
+  LoadConstant(TargetReg(kArg0, false), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -427,9 +419,9 @@
 void Mir2Lir::CallRuntimeHelperImmMethodImm(ThreadOffset<pointer_size> helper_offset, int arg0,
                                             int arg2, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  LoadCurrMethodDirect(TargetReg(kArg1));
-  LoadConstant(TargetReg(kArg2), arg2);
-  LoadConstant(TargetReg(kArg0), arg0);
+  LoadCurrMethodDirect(TargetRefReg(kArg1));
+  LoadConstant(TargetReg(kArg2, false), arg2);
+  LoadConstant(TargetReg(kArg0, false), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -442,19 +434,19 @@
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U);  // The static_cast works around an
                                                         // instantiation bug in GCC.
-  LoadValueDirectFixed(arg1, TargetReg(kArg1));
+  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
   if (arg2.wide == 0) {
-    LoadValueDirectFixed(arg2, TargetReg(kArg2));
+    LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
   } else {
     RegStorage r_tmp;
     if (cu_->target64) {
-      r_tmp = RegStorage::Solo64(TargetReg(kArg2).GetReg());
+      r_tmp = TargetReg(kArg2, true);
     } else {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+      r_tmp = TargetReg(kArg2, kArg3);
     }
     LoadValueDirectWideFixed(arg2, r_tmp);
   }
-  LoadConstant(TargetReg(kArg0), arg0);
+  LoadConstant(TargetReg(kArg0, false), arg0);
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -467,12 +459,9 @@
                                                                  RegLocation arg2,
                                                                  bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
-  DCHECK_EQ(static_cast<unsigned int>(arg0.wide), 0U);
-  LoadValueDirectFixed(arg0, TargetReg(kArg0));
-  DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U);
-  LoadValueDirectFixed(arg1, TargetReg(kArg1));
-  DCHECK_EQ(static_cast<unsigned int>(arg1.wide), 0U);
-  LoadValueDirectFixed(arg2, TargetReg(kArg2));
+  LoadValueDirectFixed(arg0, TargetReg(kArg0, arg0));
+  LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1));
+  LoadValueDirectFixed(arg2, TargetReg(kArg2, arg2));
   ClobberCallerSave();
   CallHelper<pointer_size>(r_tgt, helper_offset, safepoint_pc);
 }
@@ -495,13 +484,13 @@
    */
   RegLocation rl_src = rl_method;
   rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetReg(kArg0);
+  rl_src.reg = TargetRefReg(kArg0);
   rl_src.home = false;
   MarkLive(rl_src);
   StoreValue(rl_method, rl_src);
   // If Method* has been promoted, explicitly flush
   if (rl_method.location == kLocPhysReg) {
-    StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0), kNotVolatile);
+    StoreRefDisp(TargetPtrReg(kSp), 0, rl_src.reg, kNotVolatile);
   }
 
   if (cu_->num_ins == 0) {
@@ -534,7 +523,7 @@
         OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
         need_flush = false;
       } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+        OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg);
         need_flush = false;
       } else {
         need_flush = true;
@@ -556,23 +545,23 @@
            * halves of the double are promoted.  Make sure they are in a usable form.
            */
           int lowreg_index = start_vreg + i + (t_loc->high_word ? -1 : 0);
-          int low_reg = promotion_map_[lowreg_index].FpReg;
-          int high_reg = promotion_map_[lowreg_index + 1].FpReg;
+          int low_reg = promotion_map_[lowreg_index].fp_reg;
+          int high_reg = promotion_map_[lowreg_index + 1].fp_reg;
           if (((low_reg & 0x1) != 0) || (high_reg != (low_reg + 1))) {
             need_flush = true;
           }
         }
       }
       if (need_flush) {
-        Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg);
+        Store32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg);
       }
     } else {
       // If arriving in frame & promoted
       if (v_map->core_location == kLocPhysReg) {
-        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
+        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
       }
       if (v_map->fp_location == kLocPhysReg) {
-        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg));
       }
     }
   }
@@ -593,13 +582,13 @@
     case 0:  // Get the current Method* [sets kArg0]
       if (direct_code != static_cast<uintptr_t>(-1)) {
         if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
-          cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
+          cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
         }
       } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
         cg->LoadCodeAddress(target_method, type, kInvokeTgt);
       }
       if (direct_method != static_cast<uintptr_t>(-1)) {
-        cg->LoadConstant(cg->TargetReg(kArg0), direct_method);
+        cg->LoadConstant(cg->TargetRefReg(kArg0), direct_method);
       } else {
         cg->LoadMethodAddress(target_method, type, kArg0);
       }
@@ -608,20 +597,21 @@
       return -1;
     }
   } else {
+    RegStorage arg0_ref = cg->TargetRefReg(kArg0);
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       // TUNING: we can save a reg copy if Method* has been promoted.
-      cg->LoadCurrMethodDirect(cg->TargetReg(kArg0));
+      cg->LoadCurrMethodDirect(arg0_ref);
       break;
     case 1:  // Get method->dex_cache_resolved_methods_
-      cg->LoadRefDisp(cg->TargetReg(kArg0),
+      cg->LoadRefDisp(arg0_ref,
                       mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
-                      cg->TargetReg(kArg0),
+                      arg0_ref,
                       kNotVolatile);
       // Set up direct code if known.
       if (direct_code != 0) {
         if (direct_code != static_cast<uintptr_t>(-1)) {
-          cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code);
+          cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
         } else if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
           CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
           cg->LoadCodeAddress(target_method, type, kInvokeTgt);
@@ -630,17 +620,17 @@
       break;
     case 2:  // Grab target method*
       CHECK_EQ(cu->dex_file, target_method.dex_file);
-      cg->LoadRefDisp(cg->TargetReg(kArg0),
+      cg->LoadRefDisp(arg0_ref,
                       ObjArray::OffsetOfElement(target_method.dex_method_index).Int32Value(),
-                      cg->TargetReg(kArg0),
+                      arg0_ref,
                       kNotVolatile);
       break;
     case 3:  // Grab the code from the method*
       if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
         if (direct_code == 0) {
-          cg->LoadWordDisp(cg->TargetReg(kArg0),
+          cg->LoadWordDisp(arg0_ref,
                            mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
-                           cg->TargetReg(kInvokeTgt));
+                           cg->TargetPtrReg(kInvokeTgt));
         }
         break;
       }
@@ -671,33 +661,33 @@
   switch (state) {
     case 0: {  // Get "this" [set kArg1]
       RegLocation  rl_arg = info->args[0];
-      cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1));
+      cg->LoadValueDirectFixed(rl_arg, cg->TargetRefReg(kArg1));
       break;
     }
     case 1:  // Is "this" null? [use kArg1]
-      cg->GenNullCheck(cg->TargetReg(kArg1), info->opt_flags);
+      cg->GenNullCheck(cg->TargetRefReg(kArg1), info->opt_flags);
       // get this->klass_ [use kArg1, set kInvokeTgt]
-      cg->LoadRefDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
-                      cg->TargetReg(kInvokeTgt),
+      cg->LoadRefDisp(cg->TargetRefReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
+                      cg->TargetPtrReg(kInvokeTgt),
                       kNotVolatile);
       cg->MarkPossibleNullPointerException(info->opt_flags);
       break;
     case 2:  // Get this->klass_->vtable [usr kInvokeTgt, set kInvokeTgt]
-      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), mirror::Class::VTableOffset().Int32Value(),
-                      cg->TargetReg(kInvokeTgt),
+      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt), mirror::Class::VTableOffset().Int32Value(),
+                      cg->TargetPtrReg(kInvokeTgt),
                       kNotVolatile);
       break;
     case 3:  // Get target method [use kInvokeTgt, set kArg0]
-      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt),
+      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt),
                       ObjArray::OffsetOfElement(method_idx).Int32Value(),
-                      cg->TargetReg(kArg0),
+                      cg->TargetRefReg(kArg0),
                       kNotVolatile);
       break;
     case 4:  // Get the compiled code address [uses kArg0, sets kInvokeTgt]
       if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
-        cg->LoadWordDisp(cg->TargetReg(kArg0),
+        cg->LoadWordDisp(cg->TargetRefReg(kArg0),
                          mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
-                         cg->TargetReg(kInvokeTgt));
+                         cg->TargetPtrReg(kInvokeTgt));
         break;
       }
       // Intentional fallthrough for X86
@@ -722,42 +712,42 @@
   switch (state) {
     case 0:  // Set target method index in case of conflict [set kHiddenArg, kHiddenFpArg (x86)]
       CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
-      cg->LoadConstant(cg->TargetReg(kHiddenArg), target_method.dex_method_index);
+      cg->LoadConstant(cg->TargetReg(kHiddenArg, false), target_method.dex_method_index);
       if (cu->instruction_set == kX86) {
-        cg->OpRegCopy(cg->TargetReg(kHiddenFpArg), cg->TargetReg(kHiddenArg));
+        cg->OpRegCopy(cg->TargetReg(kHiddenFpArg, false), cg->TargetReg(kHiddenArg, false));
       }
       break;
     case 1: {  // Get "this" [set kArg1]
       RegLocation  rl_arg = info->args[0];
-      cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1));
+      cg->LoadValueDirectFixed(rl_arg, cg->TargetRefReg(kArg1));
       break;
     }
     case 2:  // Is "this" null? [use kArg1]
-      cg->GenNullCheck(cg->TargetReg(kArg1), info->opt_flags);
+      cg->GenNullCheck(cg->TargetRefReg(kArg1), info->opt_flags);
       // Get this->klass_ [use kArg1, set kInvokeTgt]
-      cg->LoadRefDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
-                      cg->TargetReg(kInvokeTgt),
+      cg->LoadRefDisp(cg->TargetRefReg(kArg1), mirror::Object::ClassOffset().Int32Value(),
+                      cg->TargetPtrReg(kInvokeTgt),
                       kNotVolatile);
       cg->MarkPossibleNullPointerException(info->opt_flags);
       break;
     case 3:  // Get this->klass_->imtable [use kInvokeTgt, set kInvokeTgt]
       // NOTE: native pointer.
-      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(),
-                      cg->TargetReg(kInvokeTgt),
+      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(),
+                      cg->TargetPtrReg(kInvokeTgt),
                       kNotVolatile);
       break;
     case 4:  // Get target method [use kInvokeTgt, set kArg0]
       // NOTE: native pointer.
-      cg->LoadRefDisp(cg->TargetReg(kInvokeTgt),
+      cg->LoadRefDisp(cg->TargetPtrReg(kInvokeTgt),
                        ObjArray::OffsetOfElement(method_idx % ClassLinker::kImtSize).Int32Value(),
-                       cg->TargetReg(kArg0),
+                       cg->TargetRefReg(kArg0),
                        kNotVolatile);
       break;
     case 5:  // Get the compiled code address [use kArg0, set kInvokeTgt]
       if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
-        cg->LoadWordDisp(cg->TargetReg(kArg0),
+        cg->LoadWordDisp(cg->TargetRefReg(kArg0),
                          mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
-                         cg->TargetReg(kInvokeTgt));
+                         cg->TargetPtrReg(kInvokeTgt));
         break;
       }
       // Intentional fallthrough for X86
@@ -779,11 +769,11 @@
   if (state == 0) {
     if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
       // Load trampoline target
-      cg->LoadWordDisp(cg->TargetReg(kSelf), trampoline.Int32Value(), cg->TargetReg(kInvokeTgt));
+      cg->LoadWordDisp(cg->TargetPtrReg(kSelf), trampoline.Int32Value(), cg->TargetPtrReg(kInvokeTgt));
     }
     // Load kArg0 with method index
     CHECK_EQ(cu->dex_file, target_method.dex_file);
-    cg->LoadConstant(cg->TargetReg(kArg0), target_method.dex_method_index);
+    cg->LoadConstant(cg->TargetReg(kArg0, false), target_method.dex_method_index);
     return 1;
   }
   return -1;
@@ -862,7 +852,7 @@
                          uint32_t vtable_idx, uintptr_t direct_code,
                          uintptr_t direct_method, InvokeType type, bool skip_this) {
   int last_arg_reg = 3 - 1;
-  int arg_regs[3] = {TargetReg(kArg1).GetReg(), TargetReg(kArg2).GetReg(), TargetReg(kArg3).GetReg()};
+  int arg_regs[3] = {TargetReg(kArg1, false).GetReg(), TargetReg(kArg2, false).GetReg(), TargetReg(kArg3, false).GetReg()};
 
   int next_reg = 0;
   int next_arg = 0;
@@ -937,17 +927,17 @@
         }
       } else {
         // kArg2 & rArg3 can safely be used here
-        reg = TargetReg(kArg3);
+        reg = TargetReg(kArg3, false);
         {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          Load32Disp(TargetReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
+          Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
         }
         call_state = next_call_insn(cu_, info, call_state, target_method,
                                     vtable_idx, direct_code, direct_method, type);
       }
       {
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetReg(kSp), (next_use + 1) * 4, reg);
+        Store32Disp(TargetPtrReg(kSp), (next_use + 1) * 4, reg);
       }
       call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                   direct_code, direct_method, type);
@@ -961,8 +951,7 @@
       if (rl_arg.location == kLocPhysReg) {
         arg_reg = rl_arg.reg;
       } else {
-        arg_reg = rl_arg.wide ? RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)) :
-            TargetReg(kArg2);
+        arg_reg = rl_arg.wide ? TargetReg(kArg2, kArg3) : TargetReg(kArg2, false);
         if (rl_arg.wide) {
           LoadValueDirectWideFixed(rl_arg, arg_reg);
         } else {
@@ -975,10 +964,10 @@
       {
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
         if (rl_arg.wide) {
-          StoreBaseDisp(TargetReg(kSp), outs_offset, arg_reg, k64, kNotVolatile);
+          StoreBaseDisp(TargetPtrReg(kSp), outs_offset, arg_reg, k64, kNotVolatile);
           next_use += 2;
         } else {
-          Store32Disp(TargetReg(kSp), outs_offset, arg_reg);
+          Store32Disp(TargetPtrReg(kSp), outs_offset, arg_reg);
           next_use++;
         }
       }
@@ -993,13 +982,13 @@
 
   if (pcrLabel) {
     if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+      *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
       RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1), 0, tmp);
+      Load32Disp(TargetRefReg(kArg1), 0, tmp);
       MarkPossibleNullPointerException(info->opt_flags);
       FreeTemp(tmp);
     }
@@ -1045,14 +1034,14 @@
       loc = UpdateLocWide(loc);
       if ((next_arg >= 2) && (loc.location == kLocPhysReg)) {
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
+        StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
       }
       next_arg += 2;
     } else {
       loc = UpdateLoc(loc);
       if ((next_arg >= 3) && (loc.location == kLocPhysReg)) {
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+        Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
       }
       next_arg++;
     }
@@ -1073,23 +1062,23 @@
     // Use vldm/vstm pair using kArg3 as a temp
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
-    OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
+    OpRegRegImm(kOpAdd, TargetRefReg(kArg3), TargetPtrReg(kSp), start_offset);
     LIR* ld = nullptr;
     {
       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+      ld = OpVldm(TargetRefReg(kArg3), regs_left_to_pass_via_stack);
     }
     // TUNING: loosen barrier
     ld->u.m.def_mask = &kEncodeAll;
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
-    OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4));
+    OpRegRegImm(kOpAdd, TargetRefReg(kArg3), TargetPtrReg(kSp), 4 /* Method* */ + (3 * 4));
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
     LIR* st = nullptr;
     {
       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+      st = OpVstm(TargetRefReg(kArg3), regs_left_to_pass_via_stack);
     }
     st->u.m.def_mask = &kEncodeAll;
     call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
@@ -1138,23 +1127,23 @@
         bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
 
         if (src_is_16b_aligned) {
-          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP);
         } else if (src_is_8b_aligned) {
-          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
-          ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1),
+          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP);
+          ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1),
                             kMovHi128FP);
         } else {
-          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP);
         }
 
         if (dest_is_16b_aligned) {
-          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP);
         } else if (dest_is_8b_aligned) {
-          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
-          st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1),
+          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP);
+          st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1),
                             temp, kMovHi128FP);
         } else {
-          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP);
         }
 
         // TODO If we could keep track of aliasing information for memory accesses that are wider
@@ -1188,11 +1177,11 @@
 
         // Instead of allocating a new temp, simply reuse one of the registers being used
         // for argument passing.
-        RegStorage temp = TargetReg(kArg3);
+        RegStorage temp = TargetReg(kArg3, false);
 
         // Now load the argument VR and store to the outs.
-        Load32Disp(TargetReg(kSp), current_src_offset, temp);
-        Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+        Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
+        Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
       }
 
       current_src_offset += bytes_to_move;
@@ -1201,14 +1190,14 @@
     }
   } else {
     // Generate memcpy
-    OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
-    OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
+    OpRegRegImm(kOpAdd, TargetRefReg(kArg0), TargetPtrReg(kSp), outs_offset);
+    OpRegRegImm(kOpAdd, TargetRefReg(kArg1), TargetPtrReg(kSp), start_offset);
     if (cu_->target64) {
-      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(8, pMemcpy), TargetReg(kArg0),
-                                 TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
+      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(8, pMemcpy), TargetRefReg(kArg0),
+                                 TargetRefReg(kArg1), (info->num_arg_words - 3) * 4, false);
     } else {
-      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(4, pMemcpy), TargetReg(kArg0),
-                                 TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
+      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(4, pMemcpy), TargetRefReg(kArg0),
+                                 TargetRefReg(kArg1), (info->num_arg_words - 3) * 4, false);
     }
   }
 
@@ -1220,13 +1209,13 @@
                            direct_code, direct_method, type);
   if (pcrLabel) {
     if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+      *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
       RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1), 0, tmp);
+      Load32Disp(TargetRefReg(kArg1), 0, tmp);
       MarkPossibleNullPointerException(info->opt_flags);
       FreeTemp(tmp);
     }
@@ -1291,7 +1280,7 @@
     }
     Load32Disp(rl_obj.reg, offset_offset, reg_off);
     MarkPossibleNullPointerException(info->opt_flags);
-    Load32Disp(rl_obj.reg, value_offset, reg_ptr);
+    LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile);
     if (range_check) {
       // Set up a slow path to allow retry in case of bounds violation */
       OpRegReg(kOpCmp, rl_idx.reg, reg_max);
@@ -1378,16 +1367,16 @@
 }
 
 bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) {
-  if (cu_->instruction_set == kMips) {
-    // TODO - add Mips implementation
+  if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) {
+    // TODO - add Mips implementation; enable Arm64.
     return false;
   }
   RegLocation rl_src_i = info->args[0];
+  RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
   RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (size == k64) {
-    RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg);
-    if (cu_->instruction_set == kArm64) {
+    if (cu_->instruction_set == kArm64 || cu_->instruction_set == kX86_64) {
       OpRegReg(kOpRev, rl_result.reg, rl_i.reg);
       StoreValueWide(rl_dest, rl_result);
       return true;
@@ -1407,7 +1396,6 @@
   } else {
     DCHECK(size == k32 || size == kSignedHalf);
     OpKind op = (size == k32) ? kOpRev : kOpRevsh;
-    RegLocation rl_i = LoadValue(rl_src_i, kCoreReg);
     OpRegReg(op, rl_result.reg, rl_i.reg);
     StoreValue(rl_dest, rl_result);
   }
@@ -1443,7 +1431,9 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
 
   // If on x86 or if we would clobber a register needed later, just copy the source first.
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64 || rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
+  if (cu_->instruction_set != kX86_64 &&
+      (cu_->instruction_set == kX86 ||
+       rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg())) {
     OpRegCopyWide(rl_result.reg, rl_src.reg);
     if (rl_result.reg.GetLowReg() != rl_src.reg.GetLowReg() &&
         rl_result.reg.GetLowReg() != rl_src.reg.GetHighReg() &&
@@ -1456,12 +1446,20 @@
   }
 
   // abs(x) = y<=x>>31, (x+y)^y.
-  RegStorage sign_reg = AllocTemp();
-  OpRegRegImm(kOpAsr, sign_reg, rl_src.reg.GetHigh(), 31);
-  OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), sign_reg);
-  OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), sign_reg);
-  OpRegReg(kOpXor, rl_result.reg.GetLow(), sign_reg);
-  OpRegReg(kOpXor, rl_result.reg.GetHigh(), sign_reg);
+  RegStorage sign_reg;
+  if (cu_->instruction_set == kX86_64) {
+    sign_reg = AllocTempWide();
+    OpRegRegImm(kOpAsr, sign_reg, rl_src.reg, 63);
+    OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, sign_reg);
+    OpRegReg(kOpXor, rl_result.reg, sign_reg);
+  } else {
+    sign_reg = AllocTemp();
+    OpRegRegImm(kOpAsr, sign_reg, rl_src.reg.GetHigh(), 31);
+    OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), sign_reg);
+    OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), sign_reg);
+    OpRegReg(kOpXor, rl_result.reg.GetLow(), sign_reg);
+    OpRegReg(kOpXor, rl_result.reg.GetHigh(), sign_reg);
+  }
   FreeTemp(sign_reg);
   StoreValueWide(rl_dest, rl_result);
   return true;
@@ -1481,6 +1479,16 @@
   return true;
 }
 
+bool Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
+  // Currently implemented only for ARM64
+  return false;
+}
+
+bool Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) {
+  // Currently implemented only for ARM64
+  return false;
+}
+
 bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) {
   if (cu_->instruction_set == kMips) {
     // TODO - add Mips implementation
@@ -1491,14 +1499,8 @@
   RegLocation rl_dest = InlineTargetWide(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
 
-  if (cu_->instruction_set == kArm64) {
-    // TODO - Can ecode ? UBXF otherwise
-    // OpRegRegImm(kOpAnd, rl_result.reg, 0x7fffffffffffffff);
-    return false;
-  } else {
-    OpRegCopyWide(rl_result.reg, rl_src.reg);
-    OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
-  }
+  OpRegCopyWide(rl_result.reg, rl_src.reg);
+  OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff);
   StoreValueWide(rl_dest, rl_result);
   return true;
 }
@@ -1534,6 +1536,10 @@
     // TODO - add Mips implementation
     return false;
   }
+  if (cu_->instruction_set == kX86_64) {
+    // TODO - add kX86_64 implementation
+    return false;
+  }
   RegLocation rl_obj = info->args[0];
   RegLocation rl_char = info->args[1];
   if (rl_char.is_const && (mir_graph_->ConstantValue(rl_char) & ~0xFFFF) != 0) {
@@ -1543,9 +1549,9 @@
 
   ClobberCallerSave();
   LockCallTemps();  // Using fixed registers
-  RegStorage reg_ptr = TargetReg(kArg0);
-  RegStorage reg_char = TargetReg(kArg1);
-  RegStorage reg_start = TargetReg(kArg2);
+  RegStorage reg_ptr = TargetRefReg(kArg0);
+  RegStorage reg_char = TargetReg(kArg1, false);
+  RegStorage reg_start = TargetReg(kArg2, false);
 
   LoadValueDirectFixed(rl_obj, reg_ptr);
   LoadValueDirectFixed(rl_char, reg_char);
@@ -1587,8 +1593,8 @@
   }
   ClobberCallerSave();
   LockCallTemps();  // Using fixed registers
-  RegStorage reg_this = TargetReg(kArg0);
-  RegStorage reg_cmp = TargetReg(kArg1);
+  RegStorage reg_this = TargetRefReg(kArg0);
+  RegStorage reg_cmp = TargetRefReg(kArg1);
 
   RegLocation rl_this = info->args[0];
   RegLocation rl_cmp = info->args[1];
@@ -1627,7 +1633,7 @@
 
 bool Mir2Lir::GenInlinedCurrentThread(CallInfo* info) {
   RegLocation rl_dest = InlineTarget(info);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
 
   switch (cu_->instruction_set) {
     case kArm:
@@ -1635,11 +1641,12 @@
     case kThumb2:
       // Fall-through.
     case kMips:
-      Load32Disp(TargetReg(kSelf), Thread::PeerOffset<4>().Int32Value(), rl_result.reg);
+      Load32Disp(TargetPtrReg(kSelf), Thread::PeerOffset<4>().Int32Value(), rl_result.reg);
       break;
 
     case kArm64:
-      Load32Disp(TargetReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg);
+      LoadRefDisp(TargetPtrReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg,
+                  kNotVolatile);
       break;
 
     case kX86:
@@ -1673,10 +1680,11 @@
 
   RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLoc(rl_dest, LocToRegClass(rl_dest), true);
   if (is_long) {
-    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-      LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64);
+    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64
+        || cu_->instruction_set == kArm64) {
+      LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k64);
     } else {
       RegStorage rl_temp_offset = AllocTemp();
       OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
@@ -1684,7 +1692,11 @@
       FreeTemp(rl_temp_offset);
     }
   } else {
-    LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32);
+    if (rl_result.ref) {
+      LoadRefIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0);
+    } else {
+      LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32);
+    }
   }
 
   if (is_volatile) {
@@ -1722,8 +1734,9 @@
   RegLocation rl_value;
   if (is_long) {
     rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-      StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64);
+    if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64
+        || cu_->instruction_set == kArm64) {
+      StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k64);
     } else {
       RegStorage rl_temp_offset = AllocTemp();
       OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg);
@@ -1732,7 +1745,11 @@
     }
   } else {
     rl_value = LoadValue(rl_src_value);
-    StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32);
+    if (rl_value.ref) {
+      StoreRefIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0);
+    } else {
+      StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32);
+    }
   }
 
   // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
@@ -1760,14 +1777,9 @@
     return;
   }
   DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-  // TODO: Enable instrinsics for x86_64
-  // Temporary disable intrinsics for x86_64. We will enable them later step by step.
-  // Temporary disable intrinsics for Arm64. We will enable them later step by step.
-  if ((cu_->instruction_set != kX86_64) && (cu_->instruction_set != kArm64)) {
-    if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
-        ->GenIntrinsic(this, info)) {
-      return;
-    }
+  if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
+      ->GenIntrinsic(this, info)) {
+    return;
   }
   GenInvokeNoInline(info);
 }
@@ -1853,7 +1865,7 @@
   }
   LIR* call_inst;
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
-    call_inst = OpReg(kOpBlx, TargetReg(kInvokeTgt));
+    call_inst = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
   } else {
     if (fast_path) {
       if (method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
@@ -1861,7 +1873,7 @@
         call_inst =
           reinterpret_cast<X86Mir2Lir*>(this)->CallWithLinkerFixup(target_method, info->type);
       } else {
-        call_inst = OpMem(kOpBlx, TargetReg(kArg0),
+        call_inst = OpMem(kOpBlx, TargetRefReg(kArg0),
                           mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
       }
     } else {
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index bfb77fc..e5798fd 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -66,7 +66,7 @@
       } else {
         // Lives in the frame, need to store.
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32, kNotVolatile);
+        StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), temp_reg, k32, kNotVolatile);
       }
       if (!zero_reg.Valid()) {
         FreeTemp(temp_reg);
@@ -93,9 +93,9 @@
            (rl_src.location == kLocCompilerTemp));
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     if (rl_src.ref) {
-      LoadRefDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, kNotVolatile);
+      LoadRefDisp(TargetPtrReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, kNotVolatile);
     } else {
-      Load32Disp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest);
+      Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest);
     }
   }
 }
@@ -126,7 +126,7 @@
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    LoadBaseDisp(TargetReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64, kNotVolatile);
+    LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_src.s_reg_low), r_dest, k64, kNotVolatile);
   }
 }
 
@@ -192,7 +192,7 @@
       IsPromoted(rl_src.reg) ||
       (rl_dest.location == kLocPhysReg)) {
       // Src is live/promoted or Dest has assigned reg.
-      rl_dest = EvalLoc(rl_dest, kAnyReg, false);
+      rl_dest = EvalLoc(rl_dest, rl_dest.ref || rl_src.ref ? kRefReg : kAnyReg, false);
       OpRegCopy(rl_dest.reg, rl_src.reg);
     } else {
       // Just re-assign the registers.  Dest gets Src's regs
@@ -201,7 +201,7 @@
     }
   } else {
     // Load Src either into promoted Dest or temps allocated for Dest
-    rl_dest = EvalLoc(rl_dest, kAnyReg, false);
+    rl_dest = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kAnyReg, false);
     LoadValueDirect(rl_src, rl_dest.reg);
   }
 
@@ -215,9 +215,9 @@
     def_start = last_lir_insn_;
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     if (rl_dest.ref) {
-      StoreRefDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, kNotVolatile);
+      StoreRefDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, kNotVolatile);
     } else {
-      Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+      Store32Disp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
     }
     MarkClean(rl_dest);
     def_end = last_lir_insn_;
@@ -305,7 +305,7 @@
     DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
               mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64, kNotVolatile);
+    StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64, kNotVolatile);
     MarkClean(rl_dest);
     def_end = last_lir_insn_;
     MarkDefWide(rl_dest, def_start, def_end);
@@ -333,7 +333,7 @@
   if (IsDirty(rl_dest.reg) && LiveOut(rl_dest.s_reg_low)) {
     LIR *def_start = last_lir_insn_;
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    Store32Disp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
+    Store32Disp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg);
     MarkClean(rl_dest);
     LIR *def_end = last_lir_insn_;
     if (!rl_dest.ref) {
@@ -369,7 +369,7 @@
     DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
               mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64, kNotVolatile);
+    StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_dest.s_reg_low), rl_dest.reg, k64, kNotVolatile);
     MarkClean(rl_dest);
     LIR *def_end = last_lir_insn_;
     MarkDefWide(rl_dest, def_start, def_end);
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index c0ad916..025f97a 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -50,6 +50,7 @@
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
+    RegStorage Solo64ToPair64(RegStorage reg);
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
@@ -64,8 +65,6 @@
     void ClobberCallerSave();
     void FreeCallTemps();
     void LockCallTemps();
-    void MarkPreservedSingle(int v_reg, RegStorage reg);
-    void MarkPreservedDouble(int v_reg, RegStorage reg);
     void CompilerInitializeRegAlloc();
 
     // Required for target - miscellaneous.
@@ -112,7 +111,7 @@
                   RegLocation rl_src2);
     void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
     bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-    bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+    bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
@@ -181,10 +180,10 @@
 
     // TODO: collapse r_dest.
     LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                          RegStorage r_dest_hi, OpSize size);
+                          OpSize size);
     // TODO: collapse r_src.
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
-                           RegStorage r_src_hi, OpSize size);
+                           OpSize size);
     void SpillCoreRegs();
     void UnSpillCoreRegs();
     static const MipsEncodingMap EncodingMap[kMipsLast];
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 4e31477..7087be9 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -230,7 +230,7 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-bool MipsMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+bool MipsMir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
   // TODO: need Mips implementation
   return false;
 }
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 903a770..c3a4c17 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -18,6 +18,7 @@
 
 #include "codegen_mips.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mips_lir.h"
 #include "mirror/array.h"
@@ -485,9 +486,11 @@
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
   RegLocation rl_result;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   rl_index = LoadValue(rl_index, kCoreReg);
 
+  // FIXME: need to add support for rl_index.is_const.
+
   if (size == k64 || size == kDouble) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
@@ -558,8 +561,11 @@
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   rl_index = LoadValue(rl_index, kCoreReg);
+
+  // FIXME: need to add support for rl_index.is_const.
+
   RegStorage reg_ptr;
   bool allocated_reg_ptr_temp = false;
   if (IsTemp(rl_array.reg) && !card_mark) {
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index 5b2cb9d..495eb16 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -138,6 +138,10 @@
 #define ENCODE_MIPS_REG_HI           (1ULL << kMipsRegHI)
 #define ENCODE_MIPS_REG_LO           (1ULL << kMipsRegLO)
 
+// Set FR_BIT to 0
+// This bit determines how the CPU accesses FP registers (FR=0: doubles use even/odd pairs).
+#define FR_BIT   0
+
 enum MipsNativeRegisterPool {
   rZERO = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  0,
   rAT   = RegStorage::k32BitSolo | RegStorage::kCoreRegister |  1,
@@ -210,6 +214,26 @@
   rF30 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
   rF31 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
 #endif
+#if (FR_BIT == 0)
+  rD0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
+  rD1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
+  rD2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
+  rD3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
+  rD4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
+  rD5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+  rD6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+  rD7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
+#if 0  // TODO: expand resource mask to enable use of all MIPS fp registers.
+  rD8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
+  rD9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
+  rD10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
+  rD11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
+  rD12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
+  rD13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
+  rD14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
+  rD15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
+#endif
+#else
   rD0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
   rD1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
   rD2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
@@ -228,6 +252,7 @@
   rD14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
   rD15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
 #endif
+#endif
 };
 
 constexpr RegStorage rs_rZERO(RegStorage::kValid | rZERO);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 76b5243..a5b7824 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -75,6 +75,13 @@
   return mips_loc_c_return_double;
 }
 
+// Convert k64BitSolo into k64BitPair
+RegStorage MipsMir2Lir::Solo64ToPair64(RegStorage reg) {
+  DCHECK(reg.IsDouble());
+  int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint;
+  return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
+}
+
 // Return a target-dependent special register.
 RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) {
   RegStorage res_reg;
@@ -123,7 +130,11 @@
 ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
   return reg.IsDouble()
       /* Each double register is equal to a pair of single-precision FP registers */
+#if (FR_BIT == 0)
+      ? ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0)
+#else
       ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0)
+#endif
       : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kMipsFPReg0 : reg.GetRegNum());
 }
 
@@ -329,20 +340,6 @@
   num_core_spills_++;
 }
 
-/*
- * Mark a callee-save fp register as promoted.  Note that
- * vpush/vpop uses contiguous register lists so we must
- * include any holes in the mask.  Associate holes with
- * Dalvik register INVALID_VREG (0xFFFFU).
- */
-void MipsMir2Lir::MarkPreservedSingle(int s_reg, RegStorage reg) {
-  LOG(FATAL) << "No support yet for promoted FP regs";
-}
-
-void MipsMir2Lir::MarkPreservedDouble(int s_reg, RegStorage reg) {
-  LOG(FATAL) << "No support yet for promoted FP regs";
-}
-
 /* Clobber all regs that might be used by an external C call */
 void MipsMir2Lir::ClobberCallerSave() {
   Clobber(rs_rZERO);
@@ -443,7 +440,11 @@
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
     int sp_reg_num = info->GetReg().GetRegNum();
+#if (FR_BIT == 0)
+    int dp_reg_num = sp_reg_num & ~1;
+#else
     int dp_reg_num = sp_reg_num >> 1;
+#endif
     RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
     RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
     // Double precision register's master storage should refer to itself.
@@ -462,7 +463,11 @@
   // TODO: adjust when we roll to hard float calling convention.
   reg_pool_->next_core_reg_ = 2;
   reg_pool_->next_sp_reg_ = 2;
+#if (FR_BIT == 0)
+  reg_pool_->next_dp_reg_ = 2;
+#else
   reg_pool_->next_dp_reg_ = 1;
+#endif
 }
 
 /*
@@ -531,8 +536,13 @@
 }
 
 RegisterClass MipsMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
-  // No support for 64-bit atomic load/store on mips.
-  DCHECK(size != k64 && size != kDouble);
+  if (UNLIKELY(is_volatile)) {
+    // On Mips, atomic 64-bit load/store requires an fp register.
+    // Smaller aligned load/store is atomic for both core and fp registers.
+    if (size == k64 || size == kDouble) {
+      return kFPReg;
+    }
+  }
   // TODO: Verify that both core and fp registers are suitable for smaller sizes.
   return RegClassBySize(size);
 }
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index b49f436..129a696 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -16,6 +16,7 @@
 
 #include "codegen_mips.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "mips_lir.h"
 
 namespace art {
@@ -342,6 +343,10 @@
 
 LIR* MipsMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
   LIR *res;
+  if (!r_dest.IsPair()) {
+    // Form 64-bit pair
+    r_dest = Solo64ToPair64(r_dest);
+  }
   res = LoadConstantNoClobber(r_dest.GetLow(), Low32Bits(value));
   LoadConstantNoClobber(r_dest.GetHigh(), High32Bits(value));
   return res;
@@ -448,7 +453,7 @@
 
 // FIXME: don't split r_dest into 2 containers.
 LIR* MipsMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
-                                   RegStorage r_dest_hi, OpSize size) {
+                                   OpSize size) {
 /*
  * Load value from base + displacement.  Optionally perform null check
  * on base (which must have an associated s_reg and MIR).  If not
@@ -462,23 +467,21 @@
   LIR *load2 = NULL;
   MipsOpCode opcode = kMipsNop;
   bool short_form = IS_SIMM16(displacement);
-  bool pair = false;
+  bool pair = r_dest.IsPair();
 
   switch (size) {
     case k64:
     case kDouble:
-      pair = true;
-      opcode = kMipsLw;
+      if (!pair) {
+        // Form 64-bit pair
+        r_dest = Solo64ToPair64(r_dest);
+        pair = true;
+      }
       if (r_dest.IsFloat()) {
+        DCHECK_EQ(r_dest.GetLowReg(), r_dest.GetHighReg() - 1);
         opcode = kMipsFlwc1;
-        if (r_dest.IsDouble()) {
-          int reg_num = (r_dest.GetRegNum() << 1) | RegStorage::kFloatingPoint;
-          r_dest = RegStorage(RegStorage::k64BitSolo, reg_num, reg_num + 1);
-        } else {
-          DCHECK(r_dest_hi.IsFloat());
-          DCHECK_EQ(r_dest.GetReg(), r_dest_hi.GetReg() - 1);
-          r_dest_hi.SetReg(r_dest.GetReg() + 1);
-        }
+      } else {
+        opcode = kMipsLw;
       }
       short_form = IS_SIMM16_2WORD(displacement);
       DCHECK_EQ((displacement & 0x3), 0);
@@ -515,15 +518,15 @@
     if (!pair) {
       load = res = NewLIR3(opcode, r_dest.GetReg(), displacement, r_base.GetReg());
     } else {
-      load = res = NewLIR3(opcode, r_dest.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
-      load2 = NewLIR3(opcode, r_dest_hi.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      load = res = NewLIR3(opcode, r_dest.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+      load2 = NewLIR3(opcode, r_dest.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
     }
   } else {
     if (pair) {
       RegStorage r_tmp = AllocTemp();
       res = OpRegRegImm(kOpAdd, r_tmp, r_base, displacement);
-      load = NewLIR3(opcode, r_dest.GetReg(), LOWORD_OFFSET, r_tmp.GetReg());
-      load2 = NewLIR3(opcode, r_dest_hi.GetReg(), HIWORD_OFFSET, r_tmp.GetReg());
+      load = NewLIR3(opcode, r_dest.GetLowReg(), LOWORD_OFFSET, r_tmp.GetReg());
+      load2 = NewLIR3(opcode, r_dest.GetHighReg(), HIWORD_OFFSET, r_tmp.GetReg());
       FreeTemp(r_tmp);
     } else {
       RegStorage r_tmp = (r_base == r_dest) ? AllocTemp() : r_dest;
@@ -557,11 +560,7 @@
     size = k32;
   }
   LIR* load;
-  if (size == k64 || size == kDouble) {
-    load = LoadBaseDispBody(r_base, displacement, r_dest.GetLow(), r_dest.GetHigh(), size);
-  } else {
-    load = LoadBaseDispBody(r_base, displacement, r_dest, RegStorage::InvalidReg(), size);
-  }
+  load = LoadBaseDispBody(r_base, displacement, r_dest, size);
 
   if (UNLIKELY(is_volatile == kVolatile)) {
     // Without context sensitive analysis, we must issue the most conservative barriers.
@@ -575,7 +574,7 @@
 
 // FIXME: don't split r_dest into 2 containers.
 LIR* MipsMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement,
-                                    RegStorage r_src, RegStorage r_src_hi, OpSize size) {
+                                    RegStorage r_src, OpSize size) {
   LIR *res;
   LIR *store = NULL;
   LIR *store2 = NULL;
@@ -586,17 +585,16 @@
   switch (size) {
     case k64:
     case kDouble:
-      opcode = kMipsSw;
+      if (!pair) {
+        // Form 64-bit pair
+        r_src = Solo64ToPair64(r_src);
+        pair = true;
+      }
       if (r_src.IsFloat()) {
+        DCHECK_EQ(r_src.GetLowReg(), r_src.GetHighReg() - 1);
         opcode = kMipsFswc1;
-        if (r_src.IsDouble()) {
-          int reg_num = (r_src.GetRegNum() << 1) | RegStorage::kFloatingPoint;
-          r_src = RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
-        } else {
-          DCHECK(r_src_hi.IsFloat());
-          DCHECK_EQ(r_src.GetReg(), (r_src_hi.GetReg() - 1));
-          r_src_hi.SetReg(r_src.GetReg() + 1);
-        }
+      } else {
+        opcode = kMipsSw;
       }
       short_form = IS_SIMM16_2WORD(displacement);
       DCHECK_EQ((displacement & 0x3), 0);
@@ -628,8 +626,8 @@
     if (!pair) {
       store = res = NewLIR3(opcode, r_src.GetReg(), displacement, r_base.GetReg());
     } else {
-      store = res = NewLIR3(opcode, r_src.GetReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
-      store2 = NewLIR3(opcode, r_src_hi.GetReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
+      store = res = NewLIR3(opcode, r_src.GetLowReg(), displacement + LOWORD_OFFSET, r_base.GetReg());
+      store2 = NewLIR3(opcode, r_src.GetHighReg(), displacement + HIWORD_OFFSET, r_base.GetReg());
     }
   } else {
     RegStorage r_scratch = AllocTemp();
@@ -637,8 +635,8 @@
     if (!pair) {
       store =  NewLIR3(opcode, r_src.GetReg(), 0, r_scratch.GetReg());
     } else {
-      store =  NewLIR3(opcode, r_src.GetReg(), LOWORD_OFFSET, r_scratch.GetReg());
-      store2 = NewLIR3(opcode, r_src_hi.GetReg(), HIWORD_OFFSET, r_scratch.GetReg());
+      store =  NewLIR3(opcode, r_src.GetLowReg(), LOWORD_OFFSET, r_scratch.GetReg());
+      store2 = NewLIR3(opcode, r_src.GetHighReg(), HIWORD_OFFSET, r_scratch.GetReg());
     }
     FreeTemp(r_scratch);
   }
@@ -669,11 +667,7 @@
     size = k32;
   }
   LIR* store;
-  if (size == k64 || size == kDouble) {
-    store = StoreBaseDispBody(r_base, displacement, r_src.GetLow(), r_src.GetHigh(), size);
-  } else {
-    store = StoreBaseDispBody(r_base, displacement, r_src, RegStorage::InvalidReg(), size);
-  }
+  store = StoreBaseDispBody(r_base, displacement, r_src, size);
 
   if (UNLIKELY(is_volatile == kVolatile)) {
     // A load might follow the volatile store so insert a StoreLoad barrier.
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 9912101..9ce5bb7 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -31,7 +31,7 @@
     p->MarkDead();
     if (p->IsWide()) {
       p->SetIsWide(false);
-      if (p->GetReg() != p->Partner()) {
+      if (p->GetReg().NotExactlyEquals(p->Partner())) {
         // Register pair - deal with the other half.
         p = GetRegInfo(p->Partner());
         p->SetIsWide(false);
@@ -253,6 +253,19 @@
   return res;
 }
 
+inline void Mir2Lir::CheckRegLocation(RegLocation rl) const {
+  if (kFailOnSizeError || kReportSizeError) {
+    CheckRegLocationImpl(rl, kFailOnSizeError, kReportSizeError);
+  }
+}
+
+inline void Mir2Lir::CheckRegStorage(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp)
+    const {
+  if (kFailOnSizeError || kReportSizeError) {
+    CheckRegStorageImpl(rs, wide, ref, fp, kFailOnSizeError, kReportSizeError);
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 5d68187..caadc0a 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -61,7 +61,7 @@
   if (reg_arg_low.Valid()) {
     LockTemp(reg_arg_low);
   }
-  if (reg_arg_high.Valid() && reg_arg_low != reg_arg_high) {
+  if (reg_arg_high.Valid() && reg_arg_low.NotExactlyEquals(reg_arg_high)) {
     LockTemp(reg_arg_high);
   }
 }
@@ -92,7 +92,7 @@
     if (!reg_arg.Valid()) {
       RegStorage new_reg =
           wide ?  AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
-      LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32, kNotVolatile);
+      LoadBaseDisp(TargetPtrReg(kSp), offset, new_reg, wide ? k64 : k32, kNotVolatile);
       return new_reg;
     } else {
       // Check if we need to copy the arg to a different reg_class.
@@ -120,7 +120,7 @@
     // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
     if (!reg_arg_low.Valid()) {
       RegStorage new_regs = AllocTypedTempWide(false, reg_class);
-      LoadBaseDisp(TargetReg(kSp), offset, new_regs, k64, kNotVolatile);
+      LoadBaseDisp(TargetPtrReg(kSp), offset, new_regs, k64, kNotVolatile);
       return new_regs;  // The reg_class is OK, we can return.
     } else {
       // Assume that no ABI allows splitting a wide fp reg between a narrow fp reg and memory,
@@ -128,7 +128,7 @@
       DCHECK(!reg_arg_low.IsFloat());
       reg_arg_high = AllocTemp();
       int offset_high = offset + sizeof(uint32_t);
-      Load32Disp(TargetReg(kSp), offset_high, reg_arg_high);
+      Load32Disp(TargetPtrReg(kSp), offset_high, reg_arg_high);
       // Continue below to check the reg_class.
     }
   }
@@ -140,7 +140,7 @@
     // conceivably break this assumption but Android supports only little-endian architectures.
     DCHECK(!wide);
     reg_arg_low = AllocTypedTemp(false, reg_class);
-    Load32Disp(TargetReg(kSp), offset, reg_arg_low);
+    Load32Disp(TargetPtrReg(kSp), offset, reg_arg_low);
     return reg_arg_low;  // The reg_class is OK, we can return.
   }
 
@@ -185,7 +185,7 @@
     if (reg.Valid()) {
       OpRegCopy(rl_dest.reg, reg);
     } else {
-      Load32Disp(TargetReg(kSp), offset, rl_dest.reg);
+      Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg);
     }
   } else {
     if (cu_->target64) {
@@ -193,7 +193,7 @@
       if (reg.Valid()) {
         OpRegCopy(rl_dest.reg, reg);
       } else {
-        LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64, kNotVolatile);
+        LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile);
       }
       return;
     }
@@ -206,12 +206,12 @@
     } else if (reg_arg_low.Valid() && !reg_arg_high.Valid()) {
       OpRegCopy(rl_dest.reg, reg_arg_low);
       int offset_high = offset + sizeof(uint32_t);
-      Load32Disp(TargetReg(kSp), offset_high, rl_dest.reg.GetHigh());
+      Load32Disp(TargetPtrReg(kSp), offset_high, rl_dest.reg.GetHigh());
     } else if (!reg_arg_low.Valid() && reg_arg_high.Valid()) {
       OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high);
-      Load32Disp(TargetReg(kSp), offset, rl_dest.reg.GetLow());
+      Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg.GetLow());
     } else {
-      LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64, kNotVolatile);
+      LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile);
     }
   }
 }
@@ -249,7 +249,7 @@
     LoadBaseDisp(reg_obj, data.field_offset, r_result, size, data.is_volatile ? kVolatile :
         kNotVolatile);
   }
-  if (r_result != rl_dest.reg) {
+  if (r_result.NotExactlyEquals(rl_dest.reg)) {
     if (wide) {
       OpRegCopyWide(rl_dest.reg, r_result);
     } else {
@@ -1267,4 +1267,55 @@
   return target;
 }
 
+
+void Mir2Lir::CheckRegStorageImpl(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp,
+                                  bool fail, bool report)
+    const  {
+  if (rs.Valid()) {
+    if (ref == RefCheck::kCheckRef) {
+      if (cu_->target64 && !rs.Is64Bit()) {
+        if (fail) {
+          CHECK(false) << "Reg storage not 64b for ref.";
+        } else if (report) {
+          LOG(WARNING) << "Reg storage not 64b for ref.";
+        }
+      }
+    }
+    if (wide == WidenessCheck::kCheckWide) {
+      if (!rs.Is64Bit()) {
+        if (fail) {
+          CHECK(false) << "Reg storage not 64b for wide.";
+        } else if (report) {
+          LOG(WARNING) << "Reg storage not 64b for wide.";
+        }
+      }
+    }
+    // A tighter check would be nice, but for now soft-float will not check float at all.
+    if (fp == FPCheck::kCheckFP && cu_->instruction_set != kArm) {
+      if (!rs.IsFloat()) {
+        if (fail) {
+          CHECK(false) << "Reg storage not float for fp.";
+        } else if (report) {
+          LOG(WARNING) << "Reg storage not float for fp.";
+        }
+      }
+    } else if (fp == FPCheck::kCheckNotFP) {
+      if (rs.IsFloat()) {
+        if (fail) {
+          CHECK(false) << "Reg storage float for not-fp.";
+        } else if (report) {
+          LOG(WARNING) << "Reg storage float for not-fp.";
+        }
+      }
+    }
+  }
+}
+
+void Mir2Lir::CheckRegLocationImpl(RegLocation rl, bool fail, bool report) const {
+  // Regrettably can't use the fp part of rl, as that is not really indicative of where a value
+  // will be stored.
+  CheckRegStorageImpl(rl.reg, rl.wide ? WidenessCheck::kCheckWide : WidenessCheck::kCheckNotWide,
+      rl.ref ? RefCheck::kCheckRef : RefCheck::kCheckNotRef, FPCheck::kIgnoreFP, fail, report);
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index b07c85e..cfcc5c8 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -21,6 +21,7 @@
 #include "compiled_method.h"
 #include "dex/compiler_enums.h"
 #include "dex/compiler_ir.h"
+#include "dex/reg_location.h"
 #include "dex/reg_storage.h"
 #include "dex/backend.h"
 #include "dex/quick/resource_mask.h"
@@ -124,7 +125,6 @@
 struct InlineMethod;
 struct MIR;
 struct LIR;
-struct RegLocation;
 struct RegisterInfo;
 class DexFileMethodInliner;
 class MIRGraph;
@@ -177,8 +177,6 @@
                           ArenaAllocator* const arena);
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
-Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                          ArenaAllocator* const arena);
 
 // Utility macros to traverse the LIR list.
 #define NEXT_LIR(lir) (lir->next)
@@ -197,8 +195,8 @@
     high_reg = (both_regs >> 8) & 0xff; \
   } while (false)
 
-// Mask to denote sreg as the start of a double.  Must not interfere with low 16 bits.
-#define STARTING_DOUBLE_SREG 0x10000
+// Mask to denote sreg as the start of a 64-bit item.  Must not interfere with low 16 bits.
+#define STARTING_WIDE_SREG 0x10000
 
 // TODO: replace these macros
 #define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath))
@@ -239,6 +237,9 @@
 
 class Mir2Lir : public Backend {
   public:
+    static constexpr bool kFailOnSizeError = true && kIsDebugBuild;
+    static constexpr bool kReportSizeError = true && kIsDebugBuild;
+
     /*
      * Auxiliary information describing the location of data embedded in the Dalvik
      * byte code stream.
@@ -486,7 +487,7 @@
       RegLocationType core_location:3;
       uint8_t core_reg;
       RegLocationType fp_location:3;
-      uint8_t FpReg;
+      uint8_t fp_reg;
       bool first_in_pair;
     };
 
@@ -739,9 +740,9 @@
     int SRegToPMap(int s_reg);
     void RecordCorePromotion(RegStorage reg, int s_reg);
     RegStorage AllocPreservedCoreReg(int s_reg);
-    void RecordSinglePromotion(RegStorage reg, int s_reg);
-    void RecordDoublePromotion(RegStorage reg, int s_reg);
-    RegStorage AllocPreservedSingle(int s_reg);
+    void RecordFpPromotion(RegStorage reg, int s_reg);
+    RegStorage AllocPreservedFpReg(int s_reg);
+    virtual RegStorage AllocPreservedSingle(int s_reg);
     virtual RegStorage AllocPreservedDouble(int s_reg);
     RegStorage AllocTempBody(GrowableArray<RegisterInfo*> &regs, int* next_temp, bool required);
     virtual RegStorage AllocFreeTemp();
@@ -817,8 +818,8 @@
 
     // Shared by all targets - implemented in gen_common.cc.
     void AddIntrinsicSlowPath(CallInfo* info, LIR* branch, LIR* resume = nullptr);
-    bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
-                          RegLocation rl_src, RegLocation rl_dest, int lit);
+    virtual bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
+                                  RegLocation rl_src, RegLocation rl_dest, int lit);
     bool HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit);
     virtual void HandleSlowPaths();
     void GenBarrier();
@@ -983,6 +984,7 @@
 
     bool GenInlinedCharAt(CallInfo* info);
     bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty);
+    virtual bool GenInlinedReverseBits(CallInfo* info, OpSize size);
     bool GenInlinedReverseBytes(CallInfo* info, OpSize size);
     bool GenInlinedAbsInt(CallInfo* info);
     virtual bool GenInlinedAbsLong(CallInfo* info);
@@ -1021,8 +1023,9 @@
       return LoadBaseDisp(r_base, displacement, r_dest, kReference, is_volatile);
     }
     // Load a reference at base + index and decompress into register.
-    virtual LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) {
-      return LoadBaseIndexed(r_base, r_index, r_dest, 2, kReference);
+    virtual LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
+                                int scale) {
+      return LoadBaseIndexed(r_base, r_index, r_dest, scale, kReference);
     }
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
@@ -1048,8 +1051,9 @@
       return StoreBaseDisp(r_base, displacement, r_src, kReference, is_volatile);
     }
     // Store an uncompressed reference into a compressed 32-bit container by index.
-    virtual LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) {
-      return StoreBaseIndexed(r_base, r_index, r_src, 2, kReference);
+    virtual LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
+                                 int scale) {
+      return StoreBaseIndexed(r_base, r_index, r_src, scale, kReference);
     }
     // Store 32 bits, regardless of target.
     virtual LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) {
@@ -1173,7 +1177,68 @@
     virtual void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) = 0;
 
     // Required for target - register utilities.
+
+    bool IsSameReg(RegStorage reg1, RegStorage reg2) {
+      RegisterInfo* info1 = GetRegInfo(reg1);
+      RegisterInfo* info2 = GetRegInfo(reg2);
+      return (info1->Master() == info2->Master() &&
+             (info1->StorageMask() & info2->StorageMask()) != 0);
+    }
+
+    /**
+     * @brief Portable way of getting special registers from the backend.
+     * @param reg Enumeration describing the purpose of the register.
+     * @return Return the #RegStorage corresponding to the given purpose @p reg.
+     * @note This function is currently allowed to return any suitable view of the registers
+     *   (e.g. this could be 64-bit solo or 32-bit solo for 64-bit backends).
+     */
     virtual RegStorage TargetReg(SpecialTargetRegister reg) = 0;
+
+    /**
+     * @brief Portable way of getting special registers from the backend.
+     * @param reg Enumeration describing the purpose of the register.
+     * @param is_wide Whether the view should be 64-bit (rather than 32-bit).
+     * @return Return the #RegStorage corresponding to the given purpose @p reg.
+     */
+    virtual RegStorage TargetReg(SpecialTargetRegister reg, bool is_wide) {
+      return TargetReg(reg);
+    }
+
+    /**
+     * @brief Portable way of getting a special register pair from the backend.
+     * @param reg1 Enumeration describing the purpose of the first register.
+     * @param reg2 Enumeration describing the purpose of the second register.
+     * @return Return the #RegStorage corresponding to the given purposes @p reg1 and @p reg2.
+     */
+    virtual RegStorage TargetReg(SpecialTargetRegister reg1, SpecialTargetRegister reg2) {
+      return RegStorage::MakeRegPair(TargetReg(reg1, false), TargetReg(reg2, false));
+    }
+
+    /**
+     * @brief Portable way of getting a special register for storing a reference.
+     * @see TargetReg()
+     */
+    virtual RegStorage TargetRefReg(SpecialTargetRegister reg) {
+      return TargetReg(reg);
+    }
+
+    /**
+     * @brief Portable way of getting a special register for storing a pointer.
+     * @see TargetReg()
+     */
+    virtual RegStorage TargetPtrReg(SpecialTargetRegister reg) {
+      return TargetReg(reg);
+    }
+
+    // Get a reg storage corresponding to the wide & ref flags of the reg location.
+    virtual RegStorage TargetReg(SpecialTargetRegister reg, RegLocation loc) {
+      if (loc.ref) {
+        return TargetRefReg(reg);
+      } else {
+        return TargetReg(reg, loc.wide);
+      }
+    }
+
     virtual RegStorage GetArgMappingToPhysicalReg(int arg_num) = 0;
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
@@ -1187,8 +1252,6 @@
     virtual void ClobberCallerSave() = 0;
     virtual void FreeCallTemps() = 0;
     virtual void LockCallTemps() = 0;
-    virtual void MarkPreservedSingle(int v_reg, RegStorage reg) = 0;
-    virtual void MarkPreservedDouble(int v_reg, RegStorage reg) = 0;
     virtual void CompilerInitializeRegAlloc() = 0;
 
     // Required for target - miscellaneous.
@@ -1239,9 +1302,11 @@
      * directly into the destination register as specified by the invoke information.
      * @param info Information about the invoke.
      * @param is_min If true generates code that computes minimum. Otherwise computes maximum.
+     * @param is_long If true the value is a Long. Otherwise the value is an Int.
      * @return Returns true if successfully generated
      */
-    virtual bool GenInlinedMinMaxInt(CallInfo* info, bool is_min) = 0;
+    virtual bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) = 0;
+    virtual bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double);
 
     virtual bool GenInlinedSqrt(CallInfo* info) = 0;
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
@@ -1569,6 +1634,45 @@
      */
     virtual void GenConst(RegLocation rl_dest, int value);
 
+    enum class WidenessCheck {  // private
+      kIgnoreWide,
+      kCheckWide,
+      kCheckNotWide
+    };
+
+    enum class RefCheck {  // private
+      kIgnoreRef,
+      kCheckRef,
+      kCheckNotRef
+    };
+
+    enum class FPCheck {  // private
+      kIgnoreFP,
+      kCheckFP,
+      kCheckNotFP
+    };
+
+    /**
+     * Check whether a reg storage seems well-formed: if the reg storage is valid, verify that
+     * it has the expected form for the given checks.
+     * The expected form is described by the WidenessCheck, RefCheck and FPCheck arguments.
+     */
+    void CheckRegStorageImpl(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp, bool fail,
+                             bool report)
+        const;
+
+    /**
+     * Check whether a reg location seems well-formed: if a reg storage is encoded, verify that
+     * it has the expected size.
+     */
+    void CheckRegLocationImpl(RegLocation rl, bool fail, bool report) const;
+
+    // See CheckRegStorageImpl. Will print or fail depending on kFailOnSizeError and
+    // kReportSizeError.
+    void CheckRegStorage(RegStorage rs, WidenessCheck wide, RefCheck ref, FPCheck fp) const;
+    // See CheckRegLocationImpl.
+    void CheckRegLocation(RegLocation rl) const;
+
   public:
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 60eebe4..13bd443 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -178,7 +178,7 @@
   } else {
     RegisterInfo* info = GetRegInfo(reg);
     if (info->IsTemp() && !info->IsDead()) {
-      if (info->GetReg() != info->Partner()) {
+      if (info->GetReg().NotExactlyEquals(info->Partner())) {
         ClobberBody(GetRegInfo(info->Partner()));
       }
       ClobberBody(info);
@@ -225,7 +225,7 @@
     GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
     for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) {
       if (info->SReg() == s_reg) {
-        if (info->GetReg() != info->Partner()) {
+        if (info->GetReg().NotExactlyEquals(info->Partner())) {
           // Dealing with a pair - clobber the other half.
           DCHECK(!info->IsAliased());
           ClobberBody(GetRegInfo(info->Partner()));
@@ -284,8 +284,13 @@
 
 /* Reserve a callee-save register.  Return InvalidReg if none available */
 RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) {
-  // TODO: 64-bit and refreg update
   RegStorage res;
+  /*
+   * Note: it really doesn't matter much whether we allocate from the core or core64
+   * pool for 64-bit targets - but for some targets it does matter whether allocations
+   * happen from the single or double pool.  This entire section of code could stand
+   * a good refactoring.
+   */
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
     if (!info->IsTemp() && !info->InUse()) {
@@ -297,49 +302,50 @@
   return res;
 }
 
-void Mir2Lir::RecordSinglePromotion(RegStorage reg, int s_reg) {
+void Mir2Lir::RecordFpPromotion(RegStorage reg, int s_reg) {
+  DCHECK_NE(cu_->instruction_set, kThumb2);
   int p_map_idx = SRegToPMap(s_reg);
   int v_reg = mir_graph_->SRegToVReg(s_reg);
+  int reg_num = reg.GetRegNum();
   GetRegInfo(reg)->MarkInUse();
-  MarkPreservedSingle(v_reg, reg);
+  fp_spill_mask_ |= (1 << reg_num);
+  // Include reg for later sort
+  fp_vmap_table_.push_back(reg_num << VREG_NUM_WIDTH | (v_reg & ((1 << VREG_NUM_WIDTH) - 1)));
+  num_fp_spills_++;
   promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-  promotion_map_[p_map_idx].FpReg = reg.GetReg();
+  promotion_map_[p_map_idx].fp_reg = reg.GetReg();
 }
 
-// Reserve a callee-save sp single register.
-RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) {
+// Reserve a callee-save floating point register.
+RegStorage Mir2Lir::AllocPreservedFpReg(int s_reg) {
+  /*
+   * For targets other than Thumb2, it doesn't matter whether we allocate from
+   * the sp_regs_ or dp_regs_ pool.  Some refactoring is in order here.
+   */
+  DCHECK_NE(cu_->instruction_set, kThumb2);
   RegStorage res;
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
     if (!info->IsTemp() && !info->InUse()) {
       res = info->GetReg();
-      RecordSinglePromotion(res, s_reg);
+      RecordFpPromotion(res, s_reg);
       break;
     }
   }
   return res;
 }
 
-void Mir2Lir::RecordDoublePromotion(RegStorage reg, int s_reg) {
-  int p_map_idx = SRegToPMap(s_reg);
-  int v_reg = mir_graph_->SRegToVReg(s_reg);
-  GetRegInfo(reg)->MarkInUse();
-  MarkPreservedDouble(v_reg, reg);
-  promotion_map_[p_map_idx].fp_location = kLocPhysReg;
-  promotion_map_[p_map_idx].FpReg = reg.GetReg();
-}
-
-// Reserve a callee-save dp solo register.
+// TODO: this is Thumb2 only.  Remove when DoPromotion is refactored.
 RegStorage Mir2Lir::AllocPreservedDouble(int s_reg) {
   RegStorage res;
-  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->dp_regs_);
-  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
-    if (!info->IsTemp() && !info->InUse()) {
-      res = info->GetReg();
-      RecordDoublePromotion(res, s_reg);
-      break;
-    }
-  }
+  UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedDouble";
+  return res;
+}
+
+// TODO: this is Thumb2 only.  Remove when DoPromotion is refactored.
+RegStorage Mir2Lir::AllocPreservedSingle(int s_reg) {
+  RegStorage res;
+  UNIMPLEMENTED(FATAL) << "Unexpected use of AllocPreservedSingle";
   return res;
 }
 
@@ -359,7 +365,13 @@
        * NOTE: "wideness" is an attribute of how the container is used, not its physical size.
        * The caller will set wideness as appropriate.
        */
-      info->SetIsWide(false);
+      if (info->IsWide()) {
+        RegisterInfo* partner = GetRegInfo(info->Partner());
+        DCHECK_EQ(info->GetReg().GetRegNum(), partner->Partner().GetRegNum());
+        DCHECK(partner->IsWide());
+        info->SetIsWide(false);
+        partner->SetIsWide(false);
+      }
       *next_temp = next + 1;
       return info->GetReg();
     }
@@ -414,24 +426,28 @@
     RegStorage high_reg = AllocTemp();
     res = RegStorage::MakeRegPair(low_reg, high_reg);
   }
+  CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kIgnoreRef, FPCheck::kCheckNotFP);
   return res;
 }
 
 RegStorage Mir2Lir::AllocTempRef() {
   RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, true);
   DCHECK(!res.IsPair());
+  CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckRef, FPCheck::kCheckNotFP);
   return res;
 }
 
 RegStorage Mir2Lir::AllocTempSingle() {
   RegStorage res = AllocTempBody(reg_pool_->sp_regs_, &reg_pool_->next_sp_reg_, true);
   DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits();
+  CheckRegStorage(res, WidenessCheck::kCheckNotWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP);
   return res;
 }
 
 RegStorage Mir2Lir::AllocTempDouble() {
   RegStorage res = AllocTempBody(reg_pool_->dp_regs_, &reg_pool_->next_dp_reg_, true);
   DCHECK(res.IsDouble()) << "Reg: 0x" << std::hex << res.GetRawBits();
+  CheckRegStorage(res, WidenessCheck::kCheckWide, RefCheck::kCheckNotRef, FPCheck::kIgnoreFP);
   return res;
 }
 
@@ -468,13 +484,15 @@
   RegStorage reg;
   if (reg_class == kRefReg) {
     reg = FindLiveReg(*reg_pool_->ref_regs_, s_reg);
+    CheckRegStorage(reg, WidenessCheck::kCheckNotWide, RefCheck::kCheckRef, FPCheck::kCheckNotFP);
   }
   if (!reg.Valid() && ((reg_class == kAnyReg) || (reg_class == kFPReg))) {
     reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
   }
   if (!reg.Valid() && (reg_class != kFPReg)) {
     if (cu_->target64) {
-      reg = FindLiveReg(wide ? reg_pool_->core64_regs_ : reg_pool_->core_regs_, s_reg);
+      reg = FindLiveReg(wide || reg_class == kRefReg ? reg_pool_->core64_regs_ :
+                                                       reg_pool_->core_regs_, s_reg);
     } else {
       reg = FindLiveReg(reg_pool_->core_regs_, s_reg);
     }
@@ -519,6 +537,9 @@
       ClobberSReg(s_reg + 1);
     }
   }
+  CheckRegStorage(reg, WidenessCheck::kIgnoreWide,
+                  reg_class == kRefReg ? RefCheck::kCheckRef : RefCheck::kIgnoreRef,
+                  FPCheck::kIgnoreFP);
   return reg;
 }
 
@@ -721,7 +742,8 @@
     RegisterInfo* info1 = GetRegInfo(reg.GetLow());
     RegisterInfo* info2 = GetRegInfo(reg.GetHigh());
     DCHECK(info1 && info2 && info1->IsWide() && info2->IsWide() &&
-         (info1->Partner() == info2->GetReg()) && (info2->Partner() == info1->GetReg()));
+           (info1->Partner().ExactlyEquals(info2->GetReg())) &&
+           (info2->Partner().ExactlyEquals(info1->GetReg())));
     if ((info1->IsLive() && info1->IsDirty()) || (info2->IsLive() && info2->IsDirty())) {
       if (!(info1->IsTemp() && info2->IsTemp())) {
         /* Should not happen.  If it does, there's a problem in eval_loc */
@@ -735,7 +757,7 @@
       }
       int v_reg = mir_graph_->SRegToVReg(info1->SReg());
       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64, kNotVolatile);
+      StoreBaseDisp(TargetPtrReg(kSp), VRegOffset(v_reg), reg, k64, kNotVolatile);
     }
   } else {
     RegisterInfo* info = GetRegInfo(reg);
@@ -743,7 +765,7 @@
       info->SetIsDirty(false);
       int v_reg = mir_graph_->SRegToVReg(info->SReg());
       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, k64, kNotVolatile);
+      StoreBaseDisp(TargetPtrReg(kSp), VRegOffset(v_reg), reg, k64, kNotVolatile);
     }
   }
 }
@@ -755,7 +777,7 @@
     info->SetIsDirty(false);
     int v_reg = mir_graph_->SRegToVReg(info->SReg());
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    StoreBaseDisp(TargetReg(kSp), VRegOffset(v_reg), reg, kWord, kNotVolatile);
+    StoreBaseDisp(TargetPtrReg(kSp), VRegOffset(v_reg), reg, kWord, kNotVolatile);
   }
 }
 
@@ -857,10 +879,10 @@
     RegisterInfo* info_lo = GetRegInfo(reg.GetLow());
     RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
     // Unpair any old partners.
-    if (info_lo->IsWide() && info_lo->Partner() != info_hi->GetReg()) {
+    if (info_lo->IsWide() && info_lo->Partner().NotExactlyEquals(info_hi->GetReg())) {
       GetRegInfo(info_lo->Partner())->SetIsWide(false);
     }
-    if (info_hi->IsWide() && info_hi->Partner() != info_lo->GetReg()) {
+    if (info_hi->IsWide() && info_hi->Partner().NotExactlyEquals(info_lo->GetReg())) {
       GetRegInfo(info_hi->Partner())->SetIsWide(false);
     }
     info_lo->SetIsWide(true);
@@ -990,7 +1012,7 @@
   if (loc.location != kLocPhysReg) {
     DCHECK((loc.location == kLocDalvikFrame) ||
          (loc.location == kLocCompilerTemp));
-    RegStorage reg = AllocLiveReg(loc.s_reg_low, kAnyReg, false);
+    RegStorage reg = AllocLiveReg(loc.s_reg_low, loc.ref ? kRefReg : kAnyReg, false);
     if (reg.Valid()) {
       bool match = true;
       RegisterInfo* info = GetRegInfo(reg);
@@ -1004,6 +1026,7 @@
         FreeTemp(reg);
       }
     }
+    CheckRegLocation(loc);
   }
   return loc;
 }
@@ -1023,12 +1046,12 @@
         RegisterInfo* info_hi = GetRegInfo(reg.GetHigh());
         match &= info_lo->IsWide();
         match &= info_hi->IsWide();
-        match &= (info_lo->Partner() == info_hi->GetReg());
-        match &= (info_hi->Partner() == info_lo->GetReg());
+        match &= (info_lo->Partner().ExactlyEquals(info_hi->GetReg()));
+        match &= (info_hi->Partner().ExactlyEquals(info_lo->GetReg()));
       } else {
         RegisterInfo* info = GetRegInfo(reg);
         match &= info->IsWide();
-        match &= (info->GetReg() == info->Partner());
+        match &= (info->GetReg().ExactlyEquals(info->Partner()));
       }
       if (match) {
         loc.location = kLocPhysReg;
@@ -1038,6 +1061,7 @@
         FreeTemp(reg);
       }
     }
+    CheckRegLocation(loc);
   }
   return loc;
 }
@@ -1067,6 +1091,7 @@
       MarkWide(loc.reg);
       MarkLive(loc);
     }
+    CheckRegLocation(loc);
     return loc;
   }
 
@@ -1080,10 +1105,16 @@
     loc.location = kLocPhysReg;
     MarkLive(loc);
   }
+  CheckRegLocation(loc);
   return loc;
 }
 
 RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
+  // Narrow reg_class if the loc is a ref.
+  if (loc.ref && reg_class == kAnyReg) {
+    reg_class = kRefReg;
+  }
+
   if (loc.wide) {
     return EvalLocWide(loc, reg_class, update);
   }
@@ -1100,17 +1131,20 @@
       loc.reg = new_reg;
       MarkLive(loc);
     }
+    CheckRegLocation(loc);
     return loc;
   }
 
   DCHECK_NE(loc.s_reg_low, INVALID_SREG);
 
   loc.reg = AllocTypedTemp(loc.fp, reg_class);
+  CheckRegLocation(loc);
 
   if (update) {
     loc.location = kLocPhysReg;
     MarkLive(loc);
   }
+  CheckRegLocation(loc);
   return loc;
 }
 
@@ -1120,16 +1154,23 @@
     RegLocation loc = mir_graph_->reg_location_[i];
     RefCounts* counts = loc.fp ? fp_counts : core_counts;
     int p_map_idx = SRegToPMap(loc.s_reg_low);
+    int use_count = mir_graph_->GetUseCount(i);
     if (loc.fp) {
       if (loc.wide) {
         // Treat doubles as a unit, using upper half of fp_counts array.
-        counts[p_map_idx + num_regs].count += mir_graph_->GetUseCount(i);
+        counts[p_map_idx + num_regs].count += use_count;
         i++;
       } else {
-        counts[p_map_idx].count += mir_graph_->GetUseCount(i);
+        counts[p_map_idx].count += use_count;
       }
     } else if (!IsInexpensiveConstant(loc)) {
-      counts[p_map_idx].count += mir_graph_->GetUseCount(i);
+      if (loc.wide && cu_->target64) {
+        // Treat long as a unit, using upper half of core_counts array.
+        counts[p_map_idx + num_regs].count += use_count;
+        i++;
+      } else {
+        counts[p_map_idx].count += use_count;
+      }
     }
   }
 }
@@ -1149,10 +1190,10 @@
 void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) {
   LOG(INFO) << msg;
   for (int i = 0; i < size; i++) {
-    if ((arr[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
-      LOG(INFO) << "s_reg[D" << (arr[i].s_reg & ~STARTING_DOUBLE_SREG) << "]: " << arr[i].count;
+    if ((arr[i].s_reg & STARTING_WIDE_SREG) != 0) {
+      LOG(INFO) << "s_reg[64_" << (arr[i].s_reg & ~STARTING_WIDE_SREG) << "]: " << arr[i].count;
     } else {
-      LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count;
+      LOG(INFO) << "s_reg[32_" << arr[i].s_reg << "]: " << arr[i].count;
     }
   }
 }
@@ -1183,69 +1224,83 @@
    * TUNING: replace with linear scan once we have the ability
    * to describe register live ranges for GC.
    */
+  size_t core_reg_count_size = cu_->target64 ? num_regs * 2 : num_regs;
+  size_t fp_reg_count_size = num_regs * 2;
   RefCounts *core_regs =
-      static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs,
+      static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * core_reg_count_size,
                                             kArenaAllocRegAlloc));
-  RefCounts *FpRegs =
-      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2,
+  RefCounts *fp_regs =
+      static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * fp_reg_count_size,
                                              kArenaAllocRegAlloc));
   // Set ssa names for original Dalvik registers
   for (int i = 0; i < dalvik_regs; i++) {
-    core_regs[i].s_reg = FpRegs[i].s_reg = i;
+    core_regs[i].s_reg = fp_regs[i].s_reg = i;
   }
 
   // Set ssa names for compiler temporaries
   for (unsigned int ct_idx = 0; ct_idx < mir_graph_->GetNumUsedCompilerTemps(); ct_idx++) {
     CompilerTemp* ct = mir_graph_->GetCompilerTemp(ct_idx);
     core_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
-    FpRegs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
-    FpRegs[num_regs + dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
+    fp_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
   }
 
-  // Duplicate in upper half to represent possible fp double starting sregs.
-  for (int i = 0; i < num_regs; i++) {
-    FpRegs[num_regs + i].s_reg = FpRegs[i].s_reg | STARTING_DOUBLE_SREG;
+  // Duplicate in upper half to represent possible wide starting sregs.
+  for (size_t i = num_regs; i < fp_reg_count_size; i++) {
+    fp_regs[i].s_reg = fp_regs[i - num_regs].s_reg | STARTING_WIDE_SREG;
+  }
+  for (size_t i = num_regs; i < core_reg_count_size; i++) {
+    core_regs[i].s_reg = core_regs[i - num_regs].s_reg | STARTING_WIDE_SREG;
   }
 
   // Sum use counts of SSA regs by original Dalvik vreg.
-  CountRefs(core_regs, FpRegs, num_regs);
+  CountRefs(core_regs, fp_regs, num_regs);
 
 
   // Sort the count arrays
-  qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts);
-  qsort(FpRegs, num_regs * 2, sizeof(RefCounts), SortCounts);
+  qsort(core_regs, core_reg_count_size, sizeof(RefCounts), SortCounts);
+  qsort(fp_regs, fp_reg_count_size, sizeof(RefCounts), SortCounts);
 
   if (cu_->verbose) {
-    DumpCounts(core_regs, num_regs, "Core regs after sort");
-    DumpCounts(FpRegs, num_regs * 2, "Fp regs after sort");
+    DumpCounts(core_regs, core_reg_count_size, "Core regs after sort");
+    DumpCounts(fp_regs, fp_reg_count_size, "Fp regs after sort");
   }
 
   if (!(cu_->disable_opt & (1 << kPromoteRegs))) {
-    // Promote FpRegs
-    for (int i = 0; (i < (num_regs * 2)) && (FpRegs[i].count >= promotion_threshold); i++) {
-      int p_map_idx = SRegToPMap(FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG);
-      if ((FpRegs[i].s_reg & STARTING_DOUBLE_SREG) != 0) {
-        if ((promotion_map_[p_map_idx].fp_location != kLocPhysReg) &&
-            (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg)) {
-          int low_sreg = FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG;
-          // Ignore result - if can't alloc double may still be able to alloc singles.
-          AllocPreservedDouble(low_sreg);
+    // Promote fp regs
+    for (size_t i = 0; (i < fp_reg_count_size) && (fp_regs[i].count >= promotion_threshold); i++) {
+      int low_sreg = fp_regs[i].s_reg & ~STARTING_WIDE_SREG;
+      size_t p_map_idx = SRegToPMap(low_sreg);
+      RegStorage reg = RegStorage::InvalidReg();
+      if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
+        // TODO: break out the Thumb2-specific code.
+        if (cu_->instruction_set == kThumb2) {
+          bool wide = fp_regs[i].s_reg & STARTING_WIDE_SREG;
+          if (wide) {
+            if (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg) {
+              // Ignore result - if can't alloc double may still be able to alloc singles.
+              AllocPreservedDouble(low_sreg);
+            }
+            // Continue regardless of success - might still be able to grab a single.
+            continue;
+          } else {
+            reg = AllocPreservedSingle(low_sreg);
+          }
+        } else {
+          reg = AllocPreservedFpReg(low_sreg);
         }
-      } else if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) {
-        RegStorage reg = AllocPreservedSingle(FpRegs[i].s_reg);
         if (!reg.Valid()) {
-          break;  // No more left.
+           break;  // No more left
         }
       }
     }
 
     // Promote core regs
-    for (int i = 0; (i < num_regs) &&
-            (core_regs[i].count >= promotion_threshold); i++) {
-      int p_map_idx = SRegToPMap(core_regs[i].s_reg);
-      if (promotion_map_[p_map_idx].core_location !=
-          kLocPhysReg) {
-        RegStorage reg = AllocPreservedCoreReg(core_regs[i].s_reg);
+    for (size_t i = 0; (i < core_reg_count_size) &&
+         (core_regs[i].count >= promotion_threshold); i++) {
+      int low_sreg = core_regs[i].s_reg & ~STARTING_WIDE_SREG;
+      size_t p_map_idx = SRegToPMap(low_sreg);
+      if (promotion_map_[p_map_idx].core_location != kLocPhysReg) {
+        RegStorage reg = AllocPreservedCoreReg(low_sreg);
         if (!reg.Valid()) {
            break;  // No more left
         }
@@ -1257,51 +1312,35 @@
   for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
     RegLocation *curr = &mir_graph_->reg_location_[i];
     int p_map_idx = SRegToPMap(curr->s_reg_low);
-    if (!curr->wide) {
-      if (curr->fp) {
-        if (promotion_map_[p_map_idx].fp_location == kLocPhysReg) {
-          curr->location = kLocPhysReg;
-          curr->reg = RegStorage::Solo32(promotion_map_[p_map_idx].FpReg);
-          curr->home = true;
-        }
-      } else {
-        if (promotion_map_[p_map_idx].core_location == kLocPhysReg) {
-          curr->location = kLocPhysReg;
-          curr->reg = RegStorage::Solo32(promotion_map_[p_map_idx].core_reg);
-          curr->home = true;
-        }
-      }
-    } else {
-      if (curr->high_word) {
-        continue;
-      }
-      if (curr->fp) {
-        if ((promotion_map_[p_map_idx].fp_location == kLocPhysReg) &&
-            (promotion_map_[p_map_idx+1].fp_location == kLocPhysReg)) {
-          int low_reg = promotion_map_[p_map_idx].FpReg;
-          int high_reg = promotion_map_[p_map_idx+1].FpReg;
-          // Doubles require pair of singles starting at even reg
+    int reg_num = curr->fp ? promotion_map_[p_map_idx].fp_reg : promotion_map_[p_map_idx].core_reg;
+    bool wide = curr->wide || (cu_->target64 && curr->ref);
+    RegStorage reg = RegStorage::InvalidReg();
+    if (curr->fp && promotion_map_[p_map_idx].fp_location == kLocPhysReg) {
+      if (wide && cu_->instruction_set == kThumb2) {
+        if (promotion_map_[p_map_idx + 1].fp_location == kLocPhysReg) {
+          int high_reg = promotion_map_[p_map_idx+1].fp_reg;
           // TODO: move target-specific restrictions out of here.
-          if (((low_reg & 0x1) == 0) && ((low_reg + 1) == high_reg)) {
-            curr->location = kLocPhysReg;
-            if (cu_->instruction_set == kThumb2) {
-              curr->reg = RegStorage::FloatSolo64(RegStorage::RegNum(low_reg) >> 1);
-            } else {
-              curr->reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
-            }
-            curr->home = true;
+          if (((reg_num & 0x1) == 0) && ((reg_num + 1) == high_reg)) {
+            reg = RegStorage::FloatSolo64(RegStorage::RegNum(reg_num) >> 1);
           }
         }
       } else {
-        if ((promotion_map_[p_map_idx].core_location == kLocPhysReg)
-           && (promotion_map_[p_map_idx+1].core_location ==
-           kLocPhysReg)) {
-          curr->location = kLocPhysReg;
-          curr->reg = RegStorage(RegStorage::k64BitPair, promotion_map_[p_map_idx].core_reg,
-                                 promotion_map_[p_map_idx+1].core_reg);
-          curr->home = true;
-        }
+        reg = wide ? RegStorage::FloatSolo64(reg_num) : RegStorage::FloatSolo32(reg_num);
       }
+    } else if (!curr->fp && promotion_map_[p_map_idx].core_location == kLocPhysReg) {
+      if (wide && !cu_->target64) {
+        if (promotion_map_[p_map_idx + 1].core_location == kLocPhysReg) {
+          int high_reg = promotion_map_[p_map_idx+1].core_reg;
+          reg = RegStorage(RegStorage::k64BitPair, reg_num, high_reg);
+        }
+      } else {
+        reg = wide ? RegStorage::Solo64(reg_num) : RegStorage::Solo32(reg_num);
+      }
+    }
+    if (reg.Valid()) {
+      curr->reg = reg;
+      curr->location = kLocPhysReg;
+      curr->home = true;
     }
   }
   if (cu_->verbose) {
@@ -1332,6 +1371,7 @@
   Clobber(res.reg);
   LockTemp(res.reg);
   MarkWide(res.reg);
+  CheckRegLocation(res);
   return res;
 }
 
@@ -1348,6 +1388,7 @@
   } else {
     LockTemp(res.reg);
   }
+  CheckRegLocation(res);
   return res;
 }
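
Note on the promotion counting above: candidate s_regs that start a 64-bit value are tracked in the upper half of the counts array and tagged with the STARTING_WIDE_SREG bit, so a single sort ranks 32-bit and wide candidates together. A minimal sketch of that tagging convention, using an illustrative bit value in place of the real constant:

    #include <cstdint>
    #include <vector>

    // Illustrative stand-in; the real constant is defined alongside RefCounts.
    static constexpr int kStartingWideSReg = 0x10000;

    struct RefCount { int s_reg; int count; };

    // Lower half of 'counts' holds 32-bit starts, upper half wide (64-bit) starts.
    void TagWideHalf(std::vector<RefCount>& counts, size_t num_regs) {
      for (size_t i = num_regs; i < counts.size(); ++i) {
        counts[i].s_reg = counts[i - num_regs].s_reg | kStartingWideSReg;
      }
    }

    // Recover the low s_reg and the wideness flag when promoting.
    inline int LowSReg(int tagged) { return tagged & ~kStartingWideSReg; }
    inline bool IsWideStart(int tagged) { return (tagged & kStartingWideSReg) != 0; }
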
 
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 6d2b6fa..7baf2d9 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -208,7 +208,8 @@
   { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RM", "!0r,[!1r+!2d]" },
   { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,             0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
   { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RT", "!0r,fs:[!1d]" },
-  { kX86Mov64RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { REX_W,             0, 0xB8, 0, 0, 0, 0, 8, false }, "Mov64RI", "!0r,!1d" },
+  { kX86Mov64RI32, kRegImm,             IS_BINARY_OP   | REG_DEF0,       { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64RI32", "!0r,!1d" },
+  { kX86Mov64RI64, kMovRegQuadImm,      IS_TERTIARY_OP | REG_DEF0,       { REX_W,             0, 0xB8, 0, 0, 0, 0, 8, false }, "Mov64RI64", "!0r,!1q" },
   { kX86Mov64MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64MI", "[!0r+!1d],!2d" },
   { kX86Mov64AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { REX_W,             0, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4, false }, "Mov64TI", "fs:[!0d],!1d" },
@@ -317,6 +318,7 @@
   { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0,     0, 0x99, 0,    0, 0, 0, 0, false }, "Cdq", "" },
   { kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { REX_W, 0, 0x99, 0,    0, 0, 0, 0, false }, "Cqo", "" },
   { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0,     0, 0x0F, 0xC8, 0, 0, 0, 0, false }, "Bswap32R", "!0r" },
+  { kX86Bswap64R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { REX_W, 0, 0x0F, 0xC8, 0, 0, 0, 0, false }, "Bswap64R", "!0r" },
   { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0,     0, 0x50, 0,    0, 0, 0, 0, false }, "Push32R",  "!0r" },
   { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0,     0, 0x58, 0,    0, 0, 0, 0, false }, "Pop32R",   "!0r" },
 
@@ -432,7 +434,7 @@
   { kX86Fst64M,   kMem,     IS_STORE   | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0,  0,    0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M",  "[!0r,!1d]" },
   { kX86Fprem,    kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xD9, 0,    0xF8, 0,    0, 0, 0, 0, false }, "Fprem64",  "" },
   { kX86Fucompp,  kNullary, NO_OPERAND | USE_FP_STACK,                          { 0xDA, 0,    0xE9, 0,    0, 0, 0, 0, false }, "Fucompp",  "" },
-  { kX86Fstsw16R, kNullary, NO_OPERAND | USE_FP_STACK,                          { 0x9B, 0xDF, 0xE0, 0,    0, 0, 0, 0, false }, "Fstsw16R", "ax" },
+  { kX86Fstsw16R, kNullary, NO_OPERAND | REG_DEFA | USE_FP_STACK,               { 0x9B, 0xDF, 0xE0, 0,    0, 0, 0, 0, false }, "Fstsw16R", "ax" },
 
   EXT_0F_ENCODING_MAP(Mova128,    0x66, 0x6F, REG_DEF0),
   { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" },
@@ -488,6 +490,7 @@
   { kX86CmpxchgAR, kArrayReg,     IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0,    0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86LockCmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1d],!2r" },
   { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86LockCmpxchg64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES,    { 0xF0, REX_W, 0x0F, 0xB1, 0, 0, 0, 0, false }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86LockCmpxchg64M, kMem,     IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1d]" },
   { kX86LockCmpxchg64A, kArray,   IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES,  { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
   { kX86XchgMR, kMemReg,          IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02,          { 0, 0, 0x87, 0, 0, 0, 0, 0, false }, "Xchg", "[!0r+!1d],!2r" },
@@ -613,7 +616,7 @@
       ++size;
     }
   }
-  if (Gen64Bit() || kIsDebugBuild) {
+  if (cu_->target64 || kIsDebugBuild) {
     bool registers_need_rex_prefix = NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base);
     if (r8_form) {
       // Do we need an empty REX prefix to normalize byte registers?
@@ -622,7 +625,7 @@
           (modrm_is_reg_reg && (RegStorage::RegNum(raw_base) >= 4));
     }
     if (registers_need_rex_prefix) {
-      DCHECK(Gen64Bit()) << "Attempt to use a 64-bit only addressable register "
+      DCHECK(cu_->target64) << "Attempt to use a 64-bit only addressable register "
           << RegStorage::RegNum(raw_reg) << " with instruction " << entry->name;
       if (entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
         ++size;  // rex
@@ -641,7 +644,7 @@
   }
   if (!modrm_is_reg_reg) {
     if (has_sib || LowRegisterBits(raw_base) == rs_rX86_SP.GetRegNum()
-        || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
+        || (cu_->target64 && entry->skeleton.prefix1 == THREAD_PREFIX)) {
       // SP requires a SIB byte.
       // GS access also needs a SIB byte for absolute addressing in 64-bit mode.
       ++size;
@@ -728,6 +731,7 @@
       return ComputeSize(entry, lir->operands[0], lir->operands[2], lir->operands[1],
                          lir->operands[4]);
     case kMovRegImm:  // lir operands - 0: reg, 1: immediate
+    case kMovRegQuadImm:
       return ((entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])) ? 1 : 0) + 1 +
           entry->skeleton.immediate_bytes;
     case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
@@ -817,7 +821,7 @@
     case kMacro:  // lir operands - 0: reg
       DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
       return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
-          ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI],
+          ComputeSize(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI],
                       lir->operands[0], NO_REG, NO_REG, 0) -
               // Shorter ax encoding.
               (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum()  ? 1 : 0);
@@ -854,7 +858,7 @@
     }
     if (RegStorage::RegNum(raw_reg) >= 4) {
       // ah, bh, ch and dh are not valid registers in 32-bit.
-      CHECK(Gen64Bit() || !entry->skeleton.r8_form)
+      CHECK(cu_->target64 || !entry->skeleton.r8_form)
                << "Invalid register " << static_cast<int>(RegStorage::RegNum(raw_reg))
                << " for instruction " << entry->name << " in "
                << PrettyMethod(cu_->method_idx, *cu_->dex_file);
@@ -898,7 +902,7 @@
     rex |= 0x41;  // REX.000B
   }
   if (entry->skeleton.prefix1 != 0) {
-    if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) {
+    if (cu_->target64 && entry->skeleton.prefix1 == THREAD_PREFIX) {
       // 64 bit addresses by GS, not FS.
       code_buffer_.push_back(THREAD_PREFIX_GS);
     } else {
@@ -923,7 +927,7 @@
     DCHECK_EQ(0, entry->skeleton.prefix2);
   }
   if (rex != 0) {
-    DCHECK(Gen64Bit());
+    DCHECK(cu_->target64);
     code_buffer_.push_back(rex);
   }
 }
@@ -964,7 +968,7 @@
 }
 
 void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) {
-  if (Gen64Bit()) {
+  if (cu_->target64) {
+    // Absolute addressing for GS access.
     uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rX86_SP.GetRegNum();
     code_buffer_.push_back(modrm);
@@ -1558,7 +1562,7 @@
   uint8_t low_reg = LowRegisterBits(raw_reg);
   code_buffer_.push_back(0x58 + low_reg);  // pop reg
 
-  EmitRegImm(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI],
+  EmitRegImm(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI],
              raw_reg, offset + 5 /* size of call +0 */);
 }
 
@@ -1792,6 +1796,12 @@
       case kMovRegImm:  // lir operands - 0: reg, 1: immediate
         EmitMovRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kMovRegQuadImm: {
+          int64_t value = static_cast<int64_t>(static_cast<int64_t>(lir->operands[1]) << 32 |
+                          static_cast<uint32_t>(lir->operands[2]));
+          EmitMovRegImm(entry, lir->operands[0], value);
+        }
+        break;
       case kShiftRegImm:  // lir operands - 0: reg, 1: immediate
         EmitShiftRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
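
The new kX86Mov64RI64 entry (format kMovRegQuadImm) carries a 64-bit immediate split across two 32-bit LIR operands, reassembled in the kMovRegQuadImm case above. A small sketch of that pack/unpack round trip, assuming operand 1 holds the high word and operand 2 the low word as in this diff:

    #include <cstdint>
    #include <utility>

    // Split a 64-bit immediate into (high, low) 32-bit LIR operands.
    inline std::pair<int32_t, int32_t> PackQuadImm(int64_t value) {
      return { static_cast<int32_t>(value >> 32),            // operands[1]
               static_cast<int32_t>(value & 0xFFFFFFFF) };   // operands[2]
    }

    // Reassemble, mirroring the kMovRegQuadImm case in the assembler switch.
    inline int64_t UnpackQuadImm(int32_t hi, int32_t lo) {
      return (static_cast<int64_t>(hi) << 32) | static_cast<uint32_t>(lo);
    }
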
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 425caec..6ca220c 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -94,7 +94,7 @@
     start_of_method_reg = rl_method.reg;
     store_method_addr_used_ = true;
   } else {
-    if (Gen64Bit()) {
+    if (cu_->target64) {
       start_of_method_reg = AllocTempWide();
     } else {
       start_of_method_reg = AllocTemp();
@@ -119,7 +119,7 @@
   NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
           2, WrapPointer(tab_rec));
   // Add displacement to start of method
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     NewLIR2(kX86Add64RR, start_of_method_reg.GetReg(), disp_reg.GetReg());
   } else {
     OpRegReg(kOpAdd, start_of_method_reg, disp_reg);
@@ -158,29 +158,33 @@
 
   // Making a call - use explicit registers
   FlushAllRegs();   /* Everything to home location */
-  LoadValueDirectFixed(rl_src, rs_rX86_ARG0);
+  RegStorage array_ptr = TargetRefReg(kArg0);
+  RegStorage payload = TargetPtrReg(kArg1);
+  RegStorage method_start = TargetPtrReg(kArg2);
+
+  LoadValueDirectFixed(rl_src, array_ptr);
   // Materialize a pointer to the fill data image
   if (base_of_code_ != nullptr) {
     // We can use the saved value.
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
     if (rl_method.wide) {
-      LoadValueDirectWide(rl_method, rs_rX86_ARG2);
+      LoadValueDirectWide(rl_method, method_start);
     } else {
-      LoadValueDirect(rl_method, rs_rX86_ARG2);
+      LoadValueDirect(rl_method, method_start);
     }
     store_method_addr_used_ = true;
   } else {
     // TODO(64) force to be 64-bit
-    NewLIR1(kX86StartOfMethod, rs_rX86_ARG2.GetReg());
+    NewLIR1(kX86StartOfMethod, method_start.GetReg());
   }
-  NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec));
-  NewLIR2(Gen64Bit() ? kX86Add64RR : kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
+  NewLIR2(kX86PcRelAdr, payload.GetReg(), WrapPointer(tab_rec));
+  OpRegReg(kOpAdd, payload, method_start);
   if (cu_->target64) {
-    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0,
-                            rs_rX86_ARG1, true);
+    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), array_ptr,
+                            payload, true);
   } else {
-    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData), rs_rX86_ARG0,
-                            rs_rX86_ARG1, true);
+    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData), array_ptr,
+                            payload, true);
   }
 }
 
@@ -204,7 +208,7 @@
   int ct_offset = cu_->target64 ?
       Thread::CardTableOffset<8>().Int32Value() :
       Thread::CardTableOffset<4>().Int32Value();
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     NewLIR2(kX86Mov64RT, reg_card_base.GetReg(), ct_offset);
   } else {
     NewLIR2(kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
@@ -236,7 +240,7 @@
    * a leaf *and* our frame size < fudge factor.
    */
   const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
-      !IsLargeFrame(frame_size_, Gen64Bit() ? kX86_64 : kX86);
+      !IsLargeFrame(frame_size_, cu_->target64 ? kX86_64 : kX86);
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
@@ -291,12 +295,13 @@
   FlushIns(ArgLocs, rl_method);
 
   if (base_of_code_ != nullptr) {
+    RegStorage method_start = TargetPtrReg(kArg0);
     // We have been asked to save the address of the method start for later use.
-    setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rs_rX86_ARG0.GetReg());
+    setup_method_address_[0] = NewLIR1(kX86StartOfMethod, method_start.GetReg());
     int displacement = SRegOffset(base_of_code_->s_reg_low);
     // Native pointer - must be natural word size.
-    setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0,
-                                             Gen64Bit() ? k64 : k32, kNotVolatile);
+    setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, method_start,
+                                             cu_->target64 ? k64 : k32, kNotVolatile);
   }
 
   FreeTemp(rs_rX86_ARG0);
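
MarkGCCard above loads the card table base from thread-local storage (kX86Mov64RT / kX86Mov32RT via the fs:/gs: segment) and then dirties the card covering the written object. A rough sketch of the store the emitted code performs, with an illustrative card size; the real shift and dirty value come from the runtime's card table:

    #include <cstdint>

    static constexpr unsigned kCardShift = 10;  // illustrative 1 KiB cards

    // Dirty the card covering 'object_addr'; for the sketch, any value
    // distinguishable from "clean" will do.
    inline void MarkCard(uint8_t* card_base, uintptr_t object_addr) {
      card_base[object_addr >> kCardShift] = 0x70;  // illustrative dirty marker
    }
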
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 70382c7..123fe90 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -34,9 +34,11 @@
 
   class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
    public:
-    InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
+    explicit InToRegStorageX86_64Mapper(Mir2Lir* ml) : ml_(ml), cur_core_reg_(0), cur_fp_reg_(0) {}
     virtual ~InToRegStorageX86_64Mapper() {}
     virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+   protected:
+    Mir2Lir* ml_;
    private:
     int cur_core_reg_;
     int cur_fp_reg_;
@@ -59,7 +61,7 @@
   };
 
  public:
-  X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
+  X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
   // Required for target - codegen helpers.
   bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
@@ -85,7 +87,22 @@
   void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
   // Required for target - register utilities.
-  RegStorage TargetReg(SpecialTargetRegister reg);
+  RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
+  RegStorage TargetReg32(SpecialTargetRegister reg);
+  RegStorage TargetReg(SpecialTargetRegister symbolic_reg, bool is_wide) OVERRIDE {
+    RegStorage reg = TargetReg32(symbolic_reg);
+    if (is_wide) {
+      return (reg.Is64Bit()) ? reg : As64BitReg(reg);
+    } else {
+      return (reg.Is32Bit()) ? reg : As32BitReg(reg);
+    }
+  }
+  RegStorage TargetRefReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
+    return TargetReg(symbolic_reg, cu_->target64);
+  }
+  RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
+    return TargetReg(symbolic_reg, cu_->target64);
+  }
   RegStorage GetArgMappingToPhysicalReg(int arg_num);
   RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
   RegLocation GetReturnAlt();
@@ -100,8 +117,6 @@
   void ClobberCallerSave();
   void FreeCallTemps();
   void LockCallTemps();
-  void MarkPreservedSingle(int v_reg, RegStorage reg);
-  void MarkPreservedDouble(int v_reg, RegStorage reg);
   void CompilerInitializeRegAlloc();
 
   // Required for target - miscellaneous.
@@ -149,7 +164,7 @@
                 RegLocation rl_src2);
   void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
   bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object);
-  bool GenInlinedMinMaxInt(CallInfo* info, bool is_min);
+  bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long);
   bool GenInlinedSqrt(CallInfo* info);
   bool GenInlinedAbsFloat(CallInfo* info) OVERRIDE;
   bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE;
@@ -390,6 +405,43 @@
   std::vector<uint8_t>* ReturnCallFrameInformation();
 
  protected:
+  // Casting of RegStorage
+  RegStorage As32BitReg(RegStorage reg) {
+    DCHECK(!reg.IsPair());
+    if ((kFailOnSizeError || kReportSizeError) && !reg.Is64Bit()) {
+      if (kFailOnSizeError) {
+        LOG(FATAL) << "Expected 64b register " << reg.GetReg();
+      } else {
+        LOG(WARNING) << "Expected 64b register " << reg.GetReg();
+        return reg;
+      }
+    }
+    RegStorage ret_val = RegStorage(RegStorage::k32BitSolo,
+                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
+    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k32SoloStorageMask)
+                             ->GetReg().GetReg(),
+              ret_val.GetReg());
+    return ret_val;
+  }
+
+  RegStorage As64BitReg(RegStorage reg) {
+    DCHECK(!reg.IsPair());
+    if ((kFailOnSizeError || kReportSizeError) && !reg.Is32Bit()) {
+      if (kFailOnSizeError) {
+        LOG(FATAL) << "Expected 32b register " << reg.GetReg();
+      } else {
+        LOG(WARNING) << "Expected 32b register " << reg.GetReg();
+        return reg;
+      }
+    }
+    RegStorage ret_val = RegStorage(RegStorage::k64BitSolo,
+                                    reg.GetRawBits() & RegStorage::kRegTypeMask);
+    DCHECK_EQ(GetRegInfo(reg)->FindMatchingView(RegisterInfo::k64SoloStorageMask)
+                             ->GetReg().GetReg(),
+              ret_val.GetReg());
+    return ret_val;
+  }
+
   size_t ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
                      int32_t raw_base, int32_t displacement);
   void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg);
@@ -802,8 +854,6 @@
    */
   void AnalyzeInvokeStatic(int opcode, BasicBlock * bb, MIR *mir);
 
-  bool Gen64Bit() const  { return gen64bit_; }
-
   // Information derived from analysis of MIR
 
   // The compiler temporary for the code address of the method.
@@ -833,9 +883,6 @@
   // Epilogue increment of stack pointer.
   LIR* stack_increment_;
 
-  // 64-bit mode
-  bool gen64bit_;
-
   // The list of const vector literals.
   LIR *const_vectors_;
 
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
old mode 100644
new mode 100755
index f854adb..fc65deb
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -16,6 +16,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "x86_lir.h"
 
 namespace art {
@@ -144,12 +145,12 @@
     } else {
       // It must have been register promoted if it is not a temp but is still in physical
       // register. Since we need it to be in memory to convert, we place it there now.
-      StoreBaseDisp(TargetReg(kSp), src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
+      StoreBaseDisp(rs_rX86_SP, src_v_reg_offset, rl_src.reg, k64, kNotVolatile);
     }
   }
 
   // Push the source virtual register onto the x87 stack.
-  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp).GetReg(),
+  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, rs_rX86_SP.GetReg(),
                               src_v_reg_offset + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
@@ -157,7 +158,7 @@
   // Now pop off x87 stack and store it in the destination VR's stack location.
   int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
   int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
-  LIR *fstp = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement);
+  LIR *fstp = NewLIR2NoDest(opcode, rs_rX86_SP.GetReg(), displacement);
   AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);
 
   /*
@@ -178,11 +179,11 @@
      */
     rl_result = EvalLoc(rl_dest, kFPReg, true);
     if (is_double) {
-      LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
+      LoadBaseDisp(rs_rX86_SP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
 
       StoreFinalValueWide(rl_dest, rl_result);
     } else {
-      Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);
+      Load32Disp(rs_rX86_SP, dest_v_reg_offset, rl_result.reg);
 
       StoreFinalValue(rl_dest, rl_result);
     }
@@ -254,7 +255,7 @@
       return;
     }
     case Instruction::LONG_TO_DOUBLE:
-      if (Gen64Bit()) {
+      if (cu_->target64) {
         rcSrc = kCoreReg;
         op = kX86Cvtsqi2sdRR;
         break;
@@ -262,7 +263,7 @@
       GenLongToFP(rl_dest, rl_src, true /* is_double */);
       return;
     case Instruction::LONG_TO_FLOAT:
-      if (Gen64Bit()) {
+      if (cu_->target64) {
         rcSrc = kCoreReg;
         op = kX86Cvtsqi2ssRR;
        break;
@@ -270,7 +271,7 @@
       GenLongToFP(rl_dest, rl_src, false /* is_double */);
       return;
     case Instruction::FLOAT_TO_LONG:
-      if (Gen64Bit()) {
+      if (cu_->target64) {
         rl_src = LoadValue(rl_src, kFPReg);
         // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
         ClobberSReg(rl_dest.s_reg_low);
@@ -295,7 +296,7 @@
       }
       return;
     case Instruction::DOUBLE_TO_LONG:
-      if (Gen64Bit()) {
+      if (cu_->target64) {
         rl_src = LoadValueWide(rl_src, kFPReg);
         // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
         ClobberSReg(rl_dest.s_reg_low);
@@ -363,7 +364,7 @@
     } else {
       // It must have been register promoted if it is not a temp but is still in physical
       // register. Since we need it to be in memory to convert, we place it there now.
-      StoreBaseDisp(TargetReg(kSp), src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
+      StoreBaseDisp(rs_rX86_SP, src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32,
                     kNotVolatile);
     }
   }
@@ -374,7 +375,7 @@
       FlushSpecificReg(reg_info);
       ResetDef(rl_src2.reg);
     } else {
-      StoreBaseDisp(TargetReg(kSp), src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
+      StoreBaseDisp(rs_rX86_SP, src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32,
                     kNotVolatile);
     }
   }
@@ -382,12 +383,12 @@
   int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M;
 
   // Push the source virtual registers onto the x87 stack.
-  LIR *fld_2 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(),
+  LIR *fld_2 = NewLIR2NoDest(fld_opcode, rs_rX86_SP.GetReg(),
                              src2_v_reg_offset + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2,
                           true /* is_load */, is_double /* is64bit */);
 
-  LIR *fld_1 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(),
+  LIR *fld_1 = NewLIR2NoDest(fld_opcode, rs_rX86_SP.GetReg(),
                              src1_v_reg_offset + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2,
                           true /* is_load */, is_double /* is64bit */);
@@ -416,7 +417,7 @@
   // Now store result in the destination VR's stack location.
   int displacement = dest_v_reg_offset + LOWORD_OFFSET;
   int opcode = is_double ? kX86Fst64M : kX86Fst32M;
-  LIR *fst = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement);
+  LIR *fst = NewLIR2NoDest(opcode, rs_rX86_SP.GetReg(), displacement);
   AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */);
 
   // Pop ST(1) and ST(0).
@@ -435,10 +436,10 @@
   if (rl_result.location == kLocPhysReg) {
     rl_result = EvalLoc(rl_dest, kFPReg, true);
     if (is_double) {
-      LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
+      LoadBaseDisp(rs_rX86_SP, dest_v_reg_offset, rl_result.reg, k64, kNotVolatile);
       StoreFinalValueWide(rl_dest, rl_result);
     } else {
-      Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg);
+      Load32Disp(rs_rX86_SP, dest_v_reg_offset, rl_result.reg);
       StoreFinalValue(rl_dest, rl_result);
     }
   }
@@ -569,7 +570,7 @@
   RegLocation rl_result;
   rl_src = LoadValueWide(rl_src, kCoreReg);
   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     OpRegCopy(rl_result.reg, rl_src.reg);
     // Flip sign bit.
     NewLIR2(kX86Rol64RI, rl_result.reg.GetReg(), 1);
@@ -626,7 +627,7 @@
     // Operate directly into memory.
     int displacement = SRegOffset(rl_dest.s_reg_low);
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    LIR *lir = NewLIR3(kX86And32MI, TargetReg(kSp).GetReg(), displacement, 0x7fffffff);
+    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP.GetReg(), displacement, 0x7fffffff);
     AnnotateDalvikRegAccess(lir, displacement >> 2, false /*is_load */, false /* is_64bit */);
     AnnotateDalvikRegAccess(lir, displacement >> 2, true /* is_load */, false /* is_64bit*/);
     return true;
@@ -647,6 +648,15 @@
     // Result is unused, the code is dead. Inlining successful, no code generated.
     return true;
   }
+  if (cu_->target64) {
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegCopyWide(rl_result.reg, rl_src.reg);
+    OpRegImm(kOpLsl, rl_result.reg, 1);
+    OpRegImm(kOpLsr, rl_result.reg, 1);
+    StoreValueWide(rl_dest, rl_result);
+    return true;
+  }
   int v_src_reg = mir_graph_->SRegToVReg(rl_src.s_reg_low);
   int v_dst_reg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
   rl_src = UpdateLocWide(rl_src);
@@ -681,7 +691,7 @@
     // Operate directly into memory.
     int displacement = SRegOffset(rl_dest.s_reg_low);
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    LIR *lir = NewLIR3(kX86And32MI, TargetReg(kSp).GetReg(), displacement  + HIWORD_OFFSET, 0x7fffffff);
+    LIR *lir = NewLIR3(kX86And32MI, rs_rX86_SP.GetReg(), displacement  + HIWORD_OFFSET, 0x7fffffff);
     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is_64bit*/);
     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /*is_load */, true /* is_64bit */);
     return true;
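
The 64-bit path added to GenInlinedAbsDouble clears the sign bit of the raw 64-bit value with a logical shift left by one followed by a logical shift right by one. A host-side sketch of the same bit trick (illustrative, not the generated LIR):

    #include <cstdint>
    #include <cstring>

    // abs(double) without a branch or FP compare: shl 1 then lsr 1 drops bit 63.
    inline double AbsViaShifts(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits = (bits << 1) >> 1;  // clear the sign bit
      double result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
    }

    // For example, AbsViaShifts(-3.5) == 3.5 and -0.0 maps to +0.0.
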
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
old mode 100644
new mode 100755
index fd20a81..cf29e52
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -18,6 +18,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "mirror/array.h"
 #include "x86_lir.h"
 
@@ -31,7 +32,7 @@
  */
 void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
     RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -92,7 +93,7 @@
 }
 
 LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
-  NewLIR2(kX86Cmp32RR, src1.GetReg(), src2.GetReg());
+  NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
   X86ConditionCode cc = X86ConditionEncoding(cond);
   LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ ,
                         cc);
@@ -104,9 +105,13 @@
                                 int check_value, LIR* target) {
   if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
     // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
-    NewLIR2(kX86Test32RR, reg.GetReg(), reg.GetReg());
+    NewLIR2(reg.Is64Bit() ? kX86Test64RR : kX86Test32RR, reg.GetReg(), reg.GetReg());
   } else {
-    NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
+    if (reg.Is64Bit()) {
+      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
+    } else {
+      NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
+    }
   }
   X86ConditionCode cc = X86ConditionEncoding(cond);
   LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
@@ -240,7 +245,7 @@
     // FIXME: depending on how you use registers you could get a false != mismatch when dealing
     // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
     const bool result_reg_same_as_src =
-        (rl_src.location == kLocPhysReg && rl_src.reg.GetReg() == rl_result.reg.GetReg());
+        (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
     const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
     const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
     const bool catch_all_case = !(true_zero_case || false_zero_case);
@@ -322,7 +327,7 @@
     return;
   }
 
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     rl_src1 = LoadValueWide(rl_src1, kCoreReg);
     rl_src2 = LoadValueWide(rl_src2, kCoreReg);
 
@@ -376,7 +381,7 @@
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
 
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     if (is_equality_test && val == 0) {
       // We can simplify the comparison against 0 for == and !=.
       NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
@@ -708,16 +713,20 @@
   return rl_result;
 }
 
-bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
+bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
   DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
 
+  if (is_long && cu_->instruction_set == kX86) {
+    return false;
+  }
+
   // Get the two arguments to the invoke and place them in GP registers.
   RegLocation rl_src1 = info->args[0];
-  RegLocation rl_src2 = info->args[1];
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
+  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
+  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
+  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
 
-  RegLocation rl_dest = InlineTarget(info);
+  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
 
   /*
@@ -743,43 +752,63 @@
     OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
   }
 
-  StoreValue(rl_dest, rl_result);
-  return true;
-}
-
-bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
-  RegLocation rl_src_address = info->args[0];  // long address
-  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
-  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
-  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  // Unaligned access is allowed on x86.
-  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
-  if (size == k64) {
+  if (is_long) {
     StoreValueWide(rl_dest, rl_result);
   } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
     StoreValue(rl_dest, rl_result);
   }
   return true;
 }
 
+bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
+  return false;
+// Turned off until tests available in Art.
+//
+//  RegLocation rl_src_address = info->args[0];  // long address
+//  RegLocation rl_address;
+//  if (!cu_->target64) {
+//    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
+//    rl_address = LoadValue(rl_src_address, kCoreReg);
+//  } else {
+//    rl_address = LoadValueWide(rl_src_address, kCoreReg);
+//  }
+//  RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
+//  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+//  // Unaligned access is allowed on x86.
+//  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
+//  if (size == k64) {
+//    StoreValueWide(rl_dest, rl_result);
+//  } else {
+//    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+//    StoreValue(rl_dest, rl_result);
+//  }
+//  return true;
+}
+
 bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
-  RegLocation rl_src_address = info->args[0];  // long address
-  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
-  RegLocation rl_src_value = info->args[2];  // [size] value
-  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
-  if (size == k64) {
-    // Unaligned access is allowed on x86.
-    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-  } else {
-    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-    // Unaligned access is allowed on x86.
-    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
-    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-  }
-  return true;
+  return false;
+// Turned off until tests available in Art.
+//
+//  RegLocation rl_src_address = info->args[0];  // long address
+//  RegLocation rl_address;
+//  if (!cu_->target64) {
+//    rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[0]
+//    rl_address = LoadValue(rl_src_address, kCoreReg);
+//  } else {
+//    rl_address = LoadValueWide(rl_src_address, kCoreReg);
+//  }
+//  RegLocation rl_src_value = info->args[2];  // [size] value
+//  if (size == k64) {
+//    // Unaligned access is allowed on x86.
+//    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
+//    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
+//  } else {
+//    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+//    // Unaligned access is allowed on x86.
+//    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
+//    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
+//  }
+//  return true;
 }
 
 void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
@@ -802,6 +831,10 @@
 
 bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
   DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
+  if (cu_->instruction_set == kX86_64) {
+    return false;  // TODO: Verify working on x86-64.
+  }
+
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object - known non-null
   RegLocation rl_src_offset = info->args[2];  // long low
@@ -811,7 +844,24 @@
   RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
   // If is_long, high half is in info->args[7]
 
-  if (is_long) {
+  if (is_long && cu_->target64) {
+    // RAX must hold the expected value for CMPXCHG. Neither rl_new_value nor r_ptr may be in RAX.
+    FlushReg(rs_r0);
+    Clobber(rs_r0);
+    LockTemp(rs_r0);
+
+    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
+    RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
+    RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
+    LoadValueDirectWide(rl_src_expected, rs_r0);
+    NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
+
+    // After a store we need to insert a barrier in case of a potential load. Since the
+    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
+    GenMemBarrier(kStoreLoad);
+
+    FreeTemp(rs_r0);
+  } else if (is_long) {
     // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
     // TODO: CFI support.
     FlushAllRegs();
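
The new 64-bit case above pins the expected value in RAX (CMPXCHG implicitly compares against and writes back the accumulator) and emits a single LOCK CMPXCHG into the array/offset address, relying on its full-barrier semantics. The operation it implements is an ordinary strong compare-and-swap; a host-side sketch of the semantics using the standard atomics API rather than the generated code:

    #include <atomic>
    #include <cstdint>

    // If *field == expected, store new_value and return true; otherwise false.
    // Sequentially consistent ordering stands in for the full-barrier behaviour.
    inline bool CasLong(std::atomic<int64_t>* field, int64_t expected, int64_t new_value) {
      return field->compare_exchange_strong(expected, new_value,
                                            std::memory_order_seq_cst);
    }
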
@@ -845,14 +895,14 @@
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
     if (!obj_in_si && !obj_in_di) {
-      LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
+      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
       // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
       DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
       int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
       AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
     }
     if (!off_in_si && !off_in_di) {
-      LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
+      LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
       // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
       DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
       int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
@@ -969,7 +1019,7 @@
 }
 
 void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     DCHECK(reg.Is64Bit());
 
     NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
@@ -1006,23 +1056,24 @@
 
       RegStorage new_index = index_;
       // Move index out of kArg1, either directly to kArg0, or to kArg2.
-      if (index_.GetReg() == m2l_->TargetReg(kArg1).GetReg()) {
-        if (array_base_.GetReg() == m2l_->TargetReg(kArg0).GetReg()) {
-          m2l_->OpRegCopy(m2l_->TargetReg(kArg2), index_);
-          new_index = m2l_->TargetReg(kArg2);
+      // TODO: clean up to compare registers by type rather than by raw number.
+      if (index_ == m2l_->TargetReg(kArg1, false)) {
+        if (array_base_ == m2l_->TargetRefReg(kArg0)) {
+          m2l_->OpRegCopy(m2l_->TargetReg(kArg2, false), index_);
+          new_index = m2l_->TargetReg(kArg2, false);
         } else {
-          m2l_->OpRegCopy(m2l_->TargetReg(kArg0), index_);
-          new_index = m2l_->TargetReg(kArg0);
+          m2l_->OpRegCopy(m2l_->TargetReg(kArg0, false), index_);
+          new_index = m2l_->TargetReg(kArg0, false);
         }
       }
       // Load array length to kArg1.
-      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
+      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, false), array_base_, len_offset_);
       if (cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
-                                      new_index, m2l_->TargetReg(kArg1), true);
+                                      new_index, m2l_->TargetReg(kArg1, false), true);
       } else {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                      new_index, m2l_->TargetReg(kArg1), true);
+                                      new_index, m2l_->TargetReg(kArg1, false), true);
       }
     }
 
@@ -1055,14 +1106,14 @@
       GenerateTargetLabel(kPseudoThrowTarget);
 
       // Load array length to kArg1.
-      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
-      m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_);
+      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, false), array_base_, len_offset_);
+      m2l_->LoadConstant(m2l_->TargetReg(kArg0, false), index_);
       if (cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
-                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+                                      m2l_->TargetReg(kArg0, false), m2l_->TargetReg(kArg1, false), true);
       } else {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+                                      m2l_->TargetReg(kArg0, false), m2l_->TargetReg(kArg1, false), true);
       }
     }
 
@@ -1153,7 +1204,7 @@
   // All memory accesses below reference dalvik regs.
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
 
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     if (rl_src1.is_const) {
       std::swap(rl_src1, rl_src2);
     }
@@ -1382,7 +1433,7 @@
   if (rl_src.location == kLocPhysReg) {
     // Both operands are in registers.
     // But we must ensure that rl_src is in pair
-    if (Gen64Bit()) {
+    if (cu_->target64) {
       NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
     } else {
       rl_src = LoadValueWide(rl_src, kCoreReg);
@@ -1404,14 +1455,14 @@
   // RHS is in memory.
   DCHECK((rl_src.location == kLocDalvikFrame) ||
          (rl_src.location == kLocCompilerTemp));
-  int r_base = TargetReg(kSp).GetReg();
+  int r_base = rs_rX86_SP.GetReg();
   int displacement = SRegOffset(rl_src.s_reg_low);
 
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET);
+  LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     x86op = GetOpcode(op, rl_dest, rl_src, true);
     lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
@@ -1438,17 +1489,17 @@
 
   // Operate directly into memory.
   X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
-  int r_base = TargetReg(kSp).GetReg();
+  int r_base = rs_rX86_SP.GetReg();
   int displacement = SRegOffset(rl_dest.s_reg_low);
 
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
-                     Gen64Bit() ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
+                     cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           false /* is_load */, true /* is64bit */);
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     x86op = GetOpcode(op, rl_dest, rl_src, true);
     lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
     AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
@@ -1502,7 +1553,7 @@
 
   // Get one of the source operands into temporary register.
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     if (IsTemp(rl_src1.reg)) {
       GenLongRegOrMemOp(rl_src1, rl_src2, op);
     } else if (is_commutative) {
@@ -1571,7 +1622,7 @@
 }
 
 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     rl_src = LoadValueWide(rl_src, kCoreReg);
     RegLocation rl_result;
     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
@@ -1585,7 +1636,7 @@
 
 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2, bool is_div) {
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     LOG(FATAL) << "Unexpected use GenDivRemLong()";
     return;
   }
@@ -1640,7 +1691,7 @@
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   RegLocation rl_result;
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     rl_result = EvalLocWide(rl_dest, kCoreReg, true);
     OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
   } else {
@@ -1675,7 +1726,7 @@
 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
   DCHECK_EQ(kX86_64, cu_->instruction_set);
   X86OpCode opcode = kX86Bkpt;
-  if (Gen64Bit() && r_dest.Is64BitSolo()) {
+  if (cu_->target64 && r_dest.Is64BitSolo()) {
     switch (op) {
     case kOpCmp: opcode = kX86Cmp64RT;  break;
     case kOpMov: opcode = kX86Mov64RT;  break;
@@ -1807,7 +1858,7 @@
 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                           RegLocation rl_src, int shift_amount) {
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
     switch (opcode) {
       case Instruction::SHL_LONG:
@@ -1981,7 +2032,7 @@
                                 bool is_high_op) {
   bool rhs_in_mem = rhs.location != kLocPhysReg;
   bool dest_in_mem = dest.location != kLocPhysReg;
-  bool is64Bit = Gen64Bit();
+  bool is64Bit = cu_->target64;
   DCHECK(!rhs_in_mem || !dest_in_mem);
   switch (op) {
     case Instruction::ADD_LONG:
@@ -2036,7 +2087,7 @@
 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
                                 int32_t value) {
   bool in_mem = loc.location != kLocPhysReg;
-  bool is64Bit = Gen64Bit();
+  bool is64Bit = cu_->target64;
   bool byte_imm = IS_SIMM8(value);
   DCHECK(in_mem || !loc.reg.IsFloat());
   switch (op) {
@@ -2110,7 +2161,7 @@
   DCHECK(rl_src.is_const);
   int64_t val = mir_graph_->ConstantValueWide(rl_src);
 
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     // We can do with imm only if it fits 32 bit
     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
       return false;
@@ -2120,7 +2171,7 @@
 
     if ((rl_dest.location == kLocDalvikFrame) ||
         (rl_dest.location == kLocCompilerTemp)) {
-      int r_base = TargetReg(kSp).GetReg();
+      int r_base = rs_rX86_SP.GetReg();
       int displacement = SRegOffset(rl_dest.s_reg_low);
 
       ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
@@ -2151,7 +2202,7 @@
   // Can we just do this into memory?
   if ((rl_dest.location == kLocDalvikFrame) ||
       (rl_dest.location == kLocCompilerTemp)) {
-    int r_base = TargetReg(kSp).GetReg();
+    int r_base = rs_rX86_SP.GetReg();
     int displacement = SRegOffset(rl_dest.s_reg_low);
 
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
@@ -2195,7 +2246,7 @@
   DCHECK(rl_src2.is_const);
   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
 
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     // We can do with imm only if it fits 32 bit
     if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
       return false;
@@ -2269,7 +2320,8 @@
   RegStorage result_reg = rl_result.reg;
 
   // For 32-bit, SETcc only works with EAX..EDX.
-  if (result_reg == object.reg || !IsByteRegister(result_reg)) {
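+  // Compare physical register numbers so that a 64-bit view of the object register still counts
+  // as a clash with the (32-bit) result register.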
+  RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
+  if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
     result_reg = AllocateByteRegister();
   }
 
@@ -2335,8 +2387,10 @@
   FlushAllRegs();
   // May generate a call - use explicit registers.
   LockCallTemps();
-  LoadCurrMethodDirect(TargetReg(kArg1));  // kArg1 gets current Method*.
-  RegStorage class_reg = TargetReg(kArg2);  // kArg2 will hold the Class*.
+  RegStorage method_reg = TargetRefReg(kArg1);  // kArg1 gets current Method*.
+  LoadCurrMethodDirect(method_reg);
+  RegStorage class_reg = TargetRefReg(kArg2);  // kArg2 will hold the Class*.
+  RegStorage ref_reg = TargetRefReg(kArg0);  // kArg0 will hold the ref.
   // Reference must end up in kArg0.
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
@@ -2348,16 +2402,16 @@
       CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                            type_idx, true);
     }
-    OpRegCopy(class_reg, TargetReg(kRet0));
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
+    OpRegCopy(class_reg, TargetRefReg(kRet0));
+    LoadValueDirectFixed(rl_src, ref_reg);
   } else if (use_declaring_class) {
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
-    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+    LoadValueDirectFixed(rl_src, ref_reg);
+    LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                 class_reg, kNotVolatile);
   } else {
     // Load dex cache entry into class_reg (kArg2).
-    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
-    LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+    LoadValueDirectFixed(rl_src, ref_reg);
+    LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                 class_reg, kNotVolatile);
     int32_t offset_of_type =
         mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
@@ -2372,8 +2426,8 @@
       } else {
         CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
       }
-      OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path.
-      LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* Reload Ref. */
+      OpRegCopy(class_reg, TargetRefReg(kRet0));  // Align usage with fast path.
+      LoadValueDirectFixed(rl_src, ref_reg);  /* Reload Ref. */
       // Rejoin code paths
       LIR* hop_target = NewLIR0(kPseudoTargetLabel);
       hop_branch->target = hop_target;
@@ -2383,34 +2437,35 @@
   RegLocation rl_result = GetReturn(kRefReg);
 
   // On x86-64 kArg0 is not EAX, so we have to copy ref from kArg0 to EAX.
-  if (Gen64Bit()) {
-    OpRegCopy(rl_result.reg, TargetReg(kArg0));
+  if (cu_->target64) {
+    OpRegCopy(rl_result.reg, ref_reg);
   }
 
   // For 32-bit, SETcc only works with EAX..EDX.
   DCHECK_LT(rl_result.reg.GetRegNum(), 4);
 
   // Is the class NULL?
-  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
+  LIR* branch1 = OpCmpImmBranch(kCondEq, ref_reg, 0, NULL);
 
+  RegStorage ref_class_reg = TargetRefReg(kArg1);  // kArg1 will hold the ref's Class*.
   /* Load object->klass_. */
   DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadRefDisp(TargetReg(kArg0),  mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1),
+  LoadRefDisp(ref_reg,  mirror::Object::ClassOffset().Int32Value(), ref_class_reg,
               kNotVolatile);
   /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */
   LIR* branchover = nullptr;
   if (type_known_final) {
     // Ensure top 3 bytes of result are 0.
     LoadConstant(rl_result.reg, 0);
-    OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2));
+    OpRegReg(kOpCmp, ref_class_reg, class_reg);
     // Set the low byte of the result to 0 or 1 from the compare condition code.
     NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondEq);
   } else {
     if (!type_known_abstract) {
       LoadConstant(rl_result.reg, 1);     // Assume result succeeds.
-      branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
+      branchover = OpCmpBranch(kCondEq, ref_class_reg, class_reg, NULL);
     }
-    OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
+    OpRegCopy(TargetRefReg(kArg0), class_reg);
     if (cu_->target64) {
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial));
     } else {
@@ -2550,7 +2605,7 @@
   } else {
     if (shift_op) {
       // X86 doesn't require masking and must use ECX.
-      RegStorage t_reg = TargetReg(kCount);  // rCX
+      RegStorage t_reg = TargetReg(kCount, false);  // rCX
       LoadValueDirectFixed(rl_rhs, t_reg);
       if (is_two_addr) {
         // Can we do this directly into memory?
@@ -2684,7 +2739,7 @@
 }
 
 void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     Mir2Lir::GenIntToLong(rl_dest, rl_src);
     return;
   }
@@ -2705,7 +2760,7 @@
 
 void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                         RegLocation rl_src1, RegLocation rl_shift) {
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
     return;
   }
@@ -2738,7 +2793,7 @@
   }
 
   // X86 doesn't require masking and must use ECX.
-  RegStorage t_reg = TargetReg(kCount);  // rCX
+  RegStorage t_reg = TargetReg(kCount, false);  // rCX
   LoadValueDirectFixed(rl_shift, t_reg);
   if (is_two_addr) {
     // Can we do this directly into memory?
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
old mode 100644
new mode 100755
index 408a40a..72e47d0
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,6 +20,7 @@
 #include "codegen_x86.h"
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
 #include "mirror/array.h"
 #include "mirror/string.h"
 #include "x86_lir.h"
@@ -153,12 +154,11 @@
 }
 
 RegLocation X86Mir2Lir::LocCReturnRef() {
-  // FIXME: return x86_loc_c_return_wide for x86_64 when wide refs supported.
-  return x86_loc_c_return;
+  return cu_->target64 ? x86_64_loc_c_return_ref : x86_loc_c_return_ref;
 }
 
 RegLocation X86Mir2Lir::LocCReturnWide() {
-  return Gen64Bit() ? x86_64_loc_c_return_wide : x86_loc_c_return_wide;
+  return cu_->target64 ? x86_64_loc_c_return_wide : x86_loc_c_return_wide;
 }
 
 RegLocation X86Mir2Lir::LocCReturnFloat() {
@@ -169,8 +169,8 @@
   return x86_loc_c_return_double;
 }
 
-// Return a target-dependent special register.
-RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
+// Return a target-dependent special register for 32-bit.
+RegStorage X86Mir2Lir::TargetReg32(SpecialTargetRegister reg) {
   RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
     case kSelf: res_reg = RegStorage::InvalidReg(); break;
@@ -196,13 +196,18 @@
     case kRet1: res_reg = rs_rX86_RET1; break;
     case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
     case kHiddenArg: res_reg = rs_rAX; break;
-    case kHiddenFpArg: DCHECK(!Gen64Bit()); res_reg = rs_fr0; break;
+    case kHiddenFpArg: DCHECK(!cu_->target64); res_reg = rs_fr0; break;
     case kCount: res_reg = rs_rX86_COUNT; break;
     default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
 
+RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
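+  // Deliberately unusable on x86: callers must state the register width explicitly via the
+  // two-argument TargetReg(reg, is_wide), TargetReg32() or TargetRefReg(), so that 32-bit and
+  // 64-bit register views are never ambiguous.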
+  LOG(FATAL) << "Do not use this function!!!";
+  return RegStorage::InvalidReg();
+}
+
 /*
  * Decode the register id.
  */
@@ -330,6 +335,11 @@
           case 'd':
             buf += StringPrintf("%d", operand);
             break;
+          case 'q': {
+             int64_t value = static_cast<int64_t>(static_cast<int64_t>(operand) << 32 |
+                             static_cast<uint32_t>(lir->operands[operand_number+1]));
+             buf += StringPrintf("%" PRId64, value);
+             break;
+          }
           case 'p': {
             EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
             buf += StringPrintf("0x%08x", tab_rec->offset);
@@ -409,30 +419,16 @@
   num_core_spills_++;
 }
 
-/*
- * Mark a callee-save fp register as promoted.  Note that
- * vpush/vpop uses contiguous register lists so we must
- * include any holes in the mask.  Associate holes with
- * Dalvik register INVALID_VREG (0xFFFFU).
- */
-void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
-  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
-}
-
-void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
-  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
-}
-
 RegStorage X86Mir2Lir::AllocateByteRegister() {
   RegStorage reg = AllocTypedTemp(false, kCoreReg);
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     DCHECK_LT(reg.GetRegNum(), rs_rX86_SP.GetRegNum());
   }
   return reg;
 }
 
 bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
-  return Gen64Bit() || reg.GetRegNum() < rs_rX86_SP.GetRegNum();
+  return cu_->target64 || reg.GetRegNum() < rs_rX86_SP.GetRegNum();
 }
 
 /* Clobber all regs that might be used by an external C call */
@@ -451,7 +447,7 @@
   Clobber(rs_fr6);
   Clobber(rs_fr7);
 
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     Clobber(rs_r8);
     Clobber(rs_r9);
     Clobber(rs_r10);
@@ -494,7 +490,7 @@
   LockTemp(rs_rX86_ARG1);
   LockTemp(rs_rX86_ARG2);
   LockTemp(rs_rX86_ARG3);
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     LockTemp(rs_rX86_ARG4);
     LockTemp(rs_rX86_ARG5);
     LockTemp(rs_rX86_FARG0);
@@ -514,7 +510,7 @@
   FreeTemp(rs_rX86_ARG1);
   FreeTemp(rs_rX86_ARG2);
   FreeTemp(rs_rX86_ARG3);
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     FreeTemp(rs_rX86_ARG4);
     FreeTemp(rs_rX86_ARG5);
     FreeTemp(rs_rX86_FARG0);
@@ -586,7 +582,7 @@
 }
 
 void X86Mir2Lir::CompilerInitializeRegAlloc() {
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64,
                                           dp_regs_64, reserved_regs_64, reserved_regs_64q,
                                           core_temps_64, core_temps_64q, sp_temps_64, dp_temps_64);
@@ -599,7 +595,7 @@
   // Target-specific adjustments.
 
   // Add in XMM registers.
-  const ArrayRef<const RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32;
+  const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
   for (RegStorage reg : *xp_temps) {
     RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
     reginfo_map_.Put(reg.GetReg(), info);
@@ -627,7 +623,7 @@
     DCHECK_EQ(info->StorageMask(), 0x1U);
   }
 
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     // Alias 32bit W registers to corresponding 64bit X registers.
     GrowableArray<RegisterInfo*>::Iterator w_it(&reg_pool_->core_regs_);
     for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) {
@@ -690,7 +686,7 @@
 
 RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
   // X86_64 can handle any size.
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     if (size == kReference) {
       return kRefReg;
     }
@@ -707,13 +703,13 @@
   return RegClassBySize(size);
 }
 
-X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
+X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena),
       base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
       method_address_insns_(arena, 100, kGrowableArrayMisc),
       class_type_address_insns_(arena, 100, kGrowableArrayMisc),
       call_method_insns_(arena, 100, kGrowableArrayMisc),
-      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
+      stack_decrement_(nullptr), stack_increment_(nullptr),
       const_vectors_(nullptr) {
   store_method_addr_used_ = false;
   if (kIsDebugBuild) {
@@ -725,7 +721,7 @@
       }
     }
   }
-  if (Gen64Bit()) {
+  if (cu_->target64) {
     rs_rX86_SP = rs_rX86_SP_64;
 
     rs_rX86_ARG0 = rs_rDI;
@@ -798,12 +794,7 @@
 
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena) {
-  return new X86Mir2Lir(cu, mir_graph, arena, false);
-}
-
-Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
-                          ArenaAllocator* const arena) {
-  return new X86Mir2Lir(cu, mir_graph, arena, true);
+  return new X86Mir2Lir(cu, mir_graph, arena);
 }
 
 // Not used in x86
@@ -845,7 +836,7 @@
       (rl_dest.location == kLocCompilerTemp)) {
     int32_t val_lo = Low32Bits(value);
     int32_t val_hi = High32Bits(value);
-    int r_base = TargetReg(kSp).GetReg();
+    int r_base = rs_rX86_SP.GetReg();
     int displacement = SRegOffset(rl_dest.s_reg_low);
 
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
@@ -900,7 +891,7 @@
   uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);
 
   // Generate the move instruction with the unique pointer and save index, dex_file, and type.
-  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
+  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg, false).GetReg(),
                      static_cast<int>(target_method_id_ptr), target_method_idx,
                      WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
   AppendLIR(move);
@@ -917,7 +908,7 @@
   uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
 
   // Generate the move instruction with the unique pointer and save index and type.
-  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
+  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg, false).GetReg(),
                      static_cast<int>(ptr), type_idx);
   AppendLIR(move);
   class_type_address_insns_.Insert(move);
@@ -1044,11 +1035,12 @@
   // ECX: count: number of words to be searched.
   // EDI: String being searched.
   // EDX: temporary during execution.
-  // EBX: temporary during execution.
+  // EBX or R11: temporary during execution (depending on mode).
 
   RegLocation rl_obj = info->args[0];
   RegLocation rl_char = info->args[1];
   RegLocation rl_start;  // Note: only present in III flavor or IndexOf.
+  RegStorage tmpReg = cu_->target64 ? rs_r11 : rs_rBX;
 
   uint32_t char_value =
     rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;
@@ -1121,9 +1113,9 @@
       rl_start = UpdateLocTyped(rl_start, kCoreReg);
       if (rl_start.location == kLocPhysReg) {
         // Handle "start index < 0" case.
-        OpRegReg(kOpXor, rs_rBX, rs_rBX);
-        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
-        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);
+        OpRegReg(kOpXor, tmpReg, tmpReg);
+        OpRegReg(kOpCmp, rl_start.reg, tmpReg);
+        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, tmpReg);
 
         // The length of the string should be greater than the start index.
         length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
@@ -1135,19 +1127,19 @@
         }
       } else {
         // Load the start index from stack, remembering that we pushed EDI.
-        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
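+        // Note: a push occupies 8 bytes on x86-64 but only 4 bytes on x86, hence the extra slot.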
+        int displacement = SRegOffset(rl_start.s_reg_low) + (cu_->target64 ? 2 : 1) * sizeof(uint32_t);
         {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          Load32Disp(rs_rX86_SP, displacement, rs_rBX);
+          Load32Disp(rs_rX86_SP, displacement, tmpReg);
         }
         OpRegReg(kOpXor, rs_rDI, rs_rDI);
-        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
-        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);
+        OpRegReg(kOpCmp, tmpReg, rs_rDI);
+        OpCondRegReg(kOpCmov, kCondLt, tmpReg, rs_rDI);
 
-        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
-        OpRegReg(kOpSub, rs_rCX, rs_rBX);
+        length_compare = OpCmpBranch(kCondLe, rs_rCX, tmpReg, nullptr);
+        OpRegReg(kOpSub, rs_rCX, tmpReg);
         // Put the start index to stack.
-        NewLIR1(kX86Push32R, rs_rBX.GetReg());
+        NewLIR1(kX86Push32R, tmpReg.GetReg());
         is_index_on_stack = true;
       }
     }
@@ -1156,26 +1148,26 @@
 
   // ECX now contains the count in words to be searched.
 
-  // Load the address of the string into EBX.
+  // Load the address of the string into R11 or EBX (depending on mode).
   // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
   Load32Disp(rs_rDX, value_offset, rs_rDI);
-  Load32Disp(rs_rDX, offset_offset, rs_rBX);
-  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);
+  Load32Disp(rs_rDX, offset_offset, tmpReg);
+  OpLea(tmpReg, rs_rDI, tmpReg, 1, data_offset);
 
   // Now compute into EDI where the search will start.
   if (zero_based || rl_start.is_const) {
     if (start_value == 0) {
-      OpRegCopy(rs_rDI, rs_rBX);
+      OpRegCopy(rs_rDI, tmpReg);
     } else {
-      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
+      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), tmpReg.GetReg(), 2 * start_value);
     }
   } else {
     if (is_index_on_stack == true) {
       // Load the start index from stack.
       NewLIR1(kX86Pop32R, rs_rDX.GetReg());
-      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
+      OpLea(rs_rDI, tmpReg, rs_rDX, 1, 0);
     } else {
-      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
+      OpLea(rs_rDI, tmpReg, rl_start.reg, 1, 0);
     }
   }
 
@@ -1188,7 +1180,7 @@
 
   // yes, we matched.  Compute the index of the result.
   // index = ((curr_ptr - orig_ptr) / 2) - 1.
-  OpRegReg(kOpSub, rs_rDI, rs_rBX);
+  OpRegReg(kOpSub, rs_rDI, tmpReg);
   OpRegImm(kOpAsr, rs_rDI, 1);
   NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
   LIR *all_done = NewLIR1(kX86Jmp8, 0);
@@ -1759,29 +1751,22 @@
 
 // ------------ ABI support: mapping of args to physical registers -------------
 RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) {
-  const RegStorage coreArgMappingToPhysicalReg[] = {rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3, rs_rX86_ARG4, rs_rX86_ARG5};
-  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
-  const RegStorage fpArgMappingToPhysicalReg[] = {rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3,
-                                                  rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7};
-  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);
+  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3, kArg4, kArg5};
+  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(SpecialTargetRegister);
+  const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3,
+                                                  kFArg4, kFArg5, kFArg6, kFArg7};
+  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(SpecialTargetRegister);
 
-  RegStorage result = RegStorage::InvalidReg();
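+  // Hand out argument registers in order; once the pool for a register class is exhausted,
+  // InvalidReg is returned and the argument is passed on the stack instead.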
   if (is_double_or_float) {
     if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
-      if (result.Valid()) {
-        result = is_wide ? RegStorage::FloatSolo64(result.GetReg()) : RegStorage::FloatSolo32(result.GetReg());
-      }
+      return ml_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], is_wide);
     }
   } else {
     if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      result = coreArgMappingToPhysicalReg[cur_core_reg_++];
-      if (result.Valid()) {
-        result = is_wide ? RegStorage::Solo64(result.GetReg()) : RegStorage::Solo32(result.GetReg());
-      }
+      return ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], is_wide);
     }
   }
-  return result;
+  return RegStorage::InvalidReg();
 }
 
 RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
@@ -1811,7 +1796,7 @@
 }
 
 RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     return GetCoreArgMappingToPhysicalReg(arg_num);
   }
 
@@ -1819,7 +1804,7 @@
     int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
     RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
 
-    InToRegStorageX86_64Mapper mapper;
+    InToRegStorageX86_64Mapper mapper(this);
     in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper);
   }
   return in_to_reg_storage_mapping_.Get(arg_num);
@@ -1851,7 +1836,7 @@
  * with one location record per word of argument.
  */
 void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
-  if (!Gen64Bit()) return Mir2Lir::FlushIns(ArgLocs, rl_method);
+  if (!cu_->target64) return Mir2Lir::FlushIns(ArgLocs, rl_method);
   /*
    * Dummy up a RegLocation for the incoming Method*
    * It will attempt to keep kArg0 live (or copy it to home location
@@ -1860,13 +1845,13 @@
 
   RegLocation rl_src = rl_method;
   rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetReg(kArg0);
+  rl_src.reg = TargetRefReg(kArg0);
   rl_src.home = false;
   MarkLive(rl_src);
   StoreValue(rl_method, rl_src);
   // If Method* has been promoted, explicitly flush
   if (rl_method.location == kLocPhysReg) {
-    StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0), kNotVolatile);
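+    // Heap references are 32-bit, so flush only the 32-bit view of kArg0.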
+    StoreRefDisp(rs_rX86_SP, 0, As32BitReg(TargetRefReg(kArg0)), kNotVolatile);
   }
 
   if (cu_->num_ins == 0) {
@@ -1888,53 +1873,41 @@
    */
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   for (int i = 0; i < cu_->num_ins; i++) {
-    PromotionMap* v_map = &promotion_map_[start_vreg + i];
-    RegStorage reg = RegStorage::InvalidReg();
     // get reg corresponding to input
-    reg = GetArgMappingToPhysicalReg(i);
+    RegStorage reg = GetArgMappingToPhysicalReg(i);
 
+    RegLocation* t_loc = &ArgLocs[i];
     if (reg.Valid()) {
-      // If arriving in register
-      bool need_flush = true;
-      RegLocation* t_loc = &ArgLocs[i];
-      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
-        need_flush = false;
-      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
-        need_flush = false;
-      } else {
-        need_flush = true;
-      }
+      // If arriving in register.
 
-      // For wide args, force flush if not fully promoted
-      if (t_loc->wide) {
-        PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
-        // Is only half promoted?
-        need_flush |= (p_map->core_location != v_map->core_location) ||
-            (p_map->fp_location != v_map->fp_location);
-      }
-      if (need_flush) {
-        if (t_loc->wide && t_loc->fp) {
-          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64, kNotVolatile);
-          // Increment i to skip the next one
-          i++;
-        } else if (t_loc->wide && !t_loc->fp) {
-          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64, kNotVolatile);
-          // Increment i to skip the next one
-          i++;
+      // The arg location has already been updated with promotion info,
+      // so we can rely on it here.
+      if (t_loc->location == kLocPhysReg) {
+        // Just copy it.
+        OpRegCopy(t_loc->reg, reg);
+      } else {
+        // Needs flush.
+        if (t_loc->ref) {
+          StoreRefDisp(rs_rX86_SP, SRegOffset(start_vreg + i), reg, kNotVolatile);
         } else {
-          Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg);
+          StoreBaseDisp(rs_rX86_SP, SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
+                        kNotVolatile);
         }
       }
     } else {
-      // If arriving in frame & promoted
-      if (v_map->core_location == kLocPhysReg) {
-        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
+      // If arriving in frame & promoted.
+      if (t_loc->location == kLocPhysReg) {
+        if (t_loc->ref) {
+          LoadRefDisp(rs_rX86_SP, SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
+        } else {
+          LoadBaseDisp(rs_rX86_SP, SRegOffset(start_vreg + i), t_loc->reg,
+                       t_loc->wide ? k64 : k32, kNotVolatile);
+        }
       }
-      if (v_map->fp_location == kLocPhysReg) {
-        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
-      }
+    }
+    if (t_loc->wide) {
+      // Increment i to skip the next one.
+      i++;
     }
   }
 }
@@ -1951,7 +1924,7 @@
                                   const MethodReference& target_method,
                                   uint32_t vtable_idx, uintptr_t direct_code,
                                   uintptr_t direct_method, InvokeType type, bool skip_this) {
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     return Mir2Lir::GenDalvikArgsNoRange(info,
                                   call_state, pcrLabel, next_call_insn,
                                   target_method,
@@ -1985,7 +1958,7 @@
                                 const MethodReference& target_method,
                                 uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
                                 InvokeType type, bool skip_this) {
-  if (!Gen64Bit()) {
+  if (!cu_->target64) {
     return Mir2Lir::GenDalvikArgsRange(info, call_state,
                                 pcrLabel, next_call_insn,
                                 target_method,
@@ -1999,7 +1972,7 @@
 
   const int start_index = skip_this ? 1 : 0;
 
-  InToRegStorageX86_64Mapper mapper;
+  InToRegStorageX86_64Mapper mapper(this);
   InToRegStorageMapping in_to_reg_storage_mapping;
   in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
   const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
@@ -2018,14 +1991,14 @@
         loc = UpdateLocWide(loc);
         if (loc.location == kLocPhysReg) {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
+          StoreBaseDisp(rs_rX86_SP, SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
         }
         next_arg += 2;
       } else {
         loc = UpdateLoc(loc);
         if (loc.location == kLocPhysReg) {
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
+          StoreBaseDisp(rs_rX86_SP, SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
         }
         next_arg++;
       }
@@ -2082,23 +2055,23 @@
 
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
         if (src_is_16b_aligned) {
-          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+          ld1 = OpMovRegMem(temp, rs_rX86_SP, current_src_offset, kMovA128FP);
         } else if (src_is_8b_aligned) {
-          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
-          ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1),
+          ld1 = OpMovRegMem(temp, rs_rX86_SP, current_src_offset, kMovLo128FP);
+          ld2 = OpMovRegMem(temp, rs_rX86_SP, current_src_offset + (bytes_to_move >> 1),
                             kMovHi128FP);
         } else {
-          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+          ld1 = OpMovRegMem(temp, rs_rX86_SP, current_src_offset, kMovU128FP);
         }
 
         if (dest_is_16b_aligned) {
-          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+          st1 = OpMovMemReg(rs_rX86_SP, current_dest_offset, temp, kMovA128FP);
         } else if (dest_is_8b_aligned) {
-          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
-          st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1),
+          st1 = OpMovMemReg(rs_rX86_SP, current_dest_offset, temp, kMovLo128FP);
+          st2 = OpMovMemReg(rs_rX86_SP, current_dest_offset + (bytes_to_move >> 1),
                             temp, kMovHi128FP);
         } else {
-          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+          st1 = OpMovMemReg(rs_rX86_SP, current_dest_offset, temp, kMovU128FP);
         }
 
         // TODO If we could keep track of aliasing information for memory accesses that are wider
@@ -2132,11 +2105,11 @@
 
         // Instead of allocating a new temp, simply reuse one of the registers being used
         // for argument passing.
-        RegStorage temp = TargetReg(kArg3);
+        RegStorage temp = TargetReg(kArg3, false);
 
         // Now load the argument VR and store to the outs.
-        Load32Disp(TargetReg(kSp), current_src_offset, temp);
-        Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+        Load32Disp(rs_rX86_SP, current_src_offset, temp);
+        Store32Disp(rs_rX86_SP, current_dest_offset, temp);
       }
 
       current_src_offset += bytes_to_move;
@@ -2148,8 +2121,8 @@
 
   // Now handle rest not registers if they are
   if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regSingle = TargetReg(kArg2);
-    RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
+    RegStorage regSingle = TargetReg(kArg2, false);
+    RegStorage regWide = TargetReg(kArg3, true);
     for (int i = start_index;
          i < last_mapped_in + size_of_the_last_mapped + regs_left_to_pass_via_stack; i++) {
       RegLocation rl_arg = info->args[i];
@@ -2162,17 +2135,17 @@
           ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
           if (rl_arg.wide) {
             if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
+              StoreBaseDisp(rs_rX86_SP, out_offset, rl_arg.reg, k64, kNotVolatile);
             } else {
               LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64, kNotVolatile);
+              StoreBaseDisp(rs_rX86_SP, out_offset, regWide, k64, kNotVolatile);
             }
           } else {
             if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
+              StoreBaseDisp(rs_rX86_SP, out_offset, rl_arg.reg, k32, kNotVolatile);
             } else {
               LoadValueDirectFixed(rl_arg, regSingle);
-              StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32, kNotVolatile);
+              StoreBaseDisp(rs_rX86_SP, out_offset, regSingle, k32, kNotVolatile);
             }
           }
         }
@@ -2208,13 +2181,13 @@
                            direct_code, direct_method, type);
   if (pcrLabel) {
     if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+      *pcrLabel = GenExplicitNullCheck(TargetRefReg(kArg1), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
       // In lieu of generating a check for kArg1 being null, we need to
       // perform a load when doing implicit checks.
       RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1), 0, tmp);
+      Load32Disp(TargetRefReg(kArg1), 0, tmp);
       MarkPossibleNullPointerException(info->opt_flags);
       FreeTemp(tmp);
     }
@@ -2223,4 +2196,3 @@
 }
 
 }  // namespace art
-
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 0352808..657160f 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -20,6 +20,7 @@
 #include "x86_lir.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/reg_storage_eq.h"
 
 namespace art {
 
@@ -121,7 +122,7 @@
   switch (op) {
     case kOpNeg: opcode = r_dest_src.Is64Bit() ? kX86Neg64R : kX86Neg32R; break;
     case kOpNot: opcode = r_dest_src.Is64Bit() ? kX86Not64R : kX86Not32R; break;
-    case kOpRev: opcode = kX86Bswap32R; break;
+    case kOpRev: opcode = r_dest_src.Is64Bit() ? kX86Bswap64R : kX86Bswap32R; break;
     case kOpBlx: opcode = kX86CallR; break;
     default:
       LOG(FATAL) << "Bad case in OpReg " << op;
@@ -355,7 +356,9 @@
 LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
   // The only conditional reg to reg operation supported is Cmov
   DCHECK_EQ(op, kOpCmov);
-  return NewLIR3(kX86Cmov32RRC, r_dest.GetReg(), r_src.GetReg(), X86ConditionEncoding(cc));
+  DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());
+  return NewLIR3(r_dest.Is64Bit() ? kX86Cmov64RRC : kX86Cmov32RRC, r_dest.GetReg(),
+                 r_src.GetReg(), X86ConditionEncoding(cc));
 }
 
 LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
@@ -492,10 +495,10 @@
 }
 
 LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int value) {
-  if (op == kOpMul && !Gen64Bit()) {
+  if (op == kOpMul && !cu_->target64) {
     X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
     return NewLIR3(opcode, r_dest.GetReg(), r_src.GetReg(), value);
-  } else if (op == kOpAnd && !Gen64Bit()) {
+  } else if (op == kOpAnd && !cu_->target64) {
     if (value == 0xFF && r_src.Low4()) {
       return NewLIR2(kX86Movzx8RR, r_dest.GetReg(), r_src.GetReg());
     } else if (value == 0xFFFF) {
@@ -607,18 +610,12 @@
         res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
         LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
       } else {
-        // TODO(64) make int64_t value parameter of LoadConstantNoClobber
-        if (val_lo < 0) {
-          val_hi += 1;
-        }
-        if (val_hi != 0) {
-          res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi);
-          NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32);
-        } else {
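+        // Materialize the 64-bit constant directly: xor for zero, a sign-extended 32-bit
+        // immediate when the value fits, otherwise a full 64-bit immediate move.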
+        if (value == 0) {
           res = NewLIR2(kX86Xor64RR, r_dest.GetReg(), r_dest.GetReg());
-        }
-        if (val_lo != 0) {
-          NewLIR2(kX86Add64RI, r_dest.GetReg(), val_lo);
+        } else if (value >= INT_MIN && value <= INT_MAX) {
+          res = NewLIR2(kX86Mov64RI32, r_dest.GetReg(), val_lo);
+        } else {
+          res = NewLIR3(kX86Mov64RI64, r_dest.GetReg(), val_hi, val_lo);
         }
       }
     }
@@ -647,7 +644,7 @@
       DCHECK_EQ((displacement & 0x3), 0);
       break;
     case kWord:
-      if (Gen64Bit()) {
+      if (cu_->target64) {
         opcode = is_array ? kX86Mov64RA  : kX86Mov64RM;
         CHECK_EQ(is_array, false);
         CHECK_EQ(r_dest.IsFloat(), false);
@@ -796,7 +793,7 @@
       DCHECK_EQ((displacement & 0x3), 0);
       break;
     case kWord:
-      if (Gen64Bit()) {
+      if (cu_->target64) {
         opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
         CHECK_EQ(is_array, false);
         CHECK_EQ(r_src.IsFloat(), false);
@@ -906,7 +903,7 @@
 
   // Did we need a pointer to the method code?
   if (store_method_addr_) {
-    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, Gen64Bit() == true);
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, cu_->target64 == true);
   } else {
     base_of_code_ = nullptr;
   }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index b719a12..e271e9d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -353,6 +353,12 @@
 const RegLocation x86_loc_c_return_wide
     {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG};
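+// Return location for reference results: EAX on x86, the 64-bit view of RAX on x86-64.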
+const RegLocation x86_loc_c_return_ref
+    {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1,
+     RegStorage(RegStorage::k32BitSolo, rAX), INVALID_SREG, INVALID_SREG};
+const RegLocation x86_64_loc_c_return_ref
+    {kLocPhysReg, 0, 0, 0, 0, 0, 1, 0, 1,
+     RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG};
 const RegLocation x86_64_loc_c_return_wide
     {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG};
@@ -439,7 +445,7 @@
   kX86Lea32RA,
   kX86Mov64MR, kX86Mov64AR, kX86Mov64TR,
   kX86Mov64RR, kX86Mov64RM, kX86Mov64RA, kX86Mov64RT,
-  kX86Mov64RI, kX86Mov64MI, kX86Mov64AI, kX86Mov64TI,
+  kX86Mov64RI32, kX86Mov64RI64, kX86Mov64MI, kX86Mov64AI, kX86Mov64TI,
   kX86Lea64RM,
   kX86Lea64RA,
   // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
@@ -500,6 +506,7 @@
   kx86Cdq32Da,
   kx86Cqo64Da,
   kX86Bswap32R,
+  kX86Bswap64R,
   kX86Push32R, kX86Pop32R,
 #undef UnaryOpcode
 #define Binary0fOpCode(opcode) \
@@ -606,7 +613,7 @@
   Binary0fOpCode(kX86Imul32),   // 32bit multiply
   Binary0fOpCode(kX86Imul64),   // 64bit multiply
   kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,  // compare and exchange
-  kX86LockCmpxchgMR, kX86LockCmpxchgAR,  // locked compare and exchange
+  kX86LockCmpxchgMR, kX86LockCmpxchgAR, kX86LockCmpxchg64AR,  // locked compare and exchange
   kX86LockCmpxchg64M, kX86LockCmpxchg64A,  // locked compare and exchange
   kX86XchgMR,  // exchange memory with register (automatically locked)
   Binary0fOpCode(kX86Movzx8),   // zero-extend 8-bit value
@@ -653,6 +660,7 @@
   kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
   kRegRegImm, kRegMemImm, kRegArrayImm,     // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                               // Shorter form move RI.
+  kMovRegQuadImm,                           // 64 bit move RI
   kRegRegImmStore,                          // RRI following the store modrm reg-reg encoding rather than the load.
   kMemRegImm,                               // MRI instruction kinds.
   kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
diff --git a/compiler/dex/reg_location.h b/compiler/dex/reg_location.h
new file mode 100644
index 0000000..38f59da
--- /dev/null
+++ b/compiler/dex/reg_location.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_REG_LOCATION_H_
+#define ART_COMPILER_DEX_REG_LOCATION_H_
+
+#include "reg_storage.h"
+
+namespace art {
+
+
+/*
+ * Whereas an SSA name describes a definition of a Dalvik vreg, the RegLocation describes
+ * the type of an SSA name (and can also be used by code generators to record where the
+ * value is located, i.e. physical register, frame, spill, etc.).  For each SSA name (SReg)
+ * there is a RegLocation.
+ * A note on SSA names:
+ *   o SSA names for Dalvik vRegs v0..vN will be assigned 0..N.  These represent the "vN_0"
+ *     names.  Negative SSA names represent special values not present in the Dalvik byte code.
+ *     For example, SSA name -1 represents an invalid SSA name, and SSA name -2 represents
+ *     the Method pointer.  SSA names < -2 are reserved for future use.
+ *   o The vN_0 names for non-argument Dalvik vRegs should in practice never be used (as they would
+ *     represent the read of an undefined local variable).  The first definition of the
+ *     underlying Dalvik vReg will result in a vN_1 name.
+ *
+ * FIXME: The orig_sreg field was added as a workaround for llvm bitcode generation.  With
+ * the latest restructuring, we should be able to remove it and rely on s_reg_low throughout.
+ */
+struct RegLocation {
+  RegLocationType location:3;
+  unsigned wide:1;
+  unsigned defined:1;   // Do we know the type?
+  unsigned is_const:1;  // Constant, value in mir_graph->constant_values[].
+  unsigned fp:1;        // Floating point?
+  unsigned core:1;      // Non-floating point?
+  unsigned ref:1;       // Something GC cares about.
+  unsigned high_word:1;  // High word of pair?
+  unsigned home:1;      // Does this represent the home location?
+  RegStorage reg;       // Encoded physical registers.
+  int16_t s_reg_low;    // SSA name for low Dalvik word.
+  int16_t orig_sreg;    // TODO: remove after Bitcode gen complete
+                        // and consolidate usage w/ s_reg_low.
+};
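+
+// Illustrative example (not part of this change): a defined, non-wide core value that is live in
+// a physical register could be described roughly as
+//   { kLocPhysReg, 0 /*wide*/, 1 /*defined*/, 0 /*is_const*/, 0 /*fp*/, 1 /*core*/, 0 /*ref*/,
+//     0 /*high_word*/, 1 /*home*/, RegStorage::Solo32(reg_num), s_reg, s_reg }
+// where reg_num and s_reg are placeholders for a physical register number and an SSA name.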
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_REG_LOCATION_H_
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index 3b891f2..8ed3adc 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -122,11 +122,18 @@
   constexpr explicit RegStorage(uint16_t val) : reg_(val) {}
   RegStorage() : reg_(kInvalid) {}
 
-  bool operator==(const RegStorage rhs) const {
+  // We do not provide a general operator overload for equality of reg storage, as this is
+  // dangerous in the case of architectures with multiple views, and the name ExactlyEquals
+  // makes the exact-match semantics explicit. In most cases a comparison between the views is
+  // what is intended; such code can be found in, for example, Mir2Lir::IsSameReg.
+  //
+  // If you know what you are doing, include reg_storage_eq.h, which defines == and != for brevity.
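+  //
+  // Illustrative sketch (not part of this change; kRegNum is a placeholder register number):
+  //   RegStorage r64 = RegStorage::Solo64(kRegNum);  // 64-bit view.
+  //   RegStorage r32 = RegStorage::Solo32(kRegNum);  // 32-bit view of the same register.
+  //   r64.ExactlyEquals(r32);  // false: the raw encodings of the two views differ.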
+
+  bool ExactlyEquals(const RegStorage& rhs) const {
     return (reg_ == rhs.GetRawBits());
   }
 
-  bool operator!=(const RegStorage rhs) const {
+  bool NotExactlyEquals(const RegStorage& rhs) const {
     return (reg_ != rhs.GetRawBits());
   }
 
diff --git a/compiler/dex/reg_storage_eq.h b/compiler/dex/reg_storage_eq.h
new file mode 100644
index 0000000..b688dac
--- /dev/null
+++ b/compiler/dex/reg_storage_eq.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_REG_STORAGE_EQ_H_
+#define ART_COMPILER_DEX_REG_STORAGE_EQ_H_
+
+#include "reg_storage.h"
+
+namespace art {
+
+// Define == and != operators for RegStorage. These are based on exact equality of the reg storage,
+// that is, 32b and 64b views of the same physical register won't match. This is often not the
+// intended behavior, so be careful when including this header.
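+//
+// Illustrative usage (not part of this change):
+//   #include "dex/reg_storage_eq.h"
+//   ...
+//   if (reg == rs_rX86_SP) { ... }  // Exact match only: a different view of SP compares unequal.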
+
+inline bool operator==(const RegStorage& lhs, const RegStorage& rhs) {
+  return lhs.ExactlyEquals(rhs);
+}
+
+inline bool operator!=(const RegStorage& lhs, const RegStorage& rhs) {
+  return lhs.NotExactlyEquals(rhs);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_REG_STORAGE_EQ_H_
+
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 96625c5..770ae89 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1461,6 +1461,18 @@
   return false;
 }
 
+static void CheckAndClearResolveException(Thread* self)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
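+  // Resolution may legitimately fail with IncompatibleClassChangeError or NoClassDefFoundError;
+  // anything else indicates a compiler bug, so fail hard instead of silently clearing it.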
+  CHECK(self->IsExceptionPending());
+  mirror::Throwable* exception = self->GetException(nullptr);
+  std::string descriptor = exception->GetClass()->GetDescriptor();
+  if (descriptor != "Ljava/lang/IncompatibleClassChangeError;" &&
+      descriptor != "Ljava/lang/NoClassDefFoundError;") {
+    LOG(FATAL) << "Unexpected exeption " << exception->Dump();
+  }
+  self->ClearException();
+}
+
 static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manager,
                                          size_t class_def_index)
     LOCKS_EXCLUDED(Locks::mutator_lock_) {
@@ -1496,8 +1508,7 @@
     if (klass == NULL) {
       // Class couldn't be resolved, for example, super-class is in a different dex file. Don't
       // attempt to resolve methods and fields when there is no declaring class.
-      CHECK(soa.Self()->IsExceptionPending());
-      soa.Self()->ClearException();
+      CheckAndClearResolveException(soa.Self());
       resolve_fields_and_methods = false;
     } else {
       resolve_fields_and_methods = manager->GetCompiler()->IsImage();
@@ -1516,8 +1527,7 @@
           mirror::ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(),
                                                                dex_cache, class_loader, true);
           if (field == NULL) {
-            CHECK(soa.Self()->IsExceptionPending());
-            soa.Self()->ClearException();
+            CheckAndClearResolveException(soa.Self());
           }
         }
         it.Next();
@@ -1532,8 +1542,7 @@
           mirror::ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(),
                                                                dex_cache, class_loader, false);
           if (field == NULL) {
-            CHECK(soa.Self()->IsExceptionPending());
-            soa.Self()->ClearException();
+            CheckAndClearResolveException(soa.Self());
           }
         }
         it.Next();
@@ -1545,8 +1554,7 @@
                                                                   NullHandle<mirror::ArtMethod>(),
                                                                   it.GetMethodInvokeType(class_def));
           if (method == NULL) {
-            CHECK(soa.Self()->IsExceptionPending());
-            soa.Self()->ClearException();
+            CheckAndClearResolveException(soa.Self());
           }
           it.Next();
         }
@@ -1556,8 +1564,7 @@
                                                                   NullHandle<mirror::ArtMethod>(),
                                                                   it.GetMethodInvokeType(class_def));
           if (method == NULL) {
-            CHECK(soa.Self()->IsExceptionPending());
-            soa.Self()->ClearException();
+            CheckAndClearResolveException(soa.Self());
           }
           it.Next();
         }
diff --git a/compiler/elf_writer.cc b/compiler/elf_writer.cc
index 4c093c7..55ee18e 100644
--- a/compiler/elf_writer.cc
+++ b/compiler/elf_writer.cc
@@ -43,7 +43,7 @@
                                      size_t& oat_data_offset) {
   std::string error_msg;
   std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, false, false, &error_msg));
-  CHECK(elf_file.get() != NULL) << error_msg;
+  CHECK(elf_file.get() != nullptr) << error_msg;
 
   oat_loaded_size = elf_file->GetLoadedSize();
   CHECK_NE(0U, oat_loaded_size);
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 78757ec..e4dcaa7 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -20,6 +20,7 @@
 #include "base/unix_file/fd_file.h"
 #include "buffered_output_stream.h"
 #include "driver/compiler_driver.h"
+#include "dwarf.h"
 #include "elf_utils.h"
 #include "file_output_stream.h"
 #include "globals.h"
@@ -469,9 +470,9 @@
   pieces.push_back(ElfFilePiece(".hash", hash_builder_.section_.sh_offset,
                                 hash.data(), hash.size() * sizeof(Elf32_Word)));
   pieces.push_back(ElfFilePiece(".rodata", rodata_builder_.section_.sh_offset,
-                                NULL, rodata_builder_.section_.sh_size));
+                                nullptr, rodata_builder_.section_.sh_size));
   pieces.push_back(ElfFilePiece(".text", text_builder_.section_.sh_offset,
-                                NULL, text_builder_.section_.sh_size));
+                                nullptr, text_builder_.section_.sh_size));
   if (IncludingDebugSymbols()) {
     pieces.push_back(ElfFilePiece(".symtab", symtab_builder_.section_.sh_offset,
                                   symtab.data(), symtab.size() * sizeof(Elf32_Sym)));
@@ -547,7 +548,7 @@
   if (tag == DT_NULL) {
     return;
   }
-  dynamics_.push_back({NULL, tag, d_un});
+  dynamics_.push_back({nullptr, tag, d_un});
 }
 
 void ElfWriterQuick::ElfDynamicBuilder::AddDynamicTag(Elf32_Sword tag, Elf32_Word d_un,
@@ -650,7 +651,7 @@
   // Lets say the state is something like this.
   // +--------+       +--------+      +-----------+
   // | symtab |       | bucket |      |   chain   |
-  // |  NULL  |       | 1      |      | STN_UNDEF |
+  // | nullptr|       | 1      |      | STN_UNDEF |
   // | <sym1> |       | 4      |      | 2         |
   // | <sym2> |       |        |      | 5         |
   // | <sym3> |       |        |      | STN_UNDEF |
@@ -821,10 +822,10 @@
 
   bool generateDebugInformation = compiler_driver_->GetCallFrameInformation() != nullptr;
   if (generateDebugInformation) {
-    ElfRawSectionBuilder debug_info(".debug_info",   SHT_PROGBITS, 0, NULL, 0, 1, 0);
-    ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, NULL, 0, 1, 0);
-    ElfRawSectionBuilder debug_str(".debug_str",    SHT_PROGBITS, 0, NULL, 0, 1, 0);
-    ElfRawSectionBuilder debug_frame(".debug_frame",  SHT_PROGBITS, 0, NULL, 0, 4, 0);
+    ElfRawSectionBuilder debug_info(".debug_info",   SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder debug_str(".debug_str",    SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder debug_frame(".debug_frame",  SHT_PROGBITS, 0, nullptr, 0, 4, 0);
     debug_frame.SetBuffer(*compiler_driver_->GetCallFrameInformation());
 
     FillInCFIInformation(oat_writer, debug_info.GetBuffer(),
@@ -866,31 +867,6 @@
   buf->push_back((data >> 8) & 0xff);
 }
 
-// DWARF constants needed to generate CFI information.
-enum {
-  // Tag encodings.
-  DW_TAG_compile_unit = 0x11,
-  DW_TAG_subprogram = 0X2e,
-
-  // Attribute encodings.
-  DW_AT_name = 0x03,
-  DW_AT_low_pc = 0x11,
-  DW_AT_high_pc = 0x12,
-  DW_AT_language = 0x13,
-
-  // Constant encoding.
-  DW_CHILDREN_no = 0x00,
-  DW_CHILDREN_yes = 0x01,
-
-  // Attribute form encodings.
-  DW_FORM_addr = 0x01,
-  DW_FORM_data1 = 0x0b,
-  DW_FORM_strp = 0x0e,
-
-  // Language encoding.
-  DW_LANG_Java = 0x000b
-};
-
 void ElfWriterQuick::FillInCFIInformation(OatWriter* oat_writer,
                                           std::vector<uint8_t>* dbg_info,
                                           std::vector<uint8_t>* dbg_abbrev,
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index dbdccfc..6eb5d68 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -167,10 +167,10 @@
 
     ElfSymtabBuilder(const std::string& sec_name, Elf32_Word type,
                      const std::string& str_name, Elf32_Word str_type, bool alloc)
-        : ElfSectionBuilder(sec_name, type, ((alloc)?SHF_ALLOC:0), &strtab_, 0,
+        : ElfSectionBuilder(sec_name, type, ((alloc) ? SHF_ALLOC : 0U), &strtab_, 0,
                             sizeof(Elf32_Word), sizeof(Elf32_Sym)),
           str_name_(str_name), str_type_(str_type),
-          strtab_(str_name, str_type, ((alloc) ? SHF_ALLOC : 0), NULL, 0, 1, 1) {}
+          strtab_(str_name, str_type, ((alloc) ? SHF_ALLOC : 0U), NULL, 0, 1, 1) {}
     ~ElfSymtabBuilder() {}
 
    protected:
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index e8bbaef..d52ec0a 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -129,11 +129,7 @@
   runtime_.reset();
   java_lang_dex_file_ = NULL;
 
-  std::string error_msg;
-  std::unique_ptr<const DexFile> dex(DexFile::Open(GetLibCoreDexFileName().c_str(),
-                                             GetLibCoreDexFileName().c_str(),
-                                             &error_msg));
-  ASSERT_TRUE(dex.get() != nullptr) << error_msg;
+  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str()));
 
   // Remove the reservation of the memory for use to load the image.
   UnreserveImageSpace();
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 8f4eddb..25b489b 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -1284,13 +1284,6 @@
   EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE);
 }
 
-template <typename U, typename V> V convert(U in) {
-  DCHECK_LE(sizeof(U), sizeof(V));
-  union { U u; V v; } tmp;
-  tmp.u = in;
-  return tmp.v;
-}
-
 void Java_MyClassNatives_stackArgsIntsFirst(JNIEnv* env, jclass klass, jint i1, jint i2, jint i3,
                                             jint i4, jint i5, jint i6, jint i7, jint i8, jint i9,
                                             jint i10, jfloat f1, jfloat f2, jfloat f3, jfloat f4,
@@ -1307,25 +1300,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = convert<jfloat, jint>(f1);
+  jint i11 = bit_cast<jfloat, jint>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = convert<jfloat, jint>(f2);
+  jint i12 = bit_cast<jfloat, jint>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = convert<jfloat, jint>(f3);
+  jint i13 = bit_cast<jfloat, jint>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = convert<jfloat, jint>(f4);
+  jint i14 = bit_cast<jfloat, jint>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = convert<jfloat, jint>(f5);
+  jint i15 = bit_cast<jfloat, jint>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = convert<jfloat, jint>(f6);
+  jint i16 = bit_cast<jfloat, jint>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = convert<jfloat, jint>(f7);
+  jint i17 = bit_cast<jfloat, jint>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = convert<jfloat, jint>(f8);
+  jint i18 = bit_cast<jfloat, jint>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = convert<jfloat, jint>(f9);
+  jint i19 = bit_cast<jfloat, jint>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = convert<jfloat, jint>(f10);
+  jint i20 = bit_cast<jfloat, jint>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1345,16 +1338,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = convert<jint, jfloat>(11);
-  jfloat f2 = convert<jint, jfloat>(12);
-  jfloat f3 = convert<jint, jfloat>(13);
-  jfloat f4 = convert<jint, jfloat>(14);
-  jfloat f5 = convert<jint, jfloat>(15);
-  jfloat f6 = convert<jint, jfloat>(16);
-  jfloat f7 = convert<jint, jfloat>(17);
-  jfloat f8 = convert<jint, jfloat>(18);
-  jfloat f9 = convert<jint, jfloat>(19);
-  jfloat f10 = convert<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jint, jfloat>(11);
+  jfloat f2 = bit_cast<jint, jfloat>(12);
+  jfloat f3 = bit_cast<jint, jfloat>(13);
+  jfloat f4 = bit_cast<jint, jfloat>(14);
+  jfloat f5 = bit_cast<jint, jfloat>(15);
+  jfloat f6 = bit_cast<jint, jfloat>(16);
+  jfloat f7 = bit_cast<jint, jfloat>(17);
+  jfloat f8 = bit_cast<jint, jfloat>(18);
+  jfloat f9 = bit_cast<jint, jfloat>(19);
+  jfloat f10 = bit_cast<jint, jfloat>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, f1, f2,
                              f3, f4, f5, f6, f7, f8, f9, f10);
@@ -1376,25 +1369,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = convert<jfloat, jint>(f1);
+  jint i11 = bit_cast<jfloat, jint>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = convert<jfloat, jint>(f2);
+  jint i12 = bit_cast<jfloat, jint>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = convert<jfloat, jint>(f3);
+  jint i13 = bit_cast<jfloat, jint>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = convert<jfloat, jint>(f4);
+  jint i14 = bit_cast<jfloat, jint>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = convert<jfloat, jint>(f5);
+  jint i15 = bit_cast<jfloat, jint>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = convert<jfloat, jint>(f6);
+  jint i16 = bit_cast<jfloat, jint>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = convert<jfloat, jint>(f7);
+  jint i17 = bit_cast<jfloat, jint>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = convert<jfloat, jint>(f8);
+  jint i18 = bit_cast<jfloat, jint>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = convert<jfloat, jint>(f9);
+  jint i19 = bit_cast<jfloat, jint>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = convert<jfloat, jint>(f10);
+  jint i20 = bit_cast<jfloat, jint>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1414,16 +1407,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = convert<jint, jfloat>(11);
-  jfloat f2 = convert<jint, jfloat>(12);
-  jfloat f3 = convert<jint, jfloat>(13);
-  jfloat f4 = convert<jint, jfloat>(14);
-  jfloat f5 = convert<jint, jfloat>(15);
-  jfloat f6 = convert<jint, jfloat>(16);
-  jfloat f7 = convert<jint, jfloat>(17);
-  jfloat f8 = convert<jint, jfloat>(18);
-  jfloat f9 = convert<jint, jfloat>(19);
-  jfloat f10 = convert<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jint, jfloat>(11);
+  jfloat f2 = bit_cast<jint, jfloat>(12);
+  jfloat f3 = bit_cast<jint, jfloat>(13);
+  jfloat f4 = bit_cast<jint, jfloat>(14);
+  jfloat f5 = bit_cast<jint, jfloat>(15);
+  jfloat f6 = bit_cast<jint, jfloat>(16);
+  jfloat f7 = bit_cast<jint, jfloat>(17);
+  jfloat f8 = bit_cast<jint, jfloat>(18);
+  jfloat f9 = bit_cast<jint, jfloat>(19);
+  jfloat f10 = bit_cast<jint, jfloat>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, i1, i2, i3,
                              i4, i5, i6, i7, i8, i9, i10);
@@ -1444,25 +1437,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = convert<jfloat, jint>(f1);
+  jint i11 = bit_cast<jfloat, jint>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = convert<jfloat, jint>(f2);
+  jint i12 = bit_cast<jfloat, jint>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = convert<jfloat, jint>(f3);
+  jint i13 = bit_cast<jfloat, jint>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = convert<jfloat, jint>(f4);
+  jint i14 = bit_cast<jfloat, jint>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = convert<jfloat, jint>(f5);
+  jint i15 = bit_cast<jfloat, jint>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = convert<jfloat, jint>(f6);
+  jint i16 = bit_cast<jfloat, jint>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = convert<jfloat, jint>(f7);
+  jint i17 = bit_cast<jfloat, jint>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = convert<jfloat, jint>(f8);
+  jint i18 = bit_cast<jfloat, jint>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = convert<jfloat, jint>(f9);
+  jint i19 = bit_cast<jfloat, jint>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = convert<jfloat, jint>(f10);
+  jint i20 = bit_cast<jfloat, jint>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1482,16 +1475,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = convert<jint, jfloat>(11);
-  jfloat f2 = convert<jint, jfloat>(12);
-  jfloat f3 = convert<jint, jfloat>(13);
-  jfloat f4 = convert<jint, jfloat>(14);
-  jfloat f5 = convert<jint, jfloat>(15);
-  jfloat f6 = convert<jint, jfloat>(16);
-  jfloat f7 = convert<jint, jfloat>(17);
-  jfloat f8 = convert<jint, jfloat>(18);
-  jfloat f9 = convert<jint, jfloat>(19);
-  jfloat f10 = convert<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jint, jfloat>(11);
+  jfloat f2 = bit_cast<jint, jfloat>(12);
+  jfloat f3 = bit_cast<jint, jfloat>(13);
+  jfloat f4 = bit_cast<jint, jfloat>(14);
+  jfloat f5 = bit_cast<jint, jfloat>(15);
+  jfloat f6 = bit_cast<jint, jfloat>(16);
+  jfloat f7 = bit_cast<jint, jfloat>(17);
+  jfloat f8 = bit_cast<jint, jfloat>(18);
+  jfloat f9 = bit_cast<jint, jfloat>(19);
+  jfloat f10 = bit_cast<jint, jfloat>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, i6, f6, i7,
                              f7, i8, f8, i9, f9, i10, f10);
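
Reviewer note (not part of the patch): the local convert<U, V> helper removed above is superseded by ART's bit_cast, which performs the same size-checked type punning between jint and jfloat. A minimal standalone sketch of that behaviour, assuming equal-sized types and using memcpy rather than the old union trick:

#include <cassert>
#include <cstdint>
#include <cstring>

// Reinterpret the bits of `in` as type V without changing them.
template <typename U, typename V>
V bit_cast_sketch(U in) {
  static_assert(sizeof(U) == sizeof(V), "types must be the same size");
  V out;
  std::memcpy(&out, &in, sizeof(V));
  return out;
}

int main() {
  int32_t bits = bit_cast_sketch<float, int32_t>(11.0f);
  // Round-tripping restores the original value bit for bit.
  assert(bit_cast_sketch<int32_t, float>(bits) == 11.0f);
  return 0;
}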
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 0b7272c..254faac 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -180,7 +180,7 @@
   EXPECT_EQ(80U, sizeof(OatHeader));
   EXPECT_EQ(8U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(78 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(77 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c3a322c..cc995f7 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -226,7 +226,7 @@
 }
 
 template<typename T>
-void HGraphBuilder::Binop_32x(const Instruction& instruction, Primitive::Type type) {
+void HGraphBuilder::Binop_23x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegB(), type);
   HInstruction* second = LoadLocal(instruction.VRegC(), type);
   current_block_->AddInstruction(new (arena_) T(type, first, second));
@@ -501,22 +501,22 @@
     }
 
     case Instruction::ADD_INT: {
-      Binop_32x<HAdd>(instruction, Primitive::kPrimInt);
+      Binop_23x<HAdd>(instruction, Primitive::kPrimInt);
       break;
     }
 
     case Instruction::ADD_LONG: {
-      Binop_32x<HAdd>(instruction, Primitive::kPrimLong);
+      Binop_23x<HAdd>(instruction, Primitive::kPrimLong);
       break;
     }
 
     case Instruction::SUB_INT: {
-      Binop_32x<HSub>(instruction, Primitive::kPrimInt);
+      Binop_23x<HSub>(instruction, Primitive::kPrimInt);
       break;
     }
 
     case Instruction::SUB_LONG: {
-      Binop_32x<HSub>(instruction, Primitive::kPrimLong);
+      Binop_23x<HSub>(instruction, Primitive::kPrimLong);
       break;
     }
 
@@ -573,6 +573,11 @@
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       break;
 
+    case Instruction::CMP_LONG: {
+      Binop_23x<HCompare>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::NOP:
       break;
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 0852a26..ee32ca8 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -73,7 +73,7 @@
   bool InitializeParameters(uint16_t number_of_parameters);
 
   template<typename T>
-  void Binop_32x(const Instruction& instruction, Primitive::Type type);
+  void Binop_23x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
@@ -84,11 +84,8 @@
   template<typename T>
   void Binop_22s(const Instruction& instruction, bool reverse);
 
-  template<typename T>
-  void If_22t(const Instruction& instruction, int32_t dex_offset);
-
-  template<typename T>
-  void If_21t(const Instruction& instruction, int32_t dex_offset);
+  template<typename T> void If_21t(const Instruction& instruction, int32_t dex_offset);
+  template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset);
 
   void BuildReturn(const Instruction& instruction, Primitive::Type type);
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 83621e0..ae2f030 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -90,6 +90,7 @@
   virtual void SetupBlockedRegisters(bool* blocked_registers) const = 0;
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
+  virtual InstructionSet GetInstructionSet() const = 0;
 
   void RecordPcInfo(uint32_t dex_pc) {
     struct PcInfo pc_info;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c5862da..d87c14b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -707,7 +707,8 @@
 
 void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) {
   Register temp = invoke->GetLocations()->GetTemp(0).AsArm().AsCoreRegister();
-  size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() +
       invoke->GetIndexInDexCache() * kArmWordSize;
 
   // TODO: Implement all kinds of calls:
@@ -904,6 +905,48 @@
          locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(1));
 }
 
+void LocationsBuilderARM::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      Register output = locations->Out().AsArm().AsCoreRegister();
+      ArmManagedRegister left = locations->InAt(0).AsArm();
+      ArmManagedRegister right = locations->InAt(1).AsArm();
+      Label less, greater, done;
+      __ cmp(left.AsRegisterPairHigh(),
+             ShifterOperand(right.AsRegisterPairHigh()));  // Signed compare.
+      __ b(&less, LT);
+      __ b(&greater, GT);
+      __ cmp(left.AsRegisterPairLow(),
+             ShifterOperand(right.AsRegisterPairLow()));  // Unsigned compare.
+      __ LoadImmediate(output, 0);
+      __ b(&done, EQ);
+      __ b(&less, CC);
+
+      __ Bind(&greater);
+      __ LoadImmediate(output, 1);
+      __ b(&done);
+
+      __ Bind(&less);
+      __ LoadImmediate(output, -1);
+
+      __ Bind(&done);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+}
+
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
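
Reviewer note (not part of the patch): the ARM VisitCompare above computes a three-way long comparison on register pairs by comparing the high words as signed values and the low words as unsigned values. A host-side sketch of the same decision logic, with hypothetical helper names:

#include <cassert>
#include <cstdint>

// Mirrors the branch sequence emitted above:
// signed compare on the high words, unsigned compare on the low words.
int CompareLongViaWords(int64_t a, int64_t b) {
  int32_t a_hi = static_cast<int32_t>(a >> 32);
  int32_t b_hi = static_cast<int32_t>(b >> 32);
  uint32_t a_lo = static_cast<uint32_t>(a);
  uint32_t b_lo = static_cast<uint32_t>(b);
  if (a_hi < b_hi) return -1;        // b(&less, LT)
  if (a_hi > b_hi) return 1;         // b(&greater, GT)
  if (a_lo == b_lo) return 0;        // b(&done, EQ) with output preloaded to 0
  return (a_lo < b_lo) ? -1 : 1;     // b(&less, CC), i.e. unsigned lower
}

int main() {
  assert(CompareLongViaWords(-1, 0) == -1);
  assert(CompareLongViaWords(INT64_C(1) << 40, 5) == 1);
  assert(CompareLongViaWords(7, 7) == 0);
  return 0;
}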
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 0e2a079..c46c1b1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -20,7 +20,7 @@
 #include "code_generator.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
-#include "utils/arm/assembler_thumb2.h"
+#include "utils/arm/assembler_arm32.h"
 
 namespace art {
 namespace arm {
@@ -171,6 +171,10 @@
     return &move_resolver_;
   }
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kArm;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
@@ -180,7 +184,7 @@
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
   ParallelMoveResolverARM move_resolver_;
-  Thumb2Assembler assembler_;
+  Arm32Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a8ee6c0..572d494 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -81,12 +81,23 @@
                                                        bool* blocked_registers) const {
   switch (type) {
     case Primitive::kPrimLong: {
-      size_t reg = AllocateFreeRegisterInternal(
-          GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs);
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, kNumberOfRegisterPairs);
       X86ManagedRegister pair =
           X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
       blocked_registers[pair.AsRegisterPairLow()] = true;
       blocked_registers[pair.AsRegisterPairHigh()] = true;
+      // Block all other register pairs that share a register with `pair`.
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        X86ManagedRegister current =
+            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairLow() == pair.AsRegisterPairHigh()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) {
+          blocked_register_pairs[i] = true;
+        }
+      }
       return pair;
     }
 
@@ -691,7 +702,8 @@
 
 void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) {
   Register temp = invoke->GetLocations()->GetTemp(0).AsX86().AsCpuRegister();
-  size_t index_in_cache = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() +
+  uint32_t heap_reference_size = sizeof(mirror::HeapReference<mirror::Object>);
+  size_t index_in_cache = mirror::Array::DataOffset(heap_reference_size).Int32Value() +
       invoke->GetIndexInDexCache() * kX86WordSize;
 
   // TODO: Implement all kinds of calls:
@@ -900,6 +912,46 @@
   __ xorl(out.AsX86().AsCpuRegister(), Immediate(1));
 }
 
+void LocationsBuilderX86::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      Label less, greater, done;
+      Register output = locations->Out().AsX86().AsCpuRegister();
+      X86ManagedRegister left = locations->InAt(0).AsX86();
+      X86ManagedRegister right = locations->InAt(1).AsX86();
+      __ cmpl(left.AsRegisterPairHigh(), right.AsRegisterPairHigh());
+      __ j(kLess, &less);  // Signed compare.
+      __ j(kGreater, &greater);  // Signed compare.
+      __ cmpl(left.AsRegisterPairLow(), right.AsRegisterPairLow());
+      __ movl(output, Immediate(0));
+      __ j(kEqual, &done);
+      __ j(kBelow, &less);  // Unsigned compare.
+
+      __ Bind(&greater);
+      __ movl(output, Immediate(1));
+      __ jmp(&done);
+
+      __ Bind(&less);
+      __ movl(output, Immediate(-1));
+
+      __ Bind(&done);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+}
+
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
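
Reviewer note (not part of the patch): the AllocateFreeRegister hunk above also marks every register pair that shares a core register with the freshly allocated pair as blocked, so a later pair allocation cannot alias it. A standalone sketch of that marking loop, with a hypothetical pair table:

#include <cassert>

// Hypothetical register-pair table: each pair names two core register ids.
struct Pair { int low; int high; };
static const Pair kPairs[] = {{0, 1}, {1, 2}, {2, 3}, {0, 3}};
static const int kNumberOfPairs = 4;

// After picking `chosen`, block every pair sharing a register with it,
// mirroring the loop added to CodeGeneratorX86::AllocateFreeRegister.
void BlockAliasingPairs(int chosen, bool* blocked_pairs) {
  for (int i = 0; i < kNumberOfPairs; ++i) {
    bool shares = kPairs[i].low == kPairs[chosen].low ||
                  kPairs[i].low == kPairs[chosen].high ||
                  kPairs[i].high == kPairs[chosen].low ||
                  kPairs[i].high == kPairs[chosen].high;
    if (shares) {
      blocked_pairs[i] = true;
    }
  }
}

int main() {
  bool blocked[kNumberOfPairs] = {false, false, false, false};
  BlockAliasingPairs(0, blocked);  // Pair {0, 1} chosen.
  // {0,1}, {1,2} and {0,3} alias it; {2,3} stays available.
  assert(blocked[0] && blocked[1] && !blocked[2] && blocked[3]);
  return 0;
}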
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index acc670e..8a8216a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -173,6 +173,10 @@
     return &move_resolver_;
   }
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kX86;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 283f1f5..dc1d616 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -228,7 +228,9 @@
   }
 }
 
-void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
+void CodeGeneratorX86_64::Move(HInstruction* instruction,
+                               Location location,
+                               HInstruction* move_for) {
   if (instruction->AsIntConstant() != nullptr) {
     Immediate imm(instruction->AsIntConstant()->GetValue());
     if (location.IsRegister()) {
@@ -383,7 +385,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
+  locations->SetOut(Location::RequiresRegister());
   comp->SetLocations(locations);
 }
 
@@ -444,6 +446,39 @@
   VisitCondition(comp);
 }
 
+void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong:
+      __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(0));
+  __ j(kEqual, &done);
+  __ j(kGreater, &greater);
+
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(-1));
+  __ jmp(&done);
+
+  __ Bind(&greater);
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1));
+
+  __ Bind(&done);
+}
+
 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
   // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
@@ -463,7 +498,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) {
-  // Will be generated at use site.
+  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -812,10 +847,13 @@
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
-    } else {
-      DCHECK(destination.IsStackSlot());
+    } else if (destination.IsStackSlot()) {
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
               source.AsX86_64().AsCpuRegister());
+    } else {
+      DCHECK(destination.IsDoubleStackSlot());
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
     }
   } else if (source.IsStackSlot()) {
     if (destination.IsRegister()) {
@@ -826,18 +864,27 @@
       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
+  } else if (source.IsDoubleStackSlot()) {
+    if (destination.IsRegister()) {
+      __ movq(destination.AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsDoubleStackSlot());
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+    }
   } else {
     LOG(FATAL) << "Unimplemented";
   }
 }
 
-void ParallelMoveResolverX86_64::Exchange(CpuRegister reg, int mem) {
+void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
-  __ movl(Address(CpuRegister(RSP), mem), CpuRegister(reg));
-  __ movl(CpuRegister(reg), CpuRegister(TMP));
+  __ movl(Address(CpuRegister(RSP), mem), reg);
+  __ movl(reg, CpuRegister(TMP));
 }
 
-void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) {
+void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
   ScratchRegisterScope ensure_scratch(
       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
 
@@ -850,6 +897,25 @@
           CpuRegister(ensure_scratch.GetRegister()));
 }
 
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
+  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+  __ movq(Address(CpuRegister(RSP), mem), reg);
+  __ movq(reg, CpuRegister(TMP));
+}
+
+void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+  __ movq(CpuRegister(ensure_scratch.GetRegister()),
+          Address(CpuRegister(RSP), mem2 + stack_offset));
+  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
+          CpuRegister(ensure_scratch.GetRegister()));
+}
+
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   MoveOperands* move = moves_.Get(index);
   Location source = move->GetSource();
@@ -858,11 +924,17 @@
   if (source.IsRegister() && destination.IsRegister()) {
     __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
-    Exchange(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
+    Exchange32(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
-    Exchange(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
+    Exchange32(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
-    Exchange(destination.GetStackIndex(), source.GetStackIndex());
+    Exchange32(destination.GetStackIndex(), source.GetStackIndex());
+  } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
+    Exchange64(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
+  } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
+    Exchange64(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
+  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+    Exchange64(destination.GetStackIndex(), source.GetStackIndex());
   } else {
     LOG(FATAL) << "Unimplemented";
   }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index f07df29..d347a4f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -69,8 +69,10 @@
   X86_64Assembler* GetAssembler() const;
 
  private:
-  void Exchange(CpuRegister reg, int mem);
-  void Exchange(int mem1, int mem2);
+  void Exchange32(CpuRegister reg, int mem);
+  void Exchange32(int mem1, int mem2);
+  void Exchange64(CpuRegister reg, int mem);
+  void Exchange64(int mem1, int mem2);
 
   CodeGeneratorX86_64* const codegen_;
 
@@ -170,6 +172,10 @@
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kX86_64;
+  }
+
  private:
   // Helper method to move a value between two locations.
   void Move(Location destination, Location source);
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index fd534ce..7ec0c84 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -52,10 +52,6 @@
   typedef int32_t (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
-#if defined(__arm__)
-  // For thumb we need the bottom bit set.
-  f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
-#endif
   int32_t result = f();
   if (has_result) {
     CHECK_EQ(result, expected);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index a49ce64..f033e2e 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -108,9 +108,11 @@
       } else {
         codegen_.DumpCoreRegister(output_, location.reg().RegId());
       }
-    } else {
-      DCHECK(location.IsStackSlot());
+    } else if (location.IsStackSlot()) {
       output_ << location.GetStackIndex() << "(sp)";
+    } else {
+      DCHECK(location.IsDoubleStackSlot());
+      output_ << "2x" << location.GetStackIndex() << "(sp)";
     }
   }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 503f31d..9292084 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -414,6 +414,7 @@
   M(ReturnVoid)                                            \
   M(StoreLocal)                                            \
   M(Sub)                                                   \
+  M(Compare)                                               \
 
 
 #define FORWARD_DECLARATION(type) class H##type;
@@ -986,6 +987,22 @@
 };
 
 
+// Instruction to check how two inputs compare to each other.
+// Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
+class HCompare : public HBinaryOperation {
+ public:
+  HCompare(Primitive::Type type, HInstruction* first, HInstruction* second)
+      : HBinaryOperation(Primitive::kPrimInt, first, second) {
+    DCHECK_EQ(type, first->GetType());
+    DCHECK_EQ(type, second->GetType());
+  }
+
+  DECLARE_INSTRUCTION(Compare);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HCompare);
+};
+
 // A local in the graph. Corresponds to a Dex register.
 class HLocal : public HTemplateInstruction<0> {
  public:
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 56029aa..b4d7fff 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -77,6 +77,17 @@
                                                uint32_t method_idx,
                                                jobject class_loader,
                                                const DexFile& dex_file) const {
+  InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
+  // The optimizing compiler currently does not have a Thumb2 assembler.
+  if (instruction_set == kThumb2) {
+    instruction_set = kArm;
+  }
+
+  // Do not attempt to compile on architectures we do not support.
+  if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kArm) {
+    return nullptr;
+  }
+
   DexCompilationUnit dex_compilation_unit(
     nullptr, class_loader, art::Runtime::Current()->GetClassLinker(), dex_file, code_item,
     class_def_idx, method_idx, access_flags,
@@ -100,7 +111,6 @@
     return nullptr;
   }
 
-  InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set);
   if (codegen == nullptr) {
     if (shouldCompile) {
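
Reviewer note (not part of the patch): TryCompile now normalises kThumb2 to kArm and returns nullptr for instruction sets the optimizing backend cannot handle, letting the driver fall back to another compiler. A tiny sketch of that gating decision, with a simplified enum:

#include <cassert>

enum InstructionSet { kArm, kThumb2, kX86, kX86_64, kMips };

// Mirrors the early-out added above: Thumb2 is compiled as ARM, and any
// other unsupported ISA is rejected so the caller can fall back.
bool CanTryCompile(InstructionSet isa, InstructionSet* effective_isa) {
  if (isa == kThumb2) {
    isa = kArm;  // The optimizing compiler has no Thumb2 assembler yet.
  }
  *effective_isa = isa;
  return isa == kX86 || isa == kX86_64 || isa == kArm;
}

int main() {
  InstructionSet effective;
  assert(CanTryCompile(kThumb2, &effective) && effective == kArm);
  assert(!CanTryCompile(kMips, &effective));
  return 0;
}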
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1f4cb41..68130dd 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -55,7 +55,7 @@
          it.Advance()) {
       HInstruction* current = it.Current();
       if (current->NeedsEnvironment()) return false;
-      if (current->GetType() == Primitive::kPrimLong) return false;
+      if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false;
       if (current->GetType() == Primitive::kPrimFloat) return false;
       if (current->GetType() == Primitive::kPrimDouble) return false;
     }
@@ -139,7 +139,7 @@
         current->SetFrom(position + 1);
         current->SetRegister(output.reg().RegId());
         BlockRegister(output, position, position + 1, instruction->GetType());
-      } else if (output.IsStackSlot()) {
+      } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
         current->SetSpillSlot(output.GetStackIndex());
       }
       for (size_t i = 0; i < instruction->InputCount(); ++i) {
@@ -430,7 +430,7 @@
 // we spill `current` instead.
 bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
   size_t first_register_use = current->FirstRegisterUse();
-  if (current->FirstRegisterUse() == kNoLifetime) {
+  if (first_register_use == kNoLifetime) {
     AllocateSpillSlotFor(current);
     return false;
   }
@@ -559,6 +559,10 @@
   }
 }
 
+static bool NeedTwoSpillSlot(Primitive::Type type) {
+  return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
+}
+
 void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
   LiveInterval* parent = interval->GetParent();
 
@@ -581,6 +585,43 @@
   }
   size_t end = last_sibling->GetEnd();
 
+  if (NeedTwoSpillSlot(parent->GetType())) {
+    AllocateTwoSpillSlots(parent, end);
+  } else {
+    AllocateOneSpillSlot(parent, end);
+  }
+}
+
+void RegisterAllocator::AllocateTwoSpillSlots(LiveInterval* parent, size_t end) {
+  // Find an available spill slot.
+  size_t slot = 0;
+  for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
+    // We check if it is less rather than less or equal because the parallel move
+    // resolver does not work when a single spill slot needs to be exchanged with
+    // a double spill slot. The strict comparison avoids needing to exchange these
+    // locations at the same lifetime position.
+    if (spill_slots_.Get(slot) < parent->GetStart()
+        && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) {
+      break;
+    }
+  }
+
+  if (slot == spill_slots_.Size()) {
+    // We need a new spill slot.
+    spill_slots_.Add(end);
+    spill_slots_.Add(end);
+  } else if (slot == spill_slots_.Size() - 1) {
+    spill_slots_.Put(slot, end);
+    spill_slots_.Add(end);
+  } else {
+    spill_slots_.Put(slot, end);
+    spill_slots_.Put(slot + 1, end);
+  }
+
+  parent->SetSpillSlot(slot * kVRegSize);
+}
+
+void RegisterAllocator::AllocateOneSpillSlot(LiveInterval* parent, size_t end) {
   // Find an available spill slot.
   size_t slot = 0;
   for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
@@ -604,7 +645,11 @@
     return Location::RegisterLocation(ManagedRegister(interval->GetRegister()));
   } else {
     DCHECK(interval->GetParent()->HasSpillSlot());
-    return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+    if (NeedTwoSpillSlot(interval->GetType())) {
+      return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot());
+    } else {
+      return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+    }
   }
 }
 
@@ -750,7 +795,9 @@
     // We spill eagerly, so move must be at definition.
     InsertMoveAfter(interval->GetDefinedBy(),
                     Location::RegisterLocation(ManagedRegister(interval->GetRegister())),
-                    Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+                    NeedTwoSpillSlot(interval->GetType())
+                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
   UsePosition* use = current->GetFirstUse();
 
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index e63122f..7d4cd1a 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -93,6 +93,8 @@
 
   // Allocate a spill slot for the given interval.
   void AllocateSpillSlotFor(LiveInterval* interval);
+  void AllocateOneSpillSlot(LiveInterval* interval, size_t end);
+  void AllocateTwoSpillSlots(LiveInterval* interval, size_t end);
 
   // Connect adjacent siblings within blocks.
   void ConnectSiblings(LiveInterval* interval);
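
Reviewer note (not part of the patch): AllocateTwoSpillSlots above reserves two adjacent vreg-sized slots for long and double intervals, reusing an adjacent free pair when one exists and growing the pool otherwise. A simplified sketch of that slot-picking policy, with a plain vector of "free after this position" entries:

#include <cassert>
#include <cstddef>
#include <vector>

// Whether a value of this kind needs two vreg-sized spill slots.
enum Kind { kInt, kFloat, kLong, kDouble };
bool NeedTwoSpillSlots(Kind kind) { return kind == kLong || kind == kDouble; }

// Each entry holds the lifetime position after which that slot is free.
// Returns the first slot of a pair free before `start`, growing the pool
// when needed, and marks both slots busy until `end`.
size_t AllocateTwoSlots(std::vector<size_t>* slots, size_t start, size_t end) {
  size_t slot = 0;
  for (size_t e = slots->size(); slot < e; ++slot) {
    if ((*slots)[slot] < start &&
        (slot == e - 1 || (*slots)[slot + 1] < start)) {
      break;
    }
  }
  if (slot == slots->size()) {
    slots->push_back(end);         // Two brand-new slots.
    slots->push_back(end);
  } else if (slot == slots->size() - 1) {
    (*slots)[slot] = end;          // Last slot free: extend by one.
    slots->push_back(end);
  } else {
    (*slots)[slot] = end;          // Reuse an adjacent free pair.
    (*slots)[slot + 1] = end;
  }
  return slot;
}

int main() {
  std::vector<size_t> slots;
  assert(NeedTwoSpillSlots(kLong) && !NeedTwoSpillSlots(kInt));
  assert(AllocateTwoSlots(&slots, 10, 20) == 0);   // Pool grows to two slots.
  assert(AllocateTwoSlots(&slots, 30, 40) == 0);   // Same pair reused once free.
  return 0;
}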
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index b607a1d..8a34928 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -111,43 +111,38 @@
   }
 }
 
-uint32_t ShifterOperand::encodingThumb(int version) const {
-  CHECK(version == 1 || version == 2);
-  if (version == 1) {
-    LOG(FATAL) << "Invalid of use encodingThumb with version 1";
-  } else {
-    switch (type_) {
-      case kImmediate:
-        return immed_;
-      case kRegister:
-        if (is_shift_) {
-          // Shifted immediate or register.
-          if (rs_ == kNoRegister) {
-            // Immediate shift.
-            if (shift_ == RRX) {
-              // RRX is encoded as an ROR with imm 0.
-              return ROR << 4 | static_cast<uint32_t>(rm_);
-            } else {
-              uint32_t imm3 = immed_ >> 2;
-              uint32_t imm2 = immed_ & 0b11;
-
-              return imm3 << 12 | imm2 << 6 | shift_ << 4 |
-                  static_cast<uint32_t>(rm_);
-            }
+uint32_t ShifterOperand::encodingThumb() const {
+  switch (type_) {
+    case kImmediate:
+      return immed_;
+    case kRegister:
+      if (is_shift_) {
+        // Shifted immediate or register.
+        if (rs_ == kNoRegister) {
+          // Immediate shift.
+          if (shift_ == RRX) {
+            // RRX is encoded as an ROR with imm 0.
+            return ROR << 4 | static_cast<uint32_t>(rm_);
           } else {
-            LOG(FATAL) << "No register-shifted register instruction available in thumb";
-            return 0;
+            uint32_t imm3 = immed_ >> 2;
+            uint32_t imm2 = immed_ & 0b11;
+
+            return imm3 << 12 | imm2 << 6 | shift_ << 4 |
+                static_cast<uint32_t>(rm_);
           }
         } else {
-          // Simple register
-          return static_cast<uint32_t>(rm_);
+          LOG(FATAL) << "No register-shifted register instruction available in thumb";
+          return 0;
         }
-        break;
-      default:
-        // Can't get here.
-        LOG(FATAL) << "Invalid shifter operand for thumb";
-        return 0;
-    }
+      } else {
+        // Simple register
+        return static_cast<uint32_t>(rm_);
+      }
+      break;
+    default:
+      // Can't get here.
+      LOG(FATAL) << "Invalid shifter operand for thumb";
+      return 0;
   }
   return 0;
 }
@@ -187,51 +182,78 @@
 uint32_t Address::encodingArm() const {
   CHECK(IsAbsoluteUint(12, offset_));
   uint32_t encoding;
-  if (offset_ < 0) {
-    encoding = (am_ ^ (1 << kUShift)) | -offset_;  // Flip U to adjust sign.
+  if (is_immed_offset_) {
+    if (offset_ < 0) {
+      encoding = (am_ ^ (1 << kUShift)) | -offset_;  // Flip U to adjust sign.
+    } else {
+      encoding =  am_ | offset_;
+    }
   } else {
-    encoding =  am_ | offset_;
+    uint32_t imm5 = offset_;
+    uint32_t shift = shift_;
+    if (shift == RRX) {
+      imm5 = 0;
+      shift = ROR;
+    }
+    encoding = am_ | static_cast<uint32_t>(rm_) | shift << 5 | imm5 << 7 | B25;
   }
   encoding |= static_cast<uint32_t>(rn_) << kRnShift;
   return encoding;
 }
 
 
-uint32_t Address::encodingThumb(int version) const {
-  CHECK(version == 1 || version == 2);
+uint32_t Address::encodingThumb(bool is_32bit) const {
   uint32_t encoding = 0;
-  if (version == 2) {
-      encoding = static_cast<uint32_t>(rn_) << 16;
-      // Check for the T3/T4 encoding.
-      // PUW must Offset for T3
-      // Convert ARM PU0W to PUW
-      // The Mode is in ARM encoding format which is:
-      // |P|U|0|W|
-      // we need this in thumb2 mode:
-      // |P|U|W|
+  if (is_immed_offset_) {
+    encoding = static_cast<uint32_t>(rn_) << 16;
+    // Check for the T3/T4 encoding.
+    // PUW must be Offset for T3
+    // Convert ARM PU0W to PUW
+    // The Mode is in ARM encoding format which is:
+    // |P|U|0|W|
+    // we need this in thumb2 mode:
+    // |P|U|W|
 
-      uint32_t am = am_;
-      int32_t offset = offset_;
-      if (offset < 0) {
-        am ^= 1 << kUShift;
-        offset = -offset;
-      }
-      if (offset_ < 0 || (offset >= 0 && offset < 256 &&
+    uint32_t am = am_;
+    int32_t offset = offset_;
+    if (offset < 0) {
+      am ^= 1 << kUShift;
+      offset = -offset;
+    }
+    if (offset_ < 0 || (offset >= 0 && offset < 256 &&
         am_ != Mode::Offset)) {
-          // T4 encoding.
-        uint32_t PUW = am >> 21;   // Move down to bottom of word.
-        PUW = (PUW >> 1) | (PUW & 1);   // Bits 3, 2 and 0.
-        // If P is 0 then W must be 1 (Different from ARM).
-        if ((PUW & 0b100) == 0) {
-          PUW |= 0b1;
-        }
-        encoding |= B11 | PUW << 8 | offset;
-      } else {
-        // T3 encoding (also sets op1 to 0b01).
-        encoding |= B23 | offset_;
+      // T4 encoding.
+      uint32_t PUW = am >> 21;   // Move down to bottom of word.
+      PUW = (PUW >> 1) | (PUW & 1);   // Bits 3, 2 and 0.
+      // If P is 0 then W must be 1 (Different from ARM).
+      if ((PUW & 0b100) == 0) {
+        PUW |= 0b1;
       }
+      encoding |= B11 | PUW << 8 | offset;
+    } else {
+      // T3 encoding (also sets op1 to 0b01).
+      encoding |= B23 | offset_;
+    }
   } else {
-    LOG(FATAL) << "Invalid use of encodingThumb for version 1";
+    // Register offset, possibly shifted.
+    // Need to choose between encoding T1 (16 bit) or T2.
+    // Only Offset mode is supported.  Shift must be LSL and the count
+    // is only 2 bits.
+    CHECK_EQ(shift_, LSL);
+    CHECK_LE(offset_, 4);
+    CHECK_EQ(am_, Offset);
+    bool is_t2 = is_32bit;
+    if (ArmAssembler::IsHighRegister(rn_) || ArmAssembler::IsHighRegister(rm_)) {
+      is_t2 = true;
+    } else if (offset_ != 0) {
+      is_t2 = true;
+    }
+    if (is_t2) {
+      encoding = static_cast<uint32_t>(rn_) << 16 | static_cast<uint32_t>(rm_) |
+          offset_ << 4;
+    } else {
+      encoding = static_cast<uint32_t>(rn_) << 3 | static_cast<uint32_t>(rm_) << 6;
+    }
   }
   return encoding;
 }
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 7b662e1..be19174 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -68,7 +68,7 @@
   }
 
   uint32_t encodingArm() const;
-  uint32_t encodingThumb(int version) const;
+  uint32_t encodingThumb() const;
 
   bool IsEmpty() const {
     return type_ == kUnknown;
@@ -196,8 +196,26 @@
     NegPostIndex = (0|0|0) << 21   // negative post-indexed with writeback
   };
 
-  explicit Address(Register rn, int32_t offset = 0, Mode am = Offset) : rn_(rn), offset_(offset),
-      am_(am) {
+  Address(Register rn, int32_t offset = 0, Mode am = Offset) : rn_(rn), rm_(R0),
+      offset_(offset),
+      am_(am), is_immed_offset_(true), shift_(LSL) {
+  }
+
+  Address(Register rn, Register rm, Mode am = Offset) : rn_(rn), rm_(rm), offset_(0),
+      am_(am), is_immed_offset_(false), shift_(LSL) {
+    CHECK_NE(rm, PC);
+  }
+
+  Address(Register rn, Register rm, Shift shift, uint32_t count, Mode am = Offset) :
+                       rn_(rn), rm_(rm), offset_(count),
+                       am_(am), is_immed_offset_(false), shift_(shift) {
+    CHECK_NE(rm, PC);
+  }
+
+  // LDR(literal) - pc relative load.
+  explicit Address(int32_t offset) :
+               rn_(PC), rm_(R0), offset_(offset),
+               am_(Offset), is_immed_offset_(false), shift_(LSL) {
   }
 
   static bool CanHoldLoadOffsetArm(LoadOperandType type, int offset);
@@ -207,7 +225,7 @@
   static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset);
 
   uint32_t encodingArm() const;
-  uint32_t encodingThumb(int version) const;
+  uint32_t encodingThumb(bool is_32bit) const;
 
   uint32_t encoding3() const;
   uint32_t vencoding() const;
@@ -218,6 +236,10 @@
     return rn_;
   }
 
+  Register GetRegisterOffset() const {
+    return rm_;
+  }
+
   int32_t GetOffset() const {
     return offset_;
   }
@@ -226,10 +248,26 @@
     return am_;
   }
 
+  bool IsImmediate() const {
+    return is_immed_offset_;
+  }
+
+  Shift GetShift() const {
+    return shift_;
+  }
+
+  int32_t GetShiftCount() const {
+    CHECK(!is_immed_offset_);
+    return offset_;
+  }
+
  private:
   Register rn_;
-  int32_t offset_;
+  Register rm_;
+  int32_t offset_;      // Used as shift amount for register offset.
   Mode am_;
+  bool is_immed_offset_;
+  Shift shift_;
 };
 
 // Instruction encoding bits.
@@ -544,11 +582,25 @@
 
   // Convenience shift instructions. Use mov instruction with shifter operand
   // for variants setting the status flags or using a register shift count.
-  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
-  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
-  virtual void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
-  virtual void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
-  virtual void Rrx(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Rrx(Register rd, Register rm, bool setcc = false,
+                   Condition cond = AL) = 0;
+
+  virtual void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Asr(Register rd, Register rm, Register rn, bool setcc = false,
+                   Condition cond = AL) = 0;
+  virtual void Ror(Register rd, Register rm, Register rn, bool setcc = false,
+                   Condition cond = AL) = 0;
 
   static bool IsInstructionForExceptionHandling(uword pc);
 
@@ -673,6 +725,14 @@
 
   static uint32_t ModifiedImmediate(uint32_t value);
 
+  static bool IsLowRegister(Register r) {
+    return r < R8;
+  }
+
+  static bool IsHighRegister(Register r) {
+     return r >= R8;
+  }
+
  protected:
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index b2bb20f..267bba8 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -541,20 +541,40 @@
 
 
 void Arm32Assembler::EmitMemOp(Condition cond,
-                             bool load,
-                             bool byte,
-                             Register rd,
-                             const Address& ad) {
+                               bool load,
+                               bool byte,
+                               Register rd,
+                               const Address& ad) {
   CHECK_NE(rd, kNoRegister);
   CHECK_NE(cond, kNoCondition);
   const Address& addr = static_cast<const Address&>(ad);
 
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B26 |
-                     (load ? L : 0) |
-                     (byte ? B : 0) |
-                     (static_cast<int32_t>(rd) << kRdShift) |
-                     addr.encodingArm();
+  int32_t encoding = 0;
+  if (!ad.IsImmediate() && ad.GetRegisterOffset() == PC) {
+    // PC relative LDR(literal)
+    int32_t offset = ad.GetOffset();
+    int32_t u = B23;
+    if (offset < 0) {
+      offset = -offset;
+      u = 0;
+    }
+    CHECK_LT(offset, (1 << 12));
+    encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+         B26 | B24 | u | B20 |
+         (load ? L : 0) |
+         (byte ? B : 0) |
+         (static_cast<int32_t>(rd) << kRdShift) |
+         0xf << 16 |
+         (offset & 0xfff);
+
+  } else {
+    encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+        B26 |
+        (load ? L : 0) |
+        (byte ? B : 0) |
+        (static_cast<int32_t>(rd) << kRdShift) |
+        addr.encodingArm();
+  }
   Emit(encoding);
 }
 
@@ -1020,39 +1040,98 @@
 
 
 void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                         Condition cond) {
+                         bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
-  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+  }
 }
 
 
 void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                         Condition cond) {
+                         bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+  }
 }
 
 
 void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                         Condition cond) {
+                         bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+  }
 }
 
 
 void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                         Condition cond) {
+                         bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
-  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+  }
 }
 
-void Arm32Assembler::Rrx(Register rd, Register rm, Condition cond) {
-  mov(rd, ShifterOperand(rm, ROR, 0), cond);
+void Arm32Assembler::Rrx(Register rd, Register rm, bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ROR, 0), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ROR, 0), cond);
+  }
 }
 
 
+void Arm32Assembler::Lsl(Register rd, Register rm, Register rn,
+                         bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, LSL, rn), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, LSL, rn), cond);
+  }
+}
+
+
+void Arm32Assembler::Lsr(Register rd, Register rm, Register rn,
+                         bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, LSR, rn), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, LSR, rn), cond);
+  }
+}
+
+
+void Arm32Assembler::Asr(Register rd, Register rm, Register rn,
+                         bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ASR, rn), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ASR, rn), cond);
+  }
+}
+
+
+void Arm32Assembler::Ror(Register rd, Register rm, Register rn,
+                         bool setcc, Condition cond) {
+  if (setcc) {
+    movs(rd, ShifterOperand(rm, ROR, rn), cond);
+  } else {
+    mov(rd, ShifterOperand(rm, ROR, rn), cond);
+  }
+}
+
 void Arm32Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR
   CHECK_NE(cond, kNoCondition);
   int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 7a0fce2..7f9094d 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -197,11 +197,25 @@
   void bl(Label* label, Condition cond = AL);
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Rrx(Register rd, Register rm, Condition cond = AL);
+  void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Rrx(Register rd, Register rm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+
+  void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Asr(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Ror(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
 
   void Push(Register rd, Condition cond = AL) OVERRIDE;
   void Pop(Register rd, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 92a9f53..604f59e 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -329,7 +329,7 @@
       ++reg;
     }
     CHECK_LT(reg, 16);
-    CHECK(am == IA_W);      // Only writeback is supported.
+    CHECK(am == DB_W);      // Only writeback is supported.
     ldr(static_cast<Register>(reg), Address(base, kRegisterSize, Address::PostIndex), cond);
   } else {
     EmitMultiMemOp(cond, am, true, base, regs);
@@ -352,8 +352,8 @@
       ++reg;
     }
     CHECK_LT(reg, 16);
-    CHECK(am == DB || am == DB_W);
-    Address::Mode strmode = am == DB_W ? Address::PreIndex : Address::Offset;
+    CHECK(am == IA || am == IA_W);
+    Address::Mode strmode = am == IA ? Address::PreIndex : Address::Offset;
     str(static_cast<Register>(reg), Address(base, -kRegisterSize, strmode), cond);
   } else {
     EmitMultiMemOp(cond, am, false, base, regs);
@@ -642,7 +642,6 @@
            if (imm > (1 << 9)) {    // 9 bit immediate.
              return true;
            }
-           return false;      // 16 bit good.
          } else if (opcode == ADD && rd != SP && rn == SP) {   // 10 bit immediate.
            if (imm > (1 << 10)) {
              return true;
@@ -781,7 +780,7 @@
            imm8;
     } else {
       // Modified immediate.
-      uint32_t imm = ModifiedImmediate(so.encodingThumb(2));
+      uint32_t imm = ModifiedImmediate(so.encodingThumb());
       if (imm == kInvalidModifiedImmediate) {
         LOG(FATAL) << "Immediate value cannot fit in thumb2 modified immediate";
       }
@@ -799,7 +798,7 @@
          set_cc << 20 |
          rn << 16 |
          rd << 8 |
-         so.encodingThumb(2);
+         so.encodingThumb();
   }
   Emit32(encoding);
 }
@@ -1081,6 +1080,82 @@
   }
 }
 
+void Thumb2Assembler::EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc) {
+  CHECK_LT(amount, (1 << 5));
+  if (IsHighRegister(rd) || IsHighRegister(rm) || shift == ROR || shift == RRX) {
+    uint16_t opcode = 0;
+    switch (shift) {
+      case LSL: opcode = 0b00; break;
+      case LSR: opcode = 0b01; break;
+      case ASR: opcode = 0b10; break;
+      case ROR: opcode = 0b11; break;
+      case RRX: opcode = 0b11; amount = 0; break;
+      default:
+        LOG(FATAL) << "Unsupported thumb2 shift opcode";
+    }
+    // 32 bit.
+    int32_t encoding = B31 | B30 | B29 | B27 | B25 | B22 |
+        0xf << 16 | (setcc ? B20 : 0);
+    uint32_t imm3 = amount >> 2;
+    uint32_t imm2 = amount & 0b11;
+    encoding |= imm3 << 12 | imm2 << 6 | static_cast<int16_t>(rm) |
+        static_cast<int16_t>(rd) << 8 | opcode << 4;
+    Emit32(encoding);
+  } else {
+    // 16 bit shift
+    uint16_t opcode = 0;
+    switch (shift) {
+      case LSL: opcode = 0b00; break;
+      case LSR: opcode = 0b01; break;
+      case ASR: opcode = 0b10; break;
+      default:
+        LOG(FATAL) << "Unsupported thumb2 shift opcode";
+    }
+    int16_t encoding = opcode << 11 | amount << 6 | static_cast<int16_t>(rm) << 3 |
+        static_cast<int16_t>(rd);
+    Emit16(encoding);
+  }
+}
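A note for readers (illustration only, not part of the patch): in the 16-bit path above, the
encoding is opcode(2) << 11 | imm5 << 6 | Rm << 3 | Rd. A minimal C++ sketch, cross-checked
against the new ShiftsResults expected output added further below in this patch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // "lsls r0, r1, #5", the first instruction in the new Shifts test.
      const uint16_t opcode = 0b00;  // LSL
      const uint16_t imm5 = 5;       // shift amount
      const uint16_t rm = 1;         // r1, the source register
      const uint16_t rd = 0;         // r0, the destination register
      const uint16_t encoding = opcode << 11 | imm5 << 6 | rm << 3 | rd;
      std::printf("%04x\n", static_cast<unsigned>(encoding));  // Prints 0148, matching ShiftsResults.
      return 0;
    }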
+
+void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc) {
+  CHECK_NE(shift, RRX);
+  bool must_be_32bit = false;
+  if (IsHighRegister(rd) || IsHighRegister(rm) || IsHighRegister(rn) || rd != rn) {
+    must_be_32bit = true;
+  }
+
+  if (must_be_32bit) {
+    uint16_t opcode = 0;
+    switch (shift) {
+      case LSL: opcode = 0b00; break;
+      case LSR: opcode = 0b01; break;
+      case ASR: opcode = 0b10; break;
+      case ROR: opcode = 0b11; break;
+      default:
+        LOG(FATAL) << "Unsupported thumb2 shift opcode";
+    }
+    // 32 bit.
+    int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 |
+        0xf << 12 | (setcc ? B20 : 0);
+    encoding |= static_cast<int16_t>(rn) << 16 | static_cast<int16_t>(rm) |
+        static_cast<int16_t>(rd) << 8 | opcode << 21;
+    Emit32(encoding);
+  } else {
+    uint16_t opcode = 0;
+    switch (shift) {
+      case LSL: opcode = 0b0010; break;
+      case LSR: opcode = 0b0011; break;
+      case ASR: opcode = 0b0100; break;
+      default:
+        LOG(FATAL) << "Unsupported thumb2 shift opcode";
+    }
+    int16_t encoding = B14 | opcode << 6 | static_cast<int16_t>(rm) << 3 |
+        static_cast<int16_t>(rd);
+    Emit16(encoding);
+  }
+}
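Similarly (illustration only, not part of the patch), the 16-bit register-shift path above
encodes B14 | opcode(4) << 6 | Rm << 3 | Rd, where Rd is also the shifted source. A sketch
checked against ShiftsResults:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // "lsls r0, r1", i.e. Lsl(R0, R0, R1) in the new Shifts test (Rd == Rn, low registers).
      const uint16_t b14 = 1u << 14;
      const uint16_t opcode = 0b0010;  // LSL (register)
      const uint16_t rm = 1;           // r1 holds the shift amount
      const uint16_t rd = 0;           // r0 is shifted in place
      const uint16_t encoding = b14 | opcode << 6 | rm << 3 | rd;
      std::printf("%04x\n", static_cast<unsigned>(encoding));  // Prints 4088, matching ShiftsResults.
      return 0;
    }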
+
+
 
 void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const {
   bool link = type_ == kUnconditionalLinkX || type_ == kUnconditionalLink;
@@ -1172,7 +1247,7 @@
   }
 
   Register rn = ad.GetRegister();
-  if (IsHighRegister(rn) && rn != SP) {
+  if (IsHighRegister(rn) && rn != SP && rn != PC) {
     must_be_32bit = true;
   }
 
@@ -1180,87 +1255,132 @@
     must_be_32bit = true;
   }
 
-  int32_t offset = ad.GetOffset();
+  if (ad.IsImmediate()) {
+    // Immediate offset
+    int32_t offset = ad.GetOffset();
 
-  // The 16 bit SP relative instruction can only have a 10 bit offset.
-  if (rn == SP && offset > 1024) {
-    must_be_32bit = true;
-  }
-
-  if (byte) {
-    // 5 bit offset, no shift.
-    if (offset > 32) {
+    // The 16 bit SP relative instruction can only have a 10 bit offset.
+    if (rn == SP && offset >= (1 << 10)) {
       must_be_32bit = true;
     }
-  } else if (half) {
-    // 6 bit offset, shifted by 1.
-    if (offset > 64) {
-      must_be_32bit = true;
-    }
-  } else {
-    // 7 bit offset, shifted by 2.
-    if (offset > 128) {
-       must_be_32bit = true;
-     }
-  }
-
-  if (must_be_32bit) {
-    int32_t encoding = B31 | B30 | B29 | B28 | B27 |
-                  (load ? B20 : 0) |
-                  (is_signed ? B24 : 0) |
-                  static_cast<uint32_t>(rd) << 12 |
-                  ad.encodingThumb(2) |
-                  (byte ? 0 : half ? B21 : B22);
-    Emit32(encoding);
-  } else {
-    // 16 bit thumb1.
-    uint8_t opA = 0;
-    bool sp_relative = false;
 
     if (byte) {
-      opA = 0b0111;
+      // 5 bit offset, no shift.
+      if (offset >= (1 << 5)) {
+        must_be_32bit = true;
+      }
     } else if (half) {
-      opA = 0b1000;
+      // 6 bit offset, shifted by 1.
+      if (offset >= (1 << 6)) {
+        must_be_32bit = true;
+      }
     } else {
-      if (rn == SP) {
-        opA = 0b1001;
-        sp_relative = true;
-      } else {
-        opA = 0b0110;
+      // 7 bit offset, shifted by 2.
+      if (offset >= (1 << 7)) {
+        must_be_32bit = true;
       }
     }
-    int16_t encoding = opA << 12 |
-                (load ? B11 : 0);
 
-    CHECK_GE(offset, 0);
-    if (sp_relative) {
-      // SP relative, 10 bit offset.
-      CHECK_LT(offset, 1024);
-      CHECK_EQ((offset & 0b11), 0);
-      encoding |= rd << 8 | offset >> 2;
+    if (must_be_32bit) {
+      int32_t encoding = B31 | B30 | B29 | B28 | B27 |
+          (load ? B20 : 0) |
+          (is_signed ? B24 : 0) |
+          static_cast<uint32_t>(rd) << 12 |
+          ad.encodingThumb(true) |
+          (byte ? 0 : half ? B21 : B22);
+      Emit32(encoding);
     } else {
-      // No SP relative.  The offset is shifted right depending on
-      // the size of the load/store.
-      encoding |= static_cast<uint32_t>(rd);
+      // 16 bit thumb1.
+      uint8_t opA = 0;
+      bool sp_relative = false;
 
       if (byte) {
-        // 5 bit offset, no shift.
-        CHECK_LT(offset, 32);
+        opA = 0b0111;
       } else if (half) {
-        // 6 bit offset, shifted by 1.
-        CHECK_LT(offset, 64);
-        CHECK_EQ((offset & 0b1), 0);
-        offset >>= 1;
+        opA = 0b1000;
       } else {
-        // 7 bit offset, shifted by 2.
-        CHECK_LT(offset, 128);
-        CHECK_EQ((offset & 0b11), 0);
-        offset >>= 2;
+        if (rn == SP) {
+          opA = 0b1001;
+          sp_relative = true;
+        } else {
+          opA = 0b0110;
+        }
       }
-      encoding |= rn << 3 | offset  << 6;
-    }
+      int16_t encoding = opA << 12 |
+          (load ? B11 : 0);
 
-    Emit16(encoding);
+      CHECK_GE(offset, 0);
+      if (sp_relative) {
+        // SP relative, 10 bit offset.
+        CHECK_LT(offset, (1 << 10));
+        CHECK_EQ((offset & 0b11), 0);
+        encoding |= rd << 8 | offset >> 2;
+      } else {
+        // No SP relative.  The offset is shifted right depending on
+        // the size of the load/store.
+        encoding |= static_cast<uint32_t>(rd);
+
+        if (byte) {
+          // 5 bit offset, no shift.
+          CHECK_LT(offset, (1 << 5));
+        } else if (half) {
+          // 6 bit offset, shifted by 1.
+          CHECK_LT(offset, (1 << 6));
+          CHECK_EQ((offset & 0b1), 0);
+          offset >>= 1;
+        } else {
+          // 7 bit offset, shifted by 2.
+          CHECK_LT(offset, (1 << 7));
+          CHECK_EQ((offset & 0b11), 0);
+          offset >>= 2;
+        }
+        encoding |= rn << 3 | offset  << 6;
+      }
+
+      Emit16(encoding);
+    }
+  } else {
+    // Register offset or PC-relative literal.
+    if (ad.GetRegister() == PC) {
+      // PC-relative literal encoding.
+      int32_t offset = ad.GetOffset();
+      if (must_be_32bit || offset < 0 || offset >= (1 << 10) || !load) {
+        int32_t up = B23;
+        if (offset < 0) {
+          offset = -offset;
+          up = 0;
+        }
+        CHECK_LT(offset, (1 << 12));
+        int32_t encoding = 0x1f << 27 | 0xf << 16 | B22 | (load ? B20 : 0) |
+            offset | up |
+            static_cast<uint32_t>(rd) << 12;
+        Emit32(encoding);
+      } else {
+        // 16 bit literal load.
+        CHECK_GE(offset, 0);
+        CHECK_LT(offset, (1 << 10));
+        int32_t encoding = B14 | (load ? B11 : 0) | static_cast<uint32_t>(rd) << 8 | offset >> 2;
+        Emit16(encoding);
+      }
+    } else {
+      if (ad.GetShiftCount() != 0) {
+        // If there is a shift count this must be 32 bit.
+        must_be_32bit = true;
+      } else if (IsHighRegister(ad.GetRegisterOffset())) {
+        must_be_32bit = true;
+      }
+
+      if (must_be_32bit) {
+        int32_t encoding = 0x1f << 27 | B22 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 |
+            ad.encodingThumb(true);
+        Emit32(encoding);
+      } else {
+        // 16 bit register offset.
+        int32_t encoding = B14 | B12 | (load ? B11 : 0) | static_cast<uint32_t>(rd) |
+            ad.encodingThumb(false);
+        Emit16(encoding);
+      }
+    }
   }
 }
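A rough sketch (not part of the patch) of the 16-bit/32-bit decision made above for
immediate-offset word loads and stores on low registers; the byte and halfword cases follow
the same pattern with 5- and 6-bit offsets. It is consistent with the new LoadStoreLimits
test, where "ldr r0, [r4, #124]" stays 16 bit and #128 forces 32 bit. The helper name below
is made up for the illustration:

    // Returns true when a word load/store with an immediate offset still fits in a
    // 16-bit Thumb encoding (low registers, non-negative, word-aligned offset).
    static bool FitsIn16BitWordLoadStore(bool base_is_sp, int32_t offset) {
      if (offset < 0 || (offset & 0b11) != 0) {
        return false;
      }
      if (base_is_sp) {
        return offset < (1 << 10);  // 10-bit offset, word scaled.
      }
      return offset < (1 << 7);     // 7-bit offset, word scaled.
    }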
 
@@ -2012,37 +2132,70 @@
 
 
 void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                          Condition cond) {
+                          bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
-  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+  CheckCondition(cond);
+  EmitShift(rd, rm, LSL, shift_imm, setcc);
 }
 
 
 void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                          Condition cond) {
+                          bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+  CheckCondition(cond);
+  EmitShift(rd, rm, LSR, shift_imm, setcc);
 }
 
 
 void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                          Condition cond) {
+                          bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
   if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+  CheckCondition(cond);
+  EmitShift(rd, rm, ASR, shift_imm, setcc);
 }
 
 
 void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                          Condition cond) {
+                          bool setcc, Condition cond) {
   CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
-  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+  CheckCondition(cond);
+  EmitShift(rd, rm, ROR, shift_imm, setcc);
 }
 
 
-void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond) {
-  mov(rd, ShifterOperand(rm, ROR, 0), cond);
+void Thumb2Assembler::Rrx(Register rd, Register rm, bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, RRX, rm, setcc);
+}
+
+
+void Thumb2Assembler::Lsl(Register rd, Register rm, Register rn,
+                          bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, LSL, rn, setcc);
+}
+
+
+void Thumb2Assembler::Lsr(Register rd, Register rm, Register rn,
+                          bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, LSR, rn, setcc);
+}
+
+
+void Thumb2Assembler::Asr(Register rd, Register rm, Register rn,
+                          bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, ASR, rn, setcc);
+}
+
+
+void Thumb2Assembler::Ror(Register rd, Register rm, Register rn,
+                          bool setcc, Condition cond) {
+  CheckCondition(cond);
+  EmitShift(rd, rm, ROR, rn, setcc);
 }
 
 
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 60b9384..5f24e4e 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -221,11 +221,25 @@
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
 
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Rrx(Register rd, Register rm, Condition cond = AL);
+  void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Lsr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Asr(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Ror(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Rrx(Register rd, Register rm, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+
+  void Lsl(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Lsr(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Asr(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
+  void Ror(Register rd, Register rm, Register rn, bool setcc = false,
+           Condition cond = AL) OVERRIDE;
 
   void Push(Register rd, Condition cond = AL) OVERRIDE;
   void Pop(Register rd, Condition cond = AL) OVERRIDE;
@@ -395,14 +409,8 @@
   static int DecodeBranchOffset(int32_t inst);
   int32_t EncodeTstOffset(int offset, int32_t inst);
   int DecodeTstOffset(int32_t inst);
-
-  bool IsLowRegister(Register r) {
-    return r < R8;
-  }
-
-  bool IsHighRegister(Register r) {
-     return r >= R8;
-  }
+  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc = false);
+  void EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc = false);
 
   bool force_32bit_;      // Force the assembler to use 32 bit thumb2 instructions.
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 55fbed1..68cb656 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -28,6 +28,15 @@
 #include "assembler_thumb_test_expected.cc.inc"
 
 #ifndef HAVE_ANDROID_OS
+// This controls whether the results are printed to the
+// screen or compared against the expected output.
+// To generate new expected output, set this to true and
+// copy the printed output into the .cc.inc file, in the
+// same form as the existing results.
+//
+// When this is false, nothing is printed; the results are
+// compared against the expected output in the .cc.inc file.
 static constexpr bool kPrintResults = false;
 #endif
 
@@ -38,6 +47,19 @@
   }
 }
 
+int CompareIgnoringSpace(const char* s1, const char* s2) {
+  while (*s1 != '\0') {
+    while (isspace(*s1)) ++s1;
+    while (isspace(*s2)) ++s2;
+    if (*s1 == '\0' || *s1 != *s2) {
+      break;
+    }
+    ++s1;
+    ++s2;
+  }
+  return *s1 - *s2;
+}
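A quick illustration (not part of the patch) of the helper above: runs of whitespace are
skipped on both sides, so objdump's tab-separated output compares equal to the expected
strings regardless of spacing, while real differences still fail. The test below is
hypothetical and only documents that behaviour:

    TEST(Thumb2AssemblerTest, CompareIgnoringSpaceSketch) {
      ASSERT_EQ(CompareIgnoringSpace("ldr\tr0, [r1, r2]", "ldr r0, [r1, r2]"), 0);
      ASSERT_NE(CompareIgnoringSpace("ldr r0, [r1, r2]", "str r0, [r1, r2]"), 0);
    }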
+
 std::string GetAndroidToolsDir() {
   std::string root;
   const char* android_build_top = getenv("ANDROID_BUILD_TOP");
@@ -180,7 +202,10 @@
       if (s == nullptr) {
         break;
       }
-      ASSERT_EQ(strcmp(results->second[lineindex], testline), 0);
+      if (CompareIgnoringSpace(results->second[lineindex], testline) != 0) {
+        LOG(FATAL) << "Output is not as expected at line " << lineindex
+            << ": " << results->second[lineindex] << " / " << testline;
+      }
       ++lineindex;
     }
     // Check that we are at the end.
@@ -1222,6 +1247,152 @@
   delete assembler;
 }
 
+TEST(Thumb2AssemblerTest, Shifts) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  // 16 bit
+  __ Lsl(R0, R1, 5);
+  __ Lsr(R0, R1, 5);
+  __ Asr(R0, R1, 5);
+
+  __ Lsl(R0, R0, R1);
+  __ Lsr(R0, R0, R1);
+  __ Asr(R0, R0, R1);
+
+  // 32 bit due to high registers.
+  __ Lsl(R8, R1, 5);
+  __ Lsr(R0, R8, 5);
+  __ Asr(R8, R1, 5);
+  __ Ror(R0, R8, 5);
+
+  // 32 bit due to different Rd and Rn.
+  __ Lsl(R0, R1, R2);
+  __ Lsr(R0, R1, R2);
+  __ Asr(R0, R1, R2);
+  __ Ror(R0, R1, R2);
+
+  // 32 bit due to use of high registers.
+  __ Lsl(R8, R1, R2);
+  __ Lsr(R0, R8, R2);
+  __ Asr(R0, R1, R8);
+
+  // S bit (all 32 bit)
+
+  // 32 bit due to high registers.
+  __ Lsl(R8, R1, 5, true);
+  __ Lsr(R0, R8, 5, true);
+  __ Asr(R8, R1, 5, true);
+  __ Ror(R0, R8, 5, true);
+
+  // 32 bit due to different Rd and Rn.
+  __ Lsl(R0, R1, R2, true);
+  __ Lsr(R0, R1, R2, true);
+  __ Asr(R0, R1, R2, true);
+  __ Ror(R0, R1, R2, true);
+
+  // 32 bit due to use of high registers.
+  __ Lsl(R8, R1, R2, true);
+  __ Lsr(R0, R8, R2, true);
+  __ Asr(R0, R1, R8, true);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "Shifts");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, LoadStoreRegOffset) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  // 16 bit.
+  __ ldr(R0, Address(R1, R2));
+  __ str(R0, Address(R1, R2));
+
+  // 32 bit due to shift.
+  __ ldr(R0, Address(R1, R2, LSL, 1));
+  __ str(R0, Address(R1, R2, LSL, 1));
+
+  __ ldr(R0, Address(R1, R2, LSL, 3));
+  __ str(R0, Address(R1, R2, LSL, 3));
+
+  // 32 bit due to high register use.
+  __ ldr(R8, Address(R1, R2));
+  __ str(R8, Address(R1, R2));
+
+  __ ldr(R1, Address(R8, R2));
+  __ str(R2, Address(R8, R2));
+
+  __ ldr(R0, Address(R1, R8));
+  __ str(R0, Address(R1, R8));
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "LoadStoreRegOffset");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, LoadStoreLiteral) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ ldr(R0, Address(4));
+  __ str(R0, Address(4));
+
+  __ ldr(R0, Address(-8));
+  __ str(R0, Address(-8));
+
+  // Limits.
+  __ ldr(R0, Address(0x3ff));       // 10 bits (16 bit).
+  __ ldr(R0, Address(0x7ff));       // 11 bits (32 bit).
+  __ str(R0, Address(0x3ff));       // 32 bit (no 16 bit str(literal)).
+  __ str(R0, Address(0x7ff));       // 11 bits (32 bit).
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "LoadStoreLiteral");
+  delete assembler;
+}
+
+TEST(Thumb2AssemblerTest, LoadStoreLimits) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  __ ldr(R0, Address(R4, 124));     // 16 bit.
+  __ ldr(R0, Address(R4, 128));     // 32 bit.
+
+  __ ldrb(R0, Address(R4, 31));     // 16 bit.
+  __ ldrb(R0, Address(R4, 32));     // 32 bit.
+
+  __ ldrh(R0, Address(R4, 62));     // 16 bit.
+  __ ldrh(R0, Address(R4, 64));     // 32 bit.
+
+  __ ldrsb(R0, Address(R4, 31));     // 32 bit.
+  __ ldrsb(R0, Address(R4, 32));     // 32 bit.
+
+  __ ldrsh(R0, Address(R4, 62));     // 32 bit.
+  __ ldrsh(R0, Address(R4, 64));     // 32 bit.
+
+  __ str(R0, Address(R4, 124));     // 16 bit.
+  __ str(R0, Address(R4, 128));     // 32 bit.
+
+  __ strb(R0, Address(R4, 31));     // 16 bit.
+  __ strb(R0, Address(R4, 32));     // 32 bit.
+
+  __ strh(R0, Address(R4, 62));     // 16 bit.
+  __ strh(R0, Address(R4, 64));     // 32 bit.
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "LoadStoreLimits");
+  delete assembler;
+}
+
 #undef __
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index c5f2226..3943e37 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -4742,6 +4742,82 @@
   " 80a:	0011      	movs	r1, r2\n",
   nullptr
 };
+const char* ShiftsResults[] = {
+  "   0:	0148      	lsls	r0, r1, #5\n",
+  "   2:	0948      	lsrs	r0, r1, #5\n",
+  "   4:	1148      	asrs	r0, r1, #5\n",
+  "   6:	4088      	lsls	r0, r1\n",
+  "   8:	40c8      	lsrs	r0, r1\n",
+  "   a:	4108      	asrs	r0, r1\n",
+  "   c:	ea4f 1841 	mov.w	r8, r1, lsl #5\n",
+  "  10:	ea4f 1058 	mov.w	r0, r8, lsr #5\n",
+  "  14:	ea4f 1861 	mov.w	r8, r1, asr #5\n",
+  "  18:	ea4f 1078 	mov.w	r0, r8, ror #5\n",
+  "  1c:	fa01 f002 	lsl.w	r0, r1, r2\n",
+  "  20:	fa21 f002 	lsr.w	r0, r1, r2\n",
+  "  24:	fa41 f002 	asr.w	r0, r1, r2\n",
+  "  28:	fa61 f002 	ror.w	r0, r1, r2\n",
+  "  2c:	fa01 f802 	lsl.w	r8, r1, r2\n",
+  "  30:	fa28 f002 	lsr.w	r0, r8, r2\n",
+  "  34:	fa41 f008 	asr.w	r0, r1, r8\n",
+  "  38:	ea5f 1841 	movs.w	r8, r1, lsl #5\n",
+  "  3c:	ea5f 1058 	movs.w	r0, r8, lsr #5\n",
+  "  40:	ea5f 1861 	movs.w	r8, r1, asr #5\n",
+  "  44:	ea5f 1078 	movs.w	r0, r8, ror #5\n",
+  "  48:	fa11 f002 	lsls.w	r0, r1, r2\n",
+  "  4c:	fa31 f002 	lsrs.w	r0, r1, r2\n",
+  "  50:	fa51 f002 	asrs.w	r0, r1, r2\n",
+  "  54:	fa71 f002 	rors.w	r0, r1, r2\n",
+  "  58:	fa11 f802 	lsls.w	r8, r1, r2\n",
+  "  5c:	fa38 f002 	lsrs.w	r0, r8, r2\n",
+  "  60:	fa51 f008 	asrs.w	r0, r1, r8\n",
+  nullptr
+};
+const char* LoadStoreRegOffsetResults[] = {
+  "   0:	5888      	ldr	r0, [r1, r2]\n",
+  "   2:	5088      	str	r0, [r1, r2]\n",
+  "   4:	f851 0012 	ldr.w	r0, [r1, r2, lsl #1]\n",
+  "   8:	f841 0012 	str.w	r0, [r1, r2, lsl #1]\n",
+  "   c:	f851 0032 	ldr.w	r0, [r1, r2, lsl #3]\n",
+  "  10:	f841 0032 	str.w	r0, [r1, r2, lsl #3]\n",
+  "  14:	f851 8002 	ldr.w	r8, [r1, r2]\n",
+  "  18:	f841 8002 	str.w	r8, [r1, r2]\n",
+  "  1c:	f858 1002 	ldr.w	r1, [r8, r2]\n",
+  "  20:	f848 2002 	str.w	r2, [r8, r2]\n",
+  "  24:	f851 0008 	ldr.w	r0, [r1, r8]\n",
+  "  28:	f841 0008 	str.w	r0, [r1, r8]\n",
+  nullptr
+};
+const char* LoadStoreLiteralResults[] = {
+  "   0:   4801            ldr     r0, [pc, #4]    ; (8 <LoadStoreLiteral+0x8>)\n",
+  "   2:   f8cf 0004       str.w   r0, [pc, #4]    ; 8 <LoadStoreLiteral+0x8>\n",
+  "   6:   f85f 0008       ldr.w   r0, [pc, #-8]   ; 0 <LoadStoreLiteral>\n",
+  "   a:   f84f 0008       str.w   r0, [pc, #-8]   ; 4 <LoadStoreLiteral+0x4>\n",
+  "   e:   48ff            ldr     r0, [pc, #1020] ; (40c <LoadStoreLiteral+0x40c>)\n",
+  "  10:   f8df 07ff       ldr.w   r0, [pc, #2047] ; 813 <LoadStoreLiteral+0x813>\n",
+  "  14:   f8cf 03ff       str.w   r0, [pc, #1023] ; 417 <LoadStoreLiteral+0x417>\n",
+  "  18:   f8cf 07ff       str.w   r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n",
+  nullptr
+};
+const char* LoadStoreLimitsResults[] = {
+  "   0:   6fe0            ldr     r0, [r4, #124]  ; 0x7c\n",
+  "   2:   f8d4 0080       ldr.w   r0, [r4, #128]  ; 0x80\n",
+  "   6:   7fe0            ldrb    r0, [r4, #31]\n",
+  "   8:   f894 0020       ldrb.w  r0, [r4, #32]\n",
+  "   c:   8fe0            ldrh    r0, [r4, #62]   ; 0x3e\n",
+  "   e:   f8b4 0040       ldrh.w  r0, [r4, #64]   ; 0x40\n",
+  "  12:   f994 001f       ldrsb.w r0, [r4, #31]\n",
+  "  16:   f994 0020       ldrsb.w r0, [r4, #32]\n",
+  "  1a:   f9b4 003e       ldrsh.w r0, [r4, #62]   ; 0x3e\n",
+  "  1e:   f9b4 0040       ldrsh.w r0, [r4, #64]   ; 0x40\n",
+  "  22:   67e0            str     r0, [r4, #124]  ; 0x7c\n",
+  "  24:   f8c4 0080       str.w   r0, [r4, #128]  ; 0x80\n",
+  "  28:   77e0            strb    r0, [r4, #31]\n",
+  "  2a:   f884 0020       strb.w  r0, [r4, #32]\n",
+  "  2e:   87e0            strh    r0, [r4, #62]   ; 0x3e\n",
+  "  30:   f8a4 0040       strh.w  r0, [r4, #64]   ; 0x40\n",
+  nullptr
+};
 std::map<std::string, const char**> test_results;
 void setup_results() {
     test_results["SimpleMov"] = SimpleMovResults;
@@ -4785,4 +4861,8 @@
     test_results["CompareAndBranchRelocation16"] = CompareAndBranchRelocation16Results;
     test_results["CompareAndBranchRelocation32"] = CompareAndBranchRelocation32Results;
     test_results["MixedBranch32"] = MixedBranch32Results;
+    test_results["Shifts"] = ShiftsResults;
+    test_results["LoadStoreRegOffset"] = LoadStoreRegOffsetResults;
+    test_results["LoadStoreLiteral"] = LoadStoreLiteralResults;
+    test_results["LoadStoreLimits"] = LoadStoreLimitsResults;
 }
diff --git a/compiler/utils/scoped_arena_containers.h b/compiler/utils/scoped_arena_containers.h
index 5deb661..6728565 100644
--- a/compiler/utils/scoped_arena_containers.h
+++ b/compiler/utils/scoped_arena_containers.h
@@ -17,8 +17,10 @@
 #ifndef ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
 #define ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
 
-#include <vector>
+#include <deque>
+#include <queue>
 #include <set>
+#include <vector>
 
 #include "utils/scoped_arena_allocator.h"
 #include "safe_map.h"
@@ -26,6 +28,12 @@
 namespace art {
 
 template <typename T>
+using ScopedArenaDeque = std::deque<T, ScopedArenaAllocatorAdapter<T>>;
+
+template <typename T>
+using ScopedArenaQueue = std::queue<T, ScopedArenaDeque<T>>;
+
+template <typename T>
 using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>;
 
 template <typename T, typename Comparator = std::less<T>>
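A usage sketch for the new aliases (not part of the patch). It assumes a ScopedArenaAllocator
named "allocator" exposing the Adapter() helper that the other ScopedArena containers are
constructed with:

    // ScopedArenaQueue is std::queue backed by an arena-allocated std::deque, so
    // pushed elements are allocated from the scoped arena, not the heap.
    ScopedArenaDeque<int> backing(allocator.Adapter());
    ScopedArenaQueue<int> queue(std::move(backing));
    queue.push(42);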
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 41d1529..4d5d613 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -949,6 +949,14 @@
 }
 
 
+void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // andq only supports 32b immediate.
+  EmitRex64(reg);
+  EmitComplex(4, Operand(reg), imm);
+}
+
+
 void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -972,6 +980,14 @@
 }
 
 
+void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x33);
+  EmitOperand(dst.LowBits(), Operand(src));
+}
+
+
 void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // xorq only supports 32b immediate.
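Illustration (not part of the patch): the new register-register xorq above always emits a
REX.W prefix, so a hypothetical call such as

    // Encodes as 48 33 c3, i.e. "xor rax, rbx":
    //   REX.W = 0x48, opcode = 0x33 (XOR r64, r/m64), ModRM = 0xc3 (rax ^= rbx).
    assembler->xorq(CpuRegister(RAX), CpuRegister(RBX));

produces a full 64-bit exclusive-or rather than the 32-bit form emitted by xorl.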
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 9aa5a54..7514854 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -391,12 +391,14 @@
 
   void andl(CpuRegister dst, const Immediate& imm);
   void andl(CpuRegister dst, CpuRegister src);
+  void andq(CpuRegister dst, const Immediate& imm);
 
   void orl(CpuRegister dst, const Immediate& imm);
   void orl(CpuRegister dst, CpuRegister src);
 
   void xorl(CpuRegister dst, CpuRegister src);
   void xorq(CpuRegister dst, const Immediate& imm);
+  void xorq(CpuRegister dst, CpuRegister src);
 
   void addl(CpuRegister dst, CpuRegister src);
   void addl(CpuRegister reg, const Immediate& imm);
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 5d838c0..a06b5c5 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -16,6 +16,8 @@
 
 LOCAL_PATH := $(call my-dir)
 
+include art/build/Android.common.mk
+
 dalvikvm_cflags := -Wall -Werror -Wextra -std=gnu++11
 
 include $(CLEAR_VARS)
@@ -27,6 +29,7 @@
 LOCAL_C_INCLUDES := art/runtime
 LOCAL_SHARED_LIBRARIES := libdl libnativehelper
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common.mk
 LOCAL_MULTILIB := both
 LOCAL_MODULE_STEM_32 := dalvikvm32
 LOCAL_MODULE_STEM_64 := dalvikvm64
@@ -37,6 +40,10 @@
 include  $(BUILD_SYSTEM)/executable_prefer_symlink.mk
 
 ART_TARGET_EXECUTABLES += $(TARGET_OUT_EXECUTABLES)/$(LOCAL_MODULE)
+ART_TARGET_EXECUTABLES += $(TARGET_OUT_EXECUTABLES)/$(LOCAL_MODULE)$(ART_PHONY_TEST_TARGET_SUFFIX)
+ifdef 2ND_ART_PHONY_TEST_TARGET_SUFFIX
+  ART_TARGET_EXECUTABLES += $(TARGET_OUT_EXECUTABLES)/$(LOCAL_MODULE)$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
+endif
 
 include $(CLEAR_VARS)
 LOCAL_MODULE := dalvikvm
@@ -48,7 +55,8 @@
 LOCAL_C_INCLUDES := art/runtime
 LOCAL_SHARED_LIBRARIES := libnativehelper
 LOCAL_LDFLAGS := -ldl -lpthread
-LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.common.mk
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MULTILIB := both
 LOCAL_MODULE_STEM_32 := dalvikvm32
@@ -56,7 +64,11 @@
 include external/libcxx/libcxx.mk
 include $(BUILD_HOST_EXECUTABLE)
 
-ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)32
-ifneq ($(HOST_PREFER_32_BIT),true)
-  ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)64
-endif
\ No newline at end of file
+# Create symlink for the primary version target.
+include  $(BUILD_SYSTEM)/executable_prefer_symlink.mk
+
+ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
+ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)$(ART_PHONY_TEST_HOST_SUFFIX)
+ifdef 2ND_ART_PHONY_TEST_HOST_SUFFIX
+  ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)$(2ND_ART_PHONY_TEST_HOST_SUFFIX)
+endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 2d55140..3387f91 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -478,11 +478,8 @@
         continue;
       }
       std::string error_msg;
-      const DexFile* dex_file = DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg);
-      if (dex_file == nullptr) {
+      if (!DexFile::Open(parsed[i].c_str(), parsed[i].c_str(), &error_msg, &dex_files)) {
         LOG(WARNING) << "Failed to open dex file '" << parsed[i] << "': " << error_msg;
-      } else {
-        dex_files.push_back(dex_file);
       }
     }
   }
@@ -536,12 +533,9 @@
       LOG(WARNING) << "Skipping non-existent dex file '" << dex_filename << "'";
       continue;
     }
-    const DexFile* dex_file = DexFile::Open(dex_filename, dex_location, &error_msg);
-    if (dex_file == nullptr) {
+    if (!DexFile::Open(dex_filename, dex_location, &error_msg, &dex_files)) {
       LOG(WARNING) << "Failed to open .dex from file '" << dex_filename << "': " << error_msg;
       ++failure_count;
-    } else {
-      dex_files.push_back(dex_file);
     }
     ATRACE_END();
   }
@@ -881,6 +875,8 @@
       watch_dog_enabled = false;
     } else if (option == "--gen-gdb-info") {
       generate_gdb_information = true;
+      // Debug symbols are needed for gdb information.
+      include_debug_symbols = true;
     } else if (option == "--no-gen-gdb-info") {
       generate_gdb_information = false;
     } else if (option.starts_with("-j")) {
@@ -1002,7 +998,7 @@
     } else if (option == "--no-profile-file") {
       // No profile
     } else if (option.starts_with("--top-k-profile-threshold=")) {
-      ParseDouble(option.data(), '=', 10.0, 90.0, &top_k_profile_threshold);
+      ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold);
     } else if (option == "--print-pass-names") {
       PassDriverMEOpts::PrintPassNames();
     } else if (option.starts_with("--disable-passes=")) {
@@ -1136,8 +1132,8 @@
   }
 
   if (compiler_filter_string == nullptr) {
-    if (instruction_set == kMips) {
-      // TODO: fix compiler for Mips.
+    if (instruction_set == kMips64) {
+      // TODO: fix compiler for Mips64.
       compiler_filter_string = "interpret-only";
     } else if (image) {
       compiler_filter_string = "speed";
@@ -1170,7 +1166,6 @@
   CheckExplicitCheckOptions(instruction_set, &explicit_null_checks, &explicit_so_checks,
                             &explicit_suspend_checks);
 
-  LOG(INFO) << "init compiler options for explicit null: " << explicit_null_checks;
   CompilerOptions compiler_options(compiler_filter,
                                    huge_method_threshold,
                                    large_method_threshold,
@@ -1319,13 +1314,11 @@
             << error_msg;
         return EXIT_FAILURE;
       }
-      const DexFile* dex_file = DexFile::Open(*zip_archive.get(), zip_location, &error_msg);
-      if (dex_file == nullptr) {
+      if (!DexFile::OpenFromZip(*zip_archive.get(), zip_location, &error_msg, &dex_files)) {
         LOG(ERROR) << "Failed to open dex from file descriptor for zip file '" << zip_location
             << "': " << error_msg;
         return EXIT_FAILURE;
       }
-      dex_files.push_back(dex_file);
       ATRACE_END();
     } else {
       size_t failure_count = OpenDexFiles(dex_filenames, dex_locations, dex_files);
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 135a5c6..e6cbf05 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -21,6 +21,7 @@
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "thread.h"
+#include <inttypes.h>
 
 namespace art {
 namespace x86 {
@@ -923,6 +924,14 @@
   case 0x99:
     opcode << "cdq";
     break;
+  case 0x9B:
+    if (instr[1] == 0xDF && instr[2] == 0xE0) {
+      opcode << "fstsw\tax";
+      instr += 2;
+    } else {
+      opcode << StringPrintf("unknown opcode '%02X'", *instr);
+    }
+    break;
   case 0xAF:
     opcode << (prefix[2] == 0x66 ? "scasw" : "scasl");
     break;
@@ -933,6 +942,12 @@
     reg_in_opcode = true;
     break;
   case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF:
+    if (rex == 0x48) {
+      opcode << "movabsq";
+      immediate_bytes = 8;
+      reg_in_opcode = true;
+      break;
+    }
     opcode << "mov";
     immediate_bytes = 4;
     reg_in_opcode = true;
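Illustration (not part of the patch): with the check above, a REX.W prefix (0x48) in front of
the B8..BF opcode row selects the 64-bit immediate form. A hypothetical input byte sequence
for the disassembler:

    // 0x48 0xb8 followed by 8 little-endian immediate bytes decodes as a movabsq of
    // 0x1122334455667788 into rax; without REX.W, 0xb8 keeps its 4-byte immediate and
    // decodes as a plain "mov".
    static const uint8_t kMovabsqExample[] = {
        0x48, 0xb8, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,
    };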
@@ -969,11 +984,25 @@
     break;
   case 0xCC: opcode << "int 3"; break;
   case 0xD9:
-    static const char* d9_opcodes[] = {"flds", "unknown-d9", "fsts", "fstps", "fldenv", "fldcw", "fnstenv", "fnstcw"};
-    modrm_opcodes = d9_opcodes;
-    store = true;
-    has_modrm = true;
-    reg_is_opcode = true;
+    if (instr[1] == 0xF8) {
+      opcode << "fprem";
+      instr++;
+    } else {
+      static const char* d9_opcodes[] = {"flds", "unknown-d9", "fsts", "fstps", "fldenv", "fldcw",
+                                         "fnstenv", "fnstcw"};
+      modrm_opcodes = d9_opcodes;
+      store = true;
+      has_modrm = true;
+      reg_is_opcode = true;
+    }
+    break;
+  case 0xDA:
+    if (instr[1] == 0xE9) {
+      opcode << "fucompp";
+      instr++;
+    } else {
+      opcode << StringPrintf("unknown opcode '%02X'", *instr);
+    }
     break;
   case 0xDB:
     static const char* db_opcodes[] = {"fildl", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db"};
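For reference (not part of the patch), the byte pairs matched above are the standard x87
encodings:

    // 0x9b 0xdf 0xe0  ->  fstsw ax   (FWAIT followed by FNSTSW AX)
    // 0xd9 0xf8       ->  fprem      (partial remainder of st(0) by st(1))
    // 0xda 0xe9       ->  fucompp    (unordered compare st(0) with st(1), pop twice)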
@@ -1010,11 +1039,18 @@
     immediate_bytes = ((instr[1] & 0x38) == 0) ? 1 : 0;
     break;
   case 0xFF:
-    static const char* ff_opcodes[] = {"inc", "dec", "call", "call", "jmp", "jmp", "push", "unknown-ff"};
-    modrm_opcodes = ff_opcodes;
-    has_modrm = true;
-    reg_is_opcode = true;
-    load = true;
+    {
+      static const char* ff_opcodes[] = {"inc", "dec", "call", "call", "jmp", "jmp", "push", "unknown-ff"};
+      modrm_opcodes = ff_opcodes;
+      has_modrm = true;
+      reg_is_opcode = true;
+      load = true;
+      const uint8_t opcode_digit = (instr[1] >> 3) & 7;
+      // 'call', 'jmp' and 'push' are target-specific instructions.
+      if (opcode_digit == 2 || opcode_digit == 4 || opcode_digit == 6) {
+        target_specific = true;
+      }
+    }
     break;
   default:
     opcode << StringPrintf("unknown opcode '%02X'", *instr);
@@ -1024,10 +1060,10 @@
   // We force the REX prefix to be available for 64-bit target
   // in order to dump addr (base/index) registers correctly.
   uint8_t rex64 = supports_rex_ ? (rex | 0x40) : rex;
+  // REX.W should be forced for 64-bit targets and for target-specific instructions (e.g., push or pop).
+  uint8_t rex_w = (supports_rex_ && target_specific) ? (rex | 0x48) : rex;
   if (reg_in_opcode) {
     DCHECK(!has_modrm);
-    // REX.W should be forced for 64-target and target-specific instructions (i.e., push or pop).
-    uint8_t rex_w = (supports_rex_ && target_specific) ? (rex | 0x48) : rex;
     DumpOpcodeReg(args, rex_w, *instr & 0x7);
   }
   instr++;
@@ -1088,7 +1124,7 @@
     } else {
       if (mod == 3) {
         if (!no_ops) {
-          DumpRmReg(address, rex, rm, byte_operand, prefix[2], load ? src_reg_file : dst_reg_file);
+          DumpRmReg(address, rex_w, rm, byte_operand, prefix[2], load ? src_reg_file : dst_reg_file);
         }
       } else {
         address << "[";
@@ -1149,8 +1185,7 @@
     if (immediate_bytes == 1) {
       args << StringPrintf("%d", *reinterpret_cast<const int8_t*>(instr));
       instr++;
-    } else {
-      CHECK_EQ(immediate_bytes, 4u);
+    } else if (immediate_bytes == 4) {
       if (prefix[2] == 0x66) {  // Operand size override from 32-bit to 16-bit.
         args << StringPrintf("%d", *reinterpret_cast<const int16_t*>(instr));
         instr += 2;
@@ -1158,6 +1193,10 @@
         args << StringPrintf("%d", *reinterpret_cast<const int32_t*>(instr));
         instr += 4;
       }
+    } else {
+      CHECK_EQ(immediate_bytes, 8u);
+      args << StringPrintf("%" PRId64, *reinterpret_cast<const int64_t*>(instr));
+      instr += 8;
     }
   } else if (branch_bytes > 0) {
     DCHECK(!has_modrm);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index c6ac9ca..7f5cf0c 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -197,7 +197,7 @@
 LIBART_GCC_ONLY_SRC_FILES := \
 	interpreter/interpreter_goto_table_impl.cc
 
-LIBART_TARGET_LDFLAGS := -Wl,--no-fatal-warnings
+LIBART_TARGET_LDFLAGS :=
 LIBART_HOST_LDFLAGS :=
 
 LIBART_TARGET_SRC_FILES := \
@@ -223,6 +223,7 @@
 	arch/arm64/context_arm64.cc \
 	arch/arm64/entrypoints_init_arm64.cc \
 	arch/arm64/jni_entrypoints_arm64.S \
+	arch/arm64/memcmp16_arm64.S \
 	arch/arm64/portable_entrypoints_arm64.S \
 	arch/arm64/quick_entrypoints_arm64.S \
 	arch/arm64/thread_arm64.cc \
@@ -485,4 +486,4 @@
 LIBART_HOST_SRC_FILES_64 :=
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES :=
 LIBART_CFLAGS :=
-build-libart :=
\ No newline at end of file
+build-libart :=
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index ebceb63..3fa09cb 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -46,9 +46,6 @@
 extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
 extern "C" void* art_quick_resolve_string(void*, uint32_t);
 
-// Exception entrypoints.
-extern "C" void* GetAndClearException(Thread*);
-
 // Field entrypoints.
 extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
 extern "C" int art_quick_set32_static(uint32_t, int32_t);
@@ -116,7 +113,6 @@
 extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
 
 // Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
 extern "C" void art_quick_test_suspend();
 
 // Throw entrypoints.
@@ -226,7 +222,6 @@
   qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
 
   // Thread
-  qpoints->pCheckSuspend = CheckSuspendFromCode;
   qpoints->pTestSuspend = art_quick_test_suspend;
 
   // Throws
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 83a683d..4939610 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -127,7 +127,7 @@
 
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
 #endif
 .endm
 
@@ -1007,7 +1007,92 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_resolution_trampoline
 
-UNIMPLEMENTED art_quick_generic_jni_trampoline
+    /*
+     * Called to do a generic JNI down-call
+     */
+ENTRY art_quick_generic_jni_trampoline
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    str r0, [sp, #0]  // Store native ArtMethod* to bottom of stack.
+
+    // Save rSELF
+    mov r11, rSELF
+    // Save SP, so we can have static CFI info. r10 is saved in ref_and_args.
+    mov r10, sp
+    .cfi_def_cfa_register r10
+
+    sub sp, sp, #5120
+
+    // prepare for artQuickGenericJniTrampoline call
+    // (Thread*,  SP)
+    //    r0      r1   <= C calling convention
+    //  rSELF     r10  <= where they are
+
+    mov r0, rSELF   // Thread*
+    mov r1, r10
+    blx artQuickGenericJniTrampoline  // (Thread*, sp)
+
+    // The C call will have registered the complete save-frame on success.
+    // The result of the call is:
+    // r0: pointer to native code, 0 on error.
+    // r1: pointer to the bottom of the used area of the alloca; the stack can be restored up to there.
+
+    // Check for error = 0.
+    cbz r0, .Lentry_error
+
+    // Release part of the alloca.
+    mov sp, r1
+
+    // Save the code pointer
+    mov r12, r0
+
+    // Load parameters from frame into registers.
+    pop {r0-r3}
+
+    // Softfloat.
+    // TODO: Change to hardfloat when supported.
+
+    blx r12           // native call.
+
+    // result sign extension is handled in C code
+    // prepare for artQuickGenericJniEndTrampoline call
+    // (Thread*, result, result_f)
+    //    r0      r1,r2    r3,stack       <= C calling convention
+    //    r11     r0,r1    r0,r1          <= where they are
+    sub sp, sp, #12 // Stack alignment.
+
+    push {r1}
+    mov r3, r0
+    mov r2, r1
+    mov r1, r0
+    mov r0, r11
+
+    blx artQuickGenericJniEndTrampoline
+
+    // Tear down the alloca.
+    mov sp, r10
+    .cfi_def_cfa_register sp
+
+    // Restore self pointer.
+    mov r9, r11
+
+    // Pending exceptions possible.
+    ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
+    cbnz r2, .Lexception_in_native
+
+    // Tear down the callee-save frame.
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+    bx lr      // ret
+
+.Lentry_error:
+    mov sp, r10
+    .cfi_def_cfa_register sp
+    mov r9, r11
+.Lexception_in_native:
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    DELIVER_PENDING_EXCEPTION
+
+END art_quick_generic_jni_trampoline
 
     .extern artQuickToInterpreterBridge
 ENTRY art_quick_to_interpreter_bridge
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index cbb2c27..c19b79e 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -35,7 +35,7 @@
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
 
 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
+extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 extern "C" void art_quick_check_cast(void*, void*);
 
@@ -45,9 +45,6 @@
 extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
 extern "C" void* art_quick_resolve_string(void*, uint32_t);
 
-// Exception entrypoints.
-extern "C" void* GetAndClearException(Thread*);
-
 // Field entrypoints.
 extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
 extern "C" int art_quick_set32_static(uint32_t, int32_t);
@@ -96,7 +93,6 @@
 extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
 
 // Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
 extern "C" void art_quick_test_suspend();
 
 // Throw entrypoints.
@@ -129,7 +125,7 @@
   ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
+  qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -209,7 +205,6 @@
   qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
 
   // Thread
-  qpoints->pCheckSuspend = CheckSuspendFromCode;
   qpoints->pTestSuspend = art_quick_test_suspend;
 
   // Throws
diff --git a/runtime/arch/arm64/memcmp.S b/runtime/arch/arm64/memcmp.S
deleted file mode 100644
index 3d08ecd..0000000
--- a/runtime/arch/arm64/memcmp.S
+++ /dev/null
@@ -1,155 +0,0 @@
-/* Copyright (c) 2014, Linaro Limited
-   All rights reserved.
-
-   Redistribution and use in source and binary forms, with or without
-   modification, are permitted provided that the following conditions are met:
-       * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in the
-         documentation and/or other materials provided with the distribution.
-       * Neither the name of the Linaro nor the
-         names of its contributors may be used to endorse or promote products
-         derived from this software without specific prior written permission.
-
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
-#include <private/bionic_asm.h>
-
-/* Parameters and result.  */
-#define src1		x0
-#define src2		x1
-#define limit		x2
-#define result		x0
-
-/* Internal variables.  */
-#define data1		x3
-#define data1w		w3
-#define data2		x4
-#define data2w		w4
-#define has_nul		x5
-#define diff		x6
-#define endloop		x7
-#define tmp1		x8
-#define tmp2		x9
-#define tmp3		x10
-#define pos		x11
-#define limit_wd	x12
-#define mask		x13
-
-ENTRY(memcmp)
-	cbz	limit, .Lret0
-	eor	tmp1, src1, src2
-	tst	tmp1, #7
-	b.ne	.Lmisaligned8
-	ands	tmp1, src1, #7
-	b.ne	.Lmutual_align
-	add	limit_wd, limit, #7
-	lsr	limit_wd, limit_wd, #3
-	/* Start of performance-critical section  -- one 64B cache line.  */
-.Lloop_aligned:
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-.Lstart_realigned:
-	subs	limit_wd, limit_wd, #1
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, ne	/* Last Dword or differences.  */
-	cbz	endloop, .Lloop_aligned
-	/* End of performance-critical section  -- one 64B cache line.  */
-
-	/* Not reached the limit, must have found a diff.  */
-	cbnz	limit_wd, .Lnot_limit
-
-	/* Limit % 8 == 0 => all bytes significant.  */
-	ands	limit, limit, #7
-	b.eq	.Lnot_limit
-
-	lsl	limit, limit, #3	/* Bits -> bytes.  */
-	mov	mask, #~0
-#ifdef __AARCH64EB__
-	lsr	mask, mask, limit
-#else
-	lsl	mask, mask, limit
-#endif
-	bic	data1, data1, mask
-	bic	data2, data2, mask
-
-	orr	diff, diff, mask
-.Lnot_limit:
-
-#ifndef	__AARCH64EB__
-	rev	diff, diff
-	rev	data1, data1
-	rev	data2, data2
-#endif
-	/* The MS-non-zero bit of DIFF marks either the first bit
-	   that is different, or the end of the significant data.
-	   Shifting left now will bring the critical information into the
-	   top bits.  */
-	clz	pos, diff
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/* But we need to zero-extend (char is unsigned) the value and then
-	   perform a signed 32-bit subtraction.  */
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
-	ret
-
-.Lmutual_align:
-	/* Sources are mutually aligned, but are not currently at an
-	   alignment boundary.  Round down the addresses and then mask off
-	   the bytes that precede the start point.  */
-	bic	src1, src1, #7
-	bic	src2, src2, #7
-	add	limit, limit, tmp1	/* Adjust the limit for the extra.  */
-	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
-	ldr	data1, [src1], #8
-	neg	tmp1, tmp1		/* Bits to alignment -64.  */
-	ldr	data2, [src2], #8
-	mov	tmp2, #~0
-#ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
-#else
-	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
-#endif
-	add	limit_wd, limit, #7
-	orr	data1, data1, tmp2
-	orr	data2, data2, tmp2
-	lsr	limit_wd, limit_wd, #3
-	b	.Lstart_realigned
-
-.Lret0:
-	mov	result, #0
-	ret
-
-	.p2align 6
-.Lmisaligned8:
-	sub	limit, limit, #1
-1:
-	/* Perhaps we can do better than this.  */
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	subs	limit, limit, #1
-	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
-	b.eq	1b
-	sub	result, data1, data2
-	ret
-END(memcmp)
diff --git a/runtime/arch/arm64/memcmp16_arm64.S b/runtime/arch/arm64/memcmp16_arm64.S
new file mode 100644
index 0000000..582940a
--- /dev/null
+++ b/runtime/arch/arm64/memcmp16_arm64.S
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
+#define ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
+
+#include "asm_support_arm64.S"
+
+/* Parameters and result.  */
+#define src1        x0
+#define src2        x1
+#define limit       x2
+#define result      x0
+
+/* Internal variables.  */
+#define data1       x3
+#define data1w      w3
+#define data2       x4
+#define data2w      w4
+#define has_nul     x5
+#define diff        x6
+#define endloop     x7
+#define tmp1        x8
+#define tmp2        x9
+#define tmp3        x10
+#define limit_wd    x12
+#define mask        x13
+
+// WARNING: If you change this code to use x14 and x15, you must also change
+//          art_quick_string_compareto, which relies on these temps being unused.
+
+ENTRY __memcmp16
+  cbz     limit, .Lret0
+  lsl     limit, limit, #1  /* Half-words to bytes.  */
+  eor     tmp1, src1, src2
+  tst     tmp1, #7
+  b.ne    .Lmisaligned8
+  ands    tmp1, src1, #7
+  b.ne    .Lmutual_align
+  add     limit_wd, limit, #7
+  lsr     limit_wd, limit_wd, #3
+  /* Start of performance-critical section  -- one 64B cache line.  */
+.Lloop_aligned:
+  ldr     data1, [src1], #8
+  ldr     data2, [src2], #8
+.Lstart_realigned:
+  subs    limit_wd, limit_wd, #1
+  eor     diff, data1, data2  /* Non-zero if differences found.  */
+  csinv   endloop, diff, xzr, ne  /* Last Dword or differences.  */
+  cbz     endloop, .Lloop_aligned
+  /* End of performance-critical section  -- one 64B cache line.  */
+
+  /* Not reached the limit, must have found a diff.  */
+  cbnz    limit_wd, .Lnot_limit
+
+  /* Limit % 8 == 0 => all bytes significant.  */
+  ands    limit, limit, #7
+  b.eq    .Lnot_limit
+
+  lsl     limit, limit, #3  /* Bits -> bytes.  */
+  mov     mask, #~0
+  lsl     mask, mask, limit
+  bic     data1, data1, mask
+  bic     data2, data2, mask
+
+.Lnot_limit:
+
+  // Swap the byte order of diff. Exact reverse is not important, as we only need to detect
+  // the half-word.
+  rev     diff, diff
+  // The most significant bit of DIFF marks the least significant bit of change between DATA1/2
+  clz     diff, diff
+  // Mask off 0xF to have shift amount. Why does ARM64 not have BIC with immediate?!?!
+  bfi     diff, xzr, #0, #4
+  // Create a 16b mask
+  mov     mask, #0xFFFF
+  // Shift to the right half-word.
+  lsr     data1, data1, diff
+  lsr     data2, data2, diff
+  // Mask the lowest half-word.
+  and     data1, data1, mask
+  and     data2, data2, mask
+  // Compute difference.
+  sub     result, data1, data2
+  ret
+
+.Lmutual_align:
+  /* Sources are mutually aligned, but are not currently at an
+     alignment boundary.  Round down the addresses and then mask off
+     the bytes that precede the start point.  */
+  bic     src1, src1, #7
+  bic     src2, src2, #7
+  add     limit, limit, tmp1  /* Adjust the limit for the extra.  */
+  lsl     tmp1, tmp1, #3    /* Bytes beyond alignment -> bits.  */
+  ldr     data1, [src1], #8
+  neg     tmp1, tmp1    /* Bits to alignment -64.  */
+  ldr     data2, [src2], #8
+  mov     tmp2, #~0
+  /* Little-endian.  Early bytes are at LSB.  */
+  lsr     tmp2, tmp2, tmp1  /* Shift (tmp1 & 63).  */
+  add     limit_wd, limit, #7
+  orr     data1, data1, tmp2
+  orr     data2, data2, tmp2
+  lsr     limit_wd, limit_wd, #3
+  b       .Lstart_realigned
+
+.Lret0:
+  mov     result, #0
+  ret
+
+  .p2align 6
+.Lmisaligned8:
+  sub     limit, limit, #1
+1:
+  /* Perhaps we can do better than this.  */
+  ldrh    data1w, [src1], #2
+  ldrh    data2w, [src2], #2
+  subs    limit, limit, #2
+  ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
+  b.eq    1b
+  sub     result, data1, data2
+  ret
+END __memcmp16
+
+#endif  // ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
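A reference sketch (not part of the patch) of what __memcmp16 computes, matching the MemCmp16
declaration in runtime/arch/memcmp16.h updated later in this patch: compare "count" 16-bit
units and return the difference of the first pair that differs (as a 32-bit value), or 0 when
all units are equal.

    // Plain C++ equivalent of the optimized assembly above (sketch only).
    static uint32_t MemCmp16Sketch(const uint16_t* s0, const uint16_t* s1, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        if (s0[i] != s1[i]) {
          return static_cast<uint32_t>(s0[i]) - static_cast<uint32_t>(s1[i]);
        }
      }
      return 0;
    }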
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 2e60b93..7907b6e 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1485,33 +1485,34 @@
     mov x1, xFP
     bl artQuickGenericJniTrampoline  // (Thread*, sp)
 
-    // Get the updated pointer. This is the bottom of the frame _with_ handle scope.
-    ldr xFP, [sp]
-    add x9, sp, #8
+    // The C call will have registered the complete save-frame on success.
+    // The result of the call is:
+    // x0: pointer to native code, 0 on error.
+    // x1: pointer to the bottom of the used area of the alloca; the stack can be restored up to there.
 
-    cmp x0, #0
-    b.mi .Lentry_error      // Check for error, negative value.
+    // Check for error = 0.
+    cbz x0, .Lentry_error
 
-    // release part of the alloca.
-    add x9, x9, x0
+    // Release part of the alloca.
+    mov sp, x1
 
-    // Get the code pointer
-    ldr xIP0, [x9, #0]
+    // Save the code pointer
+    mov xIP0, x0
 
     // Load parameters from frame into registers.
     // TODO Check with artQuickGenericJniTrampoline.
     //      Also, check again APPCS64 - the stack arguments are interleaved.
-    ldp x0, x1, [x9, #8]
-    ldp x2, x3, [x9, #24]
-    ldp x4, x5, [x9, #40]
-    ldp x6, x7, [x9, #56]
+    ldp x0, x1, [sp]
+    ldp x2, x3, [sp, #16]
+    ldp x4, x5, [sp, #32]
+    ldp x6, x7, [sp, #48]
 
-    ldp d0, d1, [x9, #72]
-    ldp d2, d3, [x9, #88]
-    ldp d4, d5, [x9, #104]
-    ldp d6, d7, [x9, #120]
+    ldp d0, d1, [sp, #64]
+    ldp d2, d3, [sp, #80]
+    ldp d4, d5, [sp, #96]
+    ldp d6, d7, [sp, #112]
 
-    add sp, x9, #136
+    add sp, sp, #128
 
     blr xIP0           // native call.
 
@@ -1520,13 +1521,11 @@
 
     // result sign extension is handled in C code
     // prepare for artQuickGenericJniEndTrampoline call
-    // (Thread*,  SP, result, result_f)
-    //   x0       x1   x2       x3       <= C calling convention
-    mov x5, x0      // Save return value
+    // (Thread*, result, result_f)
+    //    x0       x1       x2        <= C calling convention
+    mov x1, x0      // Result (from saved)
     mov x0, xSELF   // Thread register
-    mov x1, xFP     // Stack pointer
-    mov x2, x5      // Result (from saved)
-    fmov x3, d0     // d0 will contain floating point result, but needs to go into x3
+    fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
 
     bl artQuickGenericJniEndTrampoline
 
@@ -1632,6 +1631,8 @@
     ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
     .cfi_adjust_cfa_offset 16
 
+    // Need to restore x18.
+    ldr   xSELF, [sp, #72]
     POP_REF_ONLY_CALLEE_SAVE_FRAME
 
     br    x9                  // Tail-call out.
@@ -1647,6 +1648,7 @@
     mov    x0, xSELF          // Pass thread.
     mov    x1, sp             // Pass SP.
     bl     artDeoptimize      // artDeoptimize(Thread*, SP)
+    brk 0
 END art_quick_deoptimize
 
 
@@ -1757,7 +1759,7 @@
      *    x1:   comp object pointer
      *
      */
-    .extern memcmp16_generic_static
+    .extern __memcmp16
 ENTRY art_quick_string_compareto
     mov    x2, x0         // x0 is return, use x2 for first input.
     sub    x0, x2, x1     // Same string object?
@@ -1850,16 +1852,17 @@
     ret
 
 .Ldo_memcmp16:
-    str x0, [sp,#-16]!           // Save x0
+    mov x14, x0                  // Save x0 and LR. __memcmp16 does not use these temps.
+    mov x15, xLR                 //                 TODO: Codify and check that?
 
     mov x0, x2
     uxtw x2, w3
-    bl memcmp16_generic_static
+    bl __memcmp16
 
-    ldr x1, [sp], #16            // Restore old x0 = length diff
+    mov xLR, x15                 // Restore LR.
 
-    cmp x0, #0                   // Check the memcmp difference
-    csel x0, x0, x1, ne          // x0 := x0 != 0 ? x0 : x1
+    cmp x0, #0                   // Check the memcmp difference.
+    csel x0, x0, x14, ne         // x0 := (x0 != 0) ? x0 : x14 (x14 = saved length diff).
     ret
 END art_quick_string_compareto
 
@@ -1869,11 +1872,9 @@
 .macro NATIVE_DOWNCALL name, entrypoint
     .extern \entrypoint
 ENTRY \name
-    sub    sp, sp, #16
-    stp    xSELF, xLR, [sp]
+    stp    xSELF, xLR, [sp, #-16]!
     bl     \entrypoint
-    ldp    xSELF, xLR, [sp]
-    add    sp, sp, #16
+    ldp    xSELF, xLR, [sp], #16
     ret
 END \name
 .endm
@@ -1881,3 +1882,4 @@
 NATIVE_DOWNCALL art_quick_fmod fmod
 NATIVE_DOWNCALL art_quick_fmodf fmodf
 NATIVE_DOWNCALL art_quick_memcpy memcpy
+NATIVE_DOWNCALL art_quick_assignable_from_code artIsAssignableFromCode
diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h
index ad58588..1144c8c 100644
--- a/runtime/arch/memcmp16.h
+++ b/runtime/arch/memcmp16.h
@@ -30,7 +30,7 @@
 //
 // In both cases, MemCmp16 is declared.
 
-#if defined(__arm__) || defined(__mips)
+#if defined(__aarch64__) || defined(__arm__) || defined(__mips)
 
 extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
 #define MemCmp16 __memcmp16
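With the line above, AArch64 now joins 32-bit ARM and MIPS in binding MemCmp16 to the hand-written __memcmp16; other architectures keep the generic version this header declares. Call sites look the same either way. A minimal, illustrative caller (the wrapper name is invented; the include path assumes the runtime/arch layout of this patch):

    #include <cstddef>
    #include <cstdint>
    #include "memcmp16.h"

    // Compare two UTF-16 sequences of equal length. The sign of the result is
    // determined by the first non-matching code unit; 0 means all units match.
    int32_t CompareUtf16(const uint16_t* a, const uint16_t* b, size_t len) {
      return static_cast<int32_t>(MemCmp16(a, b, len));
    }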
diff --git a/runtime/arch/memcmp16_test.cc b/runtime/arch/memcmp16_test.cc
new file mode 100644
index 0000000..5747c67
--- /dev/null
+++ b/runtime/arch/memcmp16_test.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "memcmp16.h"
+
+class RandGen {
+ public:
+  explicit RandGen(uint32_t seed) : val_(seed) {}
+
+  uint32_t next() {
+    val_ = val_ * 48271 % 2147483647 + 13;
+    return val_;
+  }
+
+  uint32_t val_;
+};
+
+class MemCmp16Test : public testing::Test {
+};
+
+// A simple implementation to compare against.
+// Note: this version is equivalent to the generic one used when no optimized version is available.
+int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count) {
+  for (size_t i = 0; i < count; i++) {
+    if (s0[i] != s1[i]) {
+      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
+    }
+  }
+  return 0;
+}
+
+static constexpr size_t kMemCmp16Rounds = 100000;
+
+static void CheckSeparate(size_t max_length, size_t min_length) {
+  RandGen r(0x1234);
+  size_t range_of_tests = 7;  // All four (weighted) tests active in the beginning.
+
+  for (size_t round = 0; round < kMemCmp16Rounds; ++round) {
+    size_t type = r.next() % range_of_tests;
+    size_t count1, count2;
+    uint16_t *s1, *s2;  // Raw pointers, so zero-length cases can use deliberately bogus addresses.
+
+    switch (type) {
+      case 0:  // random, non-zero lengths of both strings
+      case 1:
+      case 2:
+      case 3:
+        count1 = (r.next() % max_length) + min_length;
+        count2 = (r.next() % max_length) + min_length;
+        break;
+
+      case 4:  // random non-zero length of first, second is zero
+        count1 = (r.next() % max_length) + min_length;
+        count2 = 0U;
+        break;
+
+      case 5:  // random non-zero length of second, first is zero
+        count1 = 0U;
+        count2 = (r.next() % max_length) + min_length;
+        break;
+
+      case 6:  // both zero-length
+        count1 = 0U;
+        count2 = 0U;
+        range_of_tests = 6;  // Don't do zero-zero again.
+        break;
+
+      default:
+        ASSERT_TRUE(false) << "Should not get here.";
+        continue;
+    }
+
+    if (count1 > 0U) {
+      s1 = new uint16_t[count1];
+    } else {
+      // Leave a bogus pointer; a zero-length input must never be dereferenced.
+      s1 = reinterpret_cast<uint16_t*>(0xebad1001);
+    }
+
+    if (count2 > 0U) {
+      s2 = new uint16_t[count2];
+    } else {
+      // Leave a bogus pointer; a zero-length input must never be dereferenced.
+      s2 = reinterpret_cast<uint16_t*>(0xebad2002);
+    }
+
+    size_t min = count1 < count2 ? count1 : count2;
+    bool fill_same = r.next() % 2 == 1;  // Coin flip: sometimes give both strings a common prefix.
+
+    if (fill_same) {
+      for (size_t i = 0; i < min; ++i) {
+        s1[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+        s2[i] = s1[i];
+      }
+      for (size_t i = min; i < count1; ++i) {
+        s1[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+      }
+      for (size_t i = min; i < count2; ++i) {
+        s2[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+      }
+    } else {
+      for (size_t i = 0; i < count1; ++i) {
+        s1[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+      }
+      for (size_t i = 0; i < count2; ++i) {
+        s2[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+      }
+    }
+
+    uint16_t* s1_pot_unaligned = s1;
+    uint16_t* s2_pot_unaligned = s2;
+    size_t c1_mod = count1;
+    size_t c2_mod = count2;
+
+    if (!fill_same) {  // Don't waste a good "long" test.
+      if (count1 > 1 && r.next() % 10 == 0) {
+        c1_mod--;
+        s1_pot_unaligned++;
+      }
+      if (count2 > 1 && r.next() % 10 == 0) {
+        c2_mod--;
+        s2_pot_unaligned++;
+      }
+    }
+    size_t mod_min = c1_mod < c2_mod ? c1_mod : c2_mod;
+
+    int32_t expected = memcmp16_compare(s1_pot_unaligned, s2_pot_unaligned, mod_min);
+    int32_t computed = MemCmp16(s1_pot_unaligned, s2_pot_unaligned, mod_min);
+
+    ASSERT_EQ(expected, computed) << "Run " << round << ", c1=" << count1 << " c2=" << count2;
+
+    if (count1 > 0U) {
+      delete[] s1;
+    }
+    if (count2 > 0U) {
+      delete[] s2;
+    }
+  }
+}
+
+TEST_F(MemCmp16Test, RandomSeparateShort) {
+  CheckSeparate(5U, 1U);
+}
+
+TEST_F(MemCmp16Test, RandomSeparateLong) {
+  CheckSeparate(64U, 32U);
+}
+
+// TODO: What's a good test for overlapping memory? Is it important?
+// TEST_F(MemCmp16Test, RandomOverlay) {
+//
+// }
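One detail in the test above is worth spelling out: bumping a uint16_t* by one element moves it by two bytes, so the pot_unaligned pointers stay half-word aligned but usually lose 8-byte alignment, which is exactly what drives the AArch64 __memcmp16 into its .Lmisaligned8 and .Lmutual_align paths. A small illustration (not part of the test; assumes the array start is 8-byte aligned, which operator new[] normally provides):

    #include <cstdint>

    // After the '+1' bump used for s1_pot_unaligned/s2_pot_unaligned, the pointer
    // is still 2-byte aligned but no longer 8-byte aligned.
    bool LosesDoublewordAlignment(uint16_t* base) {
      uint16_t* bumped = base + 1;  // advances by sizeof(uint16_t) == 2 bytes
      return reinterpret_cast<uintptr_t>(bumped) % 8 != 0;
    }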
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 08caa80..70a9619 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -45,9 +45,6 @@
 extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
 extern "C" void* art_quick_resolve_string(void*, uint32_t);
 
-// Exception entrypoints.
-extern "C" void* GetAndClearException(Thread*);
-
 // Field entrypoints.
 extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
 extern "C" int art_quick_set32_static(uint32_t, int32_t);
@@ -117,7 +114,6 @@
 extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
 
 // Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
 extern "C" void art_quick_test_suspend();
 
 // Throw entrypoints.
@@ -229,7 +225,6 @@
   qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
 
   // Thread
-  qpoints->pCheckSuspend = CheckSuspendFromCode;
   qpoints->pTestSuspend = art_quick_test_suspend;
 
   // Throws
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 22b8cca..eb490eb 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -258,7 +258,7 @@
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
           "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
           "memory");  // clobber.
-#elif defined(__x86_64__)
+#elif defined(__x86_64__) && !defined(__APPLE__)
     // Note: Uses the native convention
     // TODO: Set the thread?
     __asm__ __volatile__(
@@ -483,7 +483,7 @@
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
           "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
           "memory");  // clobber.
-#elif defined(__x86_64__)
+#elif defined(__x86_64__) && !defined(__APPLE__)
     // Note: Uses the native convention
     // TODO: Set the thread?
     __asm__ __volatile__(
@@ -518,7 +518,7 @@
   // Method with 32b arg0, 64b arg1
   size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self,
                               mirror::ArtMethod* referrer) {
-#if defined(__x86_64__) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
     // Just pass through.
     return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer);
 #else
@@ -533,7 +533,7 @@
   // Method with 32b arg0, 32b arg1, 64b arg2
   size_t Invoke3UUWithReferrer(uint32_t arg0, uint32_t arg1, uint64_t arg2, uintptr_t code,
                                Thread* self, mirror::ArtMethod* referrer) {
-#if defined(__x86_64__) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
     // Just pass through.
     return Invoke3WithReferrer(arg0, arg1, arg2, code, self, referrer);
 #else
@@ -547,12 +547,12 @@
 };
 
 
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_memcpy(void);
 #endif
 
 TEST_F(StubTest, Memcpy) {
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
 
   uint32_t orig[20];
@@ -588,12 +588,12 @@
 #endif
 }
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_lock_object(void);
 #endif
 
 TEST_F(StubTest, LockObject) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   static constexpr size_t kThinLockLoops = 100;
 
   Thread* self = Thread::Current();
@@ -664,14 +664,14 @@
 };
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_lock_object(void);
 extern "C" void art_quick_unlock_object(void);
 #endif
 
 // NO_THREAD_SAFETY_ANALYSIS as we do not want to grab exclusive mutator lock for MonitorInfo.
 static void TestUnlockObject(StubTest* test) NO_THREAD_SAFETY_ANALYSIS {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   static constexpr size_t kThinLockLoops = 100;
 
   Thread* self = Thread::Current();
@@ -817,12 +817,12 @@
   TestUnlockObject(this);
 }
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_check_cast(void);
 #endif
 
 TEST_F(StubTest, CheckCast) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
   // Find some classes.
   ScopedObjectAccess soa(self);
@@ -867,7 +867,7 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_aput_obj_with_null_and_bound_check(void);
 // Do not check non-checked ones, we'd need handlers and stuff...
 #endif
@@ -875,7 +875,7 @@
 TEST_F(StubTest, APutObj) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
   // Create an object
   ScopedObjectAccess soa(self);
@@ -1003,7 +1003,7 @@
 TEST_F(StubTest, AllocObject) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -1125,7 +1125,7 @@
 TEST_F(StubTest, AllocObjectArray) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -1204,14 +1204,14 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_string_compareto(void);
 #endif
 
 TEST_F(StubTest, StringCompareTo) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -1222,8 +1222,12 @@
   // Use array so we can index into it and use a matrix for expected results
   // Setup: The first half is standard. The second half uses a non-zero offset.
   // TODO: Shared backing arrays.
-  static constexpr size_t kBaseStringCount  = 7;
-  const char* c[kBaseStringCount] = { "", "", "a", "aa", "ab", "aac", "aac" , };
+  static constexpr size_t kBaseStringCount  = 8;
+  const char* c[kBaseStringCount] = { "", "", "a", "aa", "ab",
+      "aacaacaacaacaacaac",  // This one's under the default limit to go to __memcmp16.
+      "aacaacaacaacaacaacaacaacaacaacaacaac",     // This one's over.
+      "aacaacaacaacaacaacaacaacaacaacaacaaca" };  // As is this one. We need a separate one to
+                                                  // defeat object-equal optimizations.
 
   static constexpr size_t kStringCount = 2 * kBaseStringCount;
 
@@ -1301,7 +1305,7 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_set32_static(void);
 extern "C" void art_quick_get32_static(void);
 #endif
@@ -1309,7 +1313,7 @@
 static void GetSet32Static(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f, Thread* self,
                            mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   constexpr size_t num_values = 7;
   uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
@@ -1337,7 +1341,7 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_set32_instance(void);
 extern "C" void art_quick_get32_instance(void);
 #endif
@@ -1345,7 +1349,7 @@
 static void GetSet32Instance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   constexpr size_t num_values = 7;
   uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
@@ -1379,7 +1383,7 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_set_obj_static(void);
 extern "C" void art_quick_get_obj_static(void);
 
@@ -1406,7 +1410,7 @@
 static void GetSetObjStatic(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f, Thread* self,
                             mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
 
   // Allocate a string object for simplicity.
@@ -1422,7 +1426,7 @@
 }
 
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_set_obj_instance(void);
 extern "C" void art_quick_get_obj_instance(void);
 
@@ -1453,7 +1457,7 @@
 static void GetSetObjInstance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
                               Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   set_and_check_instance(f, obj->Get(), nullptr, self, referrer, test);
 
   // Allocate a string object for simplicity.
@@ -1471,7 +1475,7 @@
 
 // TODO: Complete these tests for 32b architectures.
 
-#if defined(__x86_64__) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
 extern "C" void art_quick_set64_static(void);
 extern "C" void art_quick_get64_static(void);
 #endif
@@ -1479,7 +1483,7 @@
 static void GetSet64Static(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f, Thread* self,
                            mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__x86_64__) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
   constexpr size_t num_values = 8;
   uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
@@ -1506,7 +1510,7 @@
 }
 
 
-#if defined(__x86_64__) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
 extern "C" void art_quick_set64_instance(void);
 extern "C" void art_quick_get64_instance(void);
 #endif
@@ -1514,7 +1518,7 @@
 static void GetSet64Instance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#if defined(__x86_64__) || defined(__aarch64__)
+#if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
   constexpr size_t num_values = 8;
   uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
@@ -1678,12 +1682,12 @@
   TestFields(self, this, Primitive::Type::kPrimLong);
 }
 
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
 extern "C" void art_quick_imt_conflict_trampoline(void);
 #endif
 
 TEST_F(StubTest, IMT) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
   Thread* self = Thread::Current();
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index a85e250..b217cd6 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -93,7 +93,6 @@
 extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
 
 // Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
 extern "C" void art_quick_test_suspend();
 
 // Throw entrypoints.
@@ -205,7 +204,6 @@
   qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
 
   // Thread
-  qpoints->pCheckSuspend = CheckSuspendFromCode;
   qpoints->pTestSuspend = art_quick_test_suspend;
 
   // Throws
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index ecd8ce6..24b9e46 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1127,8 +1127,7 @@
     // This also stores the native ArtMethod reference at the bottom of the stack.
 
     movl %esp, %ebp                 // save SP at callee-save frame
-    movl %esp, %edi
-    CFI_DEF_CFA_REGISTER(edi)
+    CFI_DEF_CFA_REGISTER(ebp)
     subl LITERAL(5120), %esp
     // prepare for artQuickGenericJniTrampoline call
     // (Thread*,  SP)
@@ -1141,46 +1140,39 @@
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     SETUP_GOT_NOSAVE              // Clobbers ebx.
     call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
-    // Drop call stack.
-    addl LITERAL(16), %esp
 
-    // At the bottom of the alloca we now have the name pointer to the method=bottom of callee-save
-    // get the adjusted frame pointer
-    popl %ebp
+    // The C call will have registered the complete save-frame on success.
+    // The result of the call is:
+    // eax: pointer to native code, 0 on error.
+    // edx: pointer to the bottom of the used area of the alloca, can restore stack till there.
 
-    // Check for error, negative value.
+    // Check for error = 0.
     test %eax, %eax
-    js .Lentry_error
+    jz .Lentry_error
 
-    // release part of the alloca, get the code pointer
-    addl %eax, %esp
-    popl %eax
+    // Release part of the alloca.
+    movl %edx, %esp
 
     // On x86 there are no registers passed, so nothing to pop here.
 
     // Native call.
     call *%eax
 
-    // Pop native stack, but keep the space that was reserved cookie.
-    movl %ebp, %esp
-    subl LITERAL(16), %esp        // Alignment.
-
     // result sign extension is handled in C code
     // prepare for artQuickGenericJniEndTrampoline call
-    // (Thread*,  SP,  result, result_f)
-    //  (esp)   4(esp)  8(esp)  16(esp)    <= C calling convention
-    //  fs:...    ebp  eax:edx   xmm0      <= where they are
+    // (Thread*, result, result_f)
+    //  (esp)    4(esp)  12(esp)    <= C calling convention
+    //  fs:...  eax:edx   xmm0      <= where they are
 
-    subl LITERAL(8), %esp         // Pass float result.
+    subl LITERAL(20), %esp         // Padding & pass float result.
     movsd %xmm0, (%esp)
     pushl %edx                    // Pass int result.
     pushl %eax
-    pushl %ebp                    // Pass SP (to ArtMethod).
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
 
     // Tear down the alloca.
-    movl %edi, %esp
+    movl %ebp, %esp
     CFI_DEF_CFA_REGISTER(esp)
 
     // Pending exceptions possible.
@@ -1204,7 +1196,7 @@
     punpckldq %xmm1, %xmm0
     ret
 .Lentry_error:
-    movl %edi, %esp
+    movl %ebp, %esp
     CFI_DEF_CFA_REGISTER(esp)
 .Lexception_in_native:
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
@@ -1247,11 +1239,12 @@
     PUSH ecx                      // Pass receiver.
     PUSH eax                      // Pass Method*.
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)
-    addl  LITERAL(28), %esp       // Pop arguments upto saved Method*.
+    call PLT_SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)
+    addl LITERAL(28), %esp        // Pop arguments up to saved Method*.
     movl 28(%esp), %edi           // Restore edi.
     movl %eax, 28(%esp)           // Place code* over edi, just under return pc.
-    movl LITERAL(SYMBOL(art_quick_instrumentation_exit)), 32(%esp)
+    movl SYMBOL(art_quick_instrumentation_exit)@GOT(%ebx), %ebx
+    movl %ebx, 32(%esp)
                                   // Place instrumentation exit as return pc.
     movl (%esp), %eax             // Restore eax.
     movl 8(%esp), %ecx            // Restore ecx.
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index 34c8b82..70c71c2 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -19,7 +19,7 @@
 
 #include "asm_support_x86_64.h"
 
-#if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
+#if defined(__APPLE__) || (defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5))
     // Clang's as(1) doesn't let you name macro parameters prior to 3.5.
     #define MACRO0(macro_name) .macro macro_name
     #define MACRO1(macro_name, macro_arg1) .macro macro_name
@@ -27,13 +27,12 @@
     #define MACRO3(macro_name, macro_arg1, macro_args2, macro_args3) .macro macro_name
     #define END_MACRO .endmacro
 
-    // Clang's as(1) uses $0, $1, and so on for macro arguments prior to 3.5.
+    // Clang's as(1) uses $0, $1, and so on for macro arguments.
+    #define RAW_VAR(name,index) $index
     #define VAR(name,index) SYMBOL($index)
-    #define PLT_VAR(name, index) SYMBOL($index)@PLT
+    #define PLT_VAR(name, index) PLT_SYMBOL($index)
     #define REG_VAR(name,index) %$index
     #define CALL_MACRO(name,index) $index
-    #define FUNCTION_TYPE(name,index) .type $index, @function
-    #define SIZE(name,index) .size $index, .-$index
 
     //  The use of $x for arguments mean that literals need to be represented with $$x in macros.
     #define LITERAL(value) $value
@@ -52,17 +51,27 @@
     // no special meaning to $, so literals are still just $x. The use of altmacro means % is a
     // special character meaning care needs to be taken when passing registers as macro arguments.
     .altmacro
+    #define RAW_VAR(name,index) name&
     #define VAR(name,index) name&
     #define PLT_VAR(name, index) name&@PLT
     #define REG_VAR(name,index) %name
     #define CALL_MACRO(name,index) name&
-    #define FUNCTION_TYPE(name,index) .type name&, @function
-    #define SIZE(name,index) .size name, .-name
 
     #define LITERAL(value) $value
     #define MACRO_LITERAL(value) $value
 #endif
 
+#if defined(__APPLE__)
+    #define FUNCTION_TYPE(name,index)
+    #define SIZE(name,index)
+#elif defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
+    #define FUNCTION_TYPE(name,index) .type $index, @function
+    #define SIZE(name,index) .size $index, .-$index
+#else
+    #define FUNCTION_TYPE(name,index) .type name&, @function
+    #define SIZE(name,index) .size name, .-name
+#endif
+
     // CFI support.
 #if !defined(__APPLE__)
     #define CFI_STARTPROC .cfi_startproc
@@ -86,9 +95,14 @@
     // Symbols.
 #if !defined(__APPLE__)
     #define SYMBOL(name) name
-    #define PLT_SYMBOL(name) name ## @PLT
+    #if defined(__clang__) && (__clang_major__ < 4) && (__clang_minor__ < 5)
+        // TODO: Disabled for old clang 3.3, this leads to text relocations and there should be a
+        // better fix.
+        #define PLT_SYMBOL(name) name // ## @PLT
+    #else
+        #define PLT_SYMBOL(name) name ## @PLT
+    #endif
 #else
-    // Mac OS' symbols have an _ prefix.
     #define SYMBOL(name) _ ## name
     #define PLT_SYMBOL(name) _ ## name
 #endif
@@ -103,8 +117,10 @@
     .globl VAR(c_name, 0)
     ALIGN_FUNCTION_ENTRY
 VAR(c_name, 0):
+#if !defined(__APPLE__)
     // Have a local entrypoint that's not globl
 VAR(c_name, 0)_local:
+#endif
     CFI_STARTPROC
     // Ensure we get a sane starting CFA.
     CFI_DEF_CFA(rsp, 8)
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 0ccbd27..e1f47ee 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -59,8 +59,8 @@
     size_t j = 2;  // Offset j to skip return address spill.
     for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
       if (((frame_info.FpSpillMask() >> i) & 1) != 0) {
-        fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j,
-                                        frame_info.FrameSizeInBytes());
+        fprs_[i] = reinterpret_cast<uint64_t*>(
+            fr.CalleeSaveAddress(spill_count + fp_spill_count - j, frame_info.FrameSizeInBytes()));
         j++;
       }
     }
@@ -93,7 +93,7 @@
 
 bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
-  CHECK_NE(fprs_[reg], &gZero);
+  CHECK_NE(fprs_[reg], reinterpret_cast<const uint64_t*>(&gZero));
   if (fprs_[reg] != nullptr) {
     *fprs_[reg] = value;
     return true;
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 92aabee..609d1c6 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -94,7 +94,6 @@
 extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
 
 // Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
 extern "C" void art_quick_test_suspend();
 
 // Throw entrypoints.
@@ -112,6 +111,9 @@
 
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
+#if defined(__APPLE__)
+  UNIMPLEMENTED(FATAL);
+#else
   // Interpreter
   ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
   ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
@@ -206,7 +208,6 @@
   qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
 
   // Thread
-  qpoints->pCheckSuspend = CheckSuspendFromCode;
   qpoints->pTestSuspend = art_quick_test_suspend;
 
   // Throws
@@ -216,6 +217,7 @@
   qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
   qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
   qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+#endif  // __APPLE__
 };
 
 }  // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index c9220c8..8fa947c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -23,6 +23,10 @@
      * Runtime::CreateCalleeSaveMethod(kSaveAll)
      */
 MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     // R10 := Runtime::Current()
     movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
     movq (%r10), %r10
@@ -45,6 +49,7 @@
 #if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 8 + 8)
 #error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
+#endif  // __APPLE__
 END_MACRO
 
     /*
@@ -52,6 +57,10 @@
      * Runtime::CreateCalleeSaveMethod(kRefsOnly)
      */
 MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     // R10 := Runtime::Current()
     movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
     movq (%r10), %r10
@@ -74,6 +83,7 @@
 #if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 8 + 8)
 #error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
+#endif  // __APPLE__
 END_MACRO
 
 MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
@@ -93,6 +103,10 @@
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
      */
 MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     // R10 := Runtime::Current()
     movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
     movq (%r10), %r10
@@ -130,6 +144,7 @@
 #if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 80 + 8)
 #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
+#endif  // __APPLE__
 END_MACRO
 
 MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
@@ -366,6 +381,10 @@
      *   r9 = char* shorty
      */
 DEFINE_FUNCTION art_quick_invoke_stub
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     // Set up argument XMM registers.
     leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
     leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
@@ -431,6 +450,7 @@
 .Lreturn_float_quick:
     movss %xmm0, (%r8)           // Store the floating point result.
     ret
+#endif  // __APPLE__
 END_FUNCTION art_quick_invoke_stub
 
     /*
@@ -445,6 +465,10 @@
      *   r9 = char* shorty
      */
 DEFINE_FUNCTION art_quick_invoke_static_stub
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     // Set up argument XMM registers.
     leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
     movq %rsi, %r11               // R11 := arg_array
@@ -509,6 +533,7 @@
 .Lreturn_float_quick2:
     movss %xmm0, (%r8)           // Store the floating point result.
     ret
+#endif  // __APPLE__
 END_FUNCTION art_quick_invoke_static_stub
 
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
@@ -559,6 +584,45 @@
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
+MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movl 8(%rsp), %esi                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 is in rdi
+    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
+    movq %rsp, %rcx                    // pass SP
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movl 8(%rsp), %edx                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 and arg1 are in rdi/rsi
+    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
+    movq %rsp, %r8                     // pass SP
+    call PLT_VAR(cxx_name, 1)          // (arg0, arg1, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movl 8(%rsp), %ecx                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
+    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
+    movq %rsp, %r9                     // pass SP
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)        // return or deliver exception
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
     testq %rax, %rax               // rax == 0 ?
     jz  1f                         // if rax == 0 goto 1
@@ -783,14 +847,23 @@
      * rdi(edi) = array, rsi(esi) = index, rdx(edx) = value
      */
 DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     testl %edi, %edi
 //  testq %rdi, %rdi
     jnz art_quick_aput_obj_with_bound_check_local
     jmp art_quick_throw_null_pointer_exception_local
+#endif  // __APPLE__
 END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
 
 
 DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     movl ARRAY_LENGTH_OFFSET(%edi), %ecx
 //  movl ARRAY_LENGTH_OFFSET(%rdi), %ecx      // This zero-extends, so value(%rcx)=value(%ecx)
     cmpl %ecx, %esi
@@ -800,6 +873,7 @@
     mov %ecx, %esi
 //  mov %rcx, %rsi
     jmp art_quick_throw_array_bounds_local
+#endif  // __APPLE__
 END_FUNCTION art_quick_aput_obj_with_bound_check
 
 
@@ -894,47 +968,6 @@
 UNIMPLEMENTED art_quick_lshr
 UNIMPLEMENTED art_quick_lushr
 
-
-MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
-    movl 8(%rsp), %esi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // arg0 is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)
-    END_FUNCTION VAR(c_name, 0)
-END_MACRO
-
-MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
-    movl 8(%rsp), %edx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // arg0 and arg1 are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_VAR(cxx_name, 1)          // (arg0, arg1, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)
-    END_FUNCTION VAR(c_name, 0)
-END_MACRO
-
-MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
-    DEFINE_FUNCTION VAR(c_name, 0)
-    movl 8(%rsp), %ecx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
-    movq %rsp, %r9                     // pass SP
-    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    CALL_MACRO(return_macro, 2)        // return or deliver exception
-    END_FUNCTION VAR(c_name, 0)
-END_MACRO
-
-
 THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
 THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
 THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO
@@ -1006,10 +1039,15 @@
      * rax is a hidden argument that holds the target method's dex method index.
      */
 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     movl 8(%rsp), %edi            // load caller Method*
     movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
     movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
     jmp art_quick_invoke_interface_trampoline_local
+#endif  // __APPLE__
 END_FUNCTION art_quick_imt_conflict_trampoline
 
 DEFINE_FUNCTION art_quick_resolution_trampoline
@@ -1129,11 +1167,9 @@
     movq %xmm5, 56(%rsp)
     movq %xmm6, 64(%rsp)
     movq %xmm7, 72(%rsp)
-    // Store native ArtMethod* to bottom of stack.
-    movq %rdi, 0(%rsp)
-    movq %rsp, %rbp                 // save SP at callee-save frame
-    movq %rsp, %rbx
-    CFI_DEF_CFA_REGISTER(rbx)
+    movq %rdi, 0(%rsp)              // Store native ArtMethod* to bottom of stack.
+    movq %rsp, %rbp                 // save SP at (old) callee-save frame
+    CFI_DEF_CFA_REGISTER(rbp)
     //
     // reserve a lot of space
     //
@@ -1160,17 +1196,17 @@
     movq %rbp, %rsi
     call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
 
-    // At the bottom of the alloca we now have the name pointer to the method=bottom of callee-save
-    // get the adjusted frame pointer
-    popq %rbp
+    // The C call will have registered the complete save-frame on success.
+    // The result of the call is:
+    // %rax: pointer to native code, 0 on error.
+    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.
 
-    // Check for error, negative value.
+    // Check for error = 0.
     test %rax, %rax
-    js .Lentry_error
+    jz .Lentry_error
 
-    // release part of the alloca, get the code pointer
-    addq %rax, %rsp
-    popq %rax
+    // Release part of the alloca.
+    movq %rdx, %rsp
 
     // pop from the register-passing alloca region
     // what's the right layout?
@@ -1190,21 +1226,22 @@
     movq 48(%rsp), %xmm6
     movq 56(%rsp), %xmm7
     addq LITERAL(64), %rsp          // floating-point done
+
     // native call
-    call *%rax                      // Stack should be aligned 16B without the return addr?
+    call *%rax
+
     // result sign extension is handled in C code
     // prepare for artQuickGenericJniEndTrampoline call
-    // (Thread*,  SP, result, result_f)
-    //   rdi      rsi   rdx   rcx       <= C calling convention
-    //  gs:...    rbp   rax   xmm0      <= where they are
+    // (Thread*,  result, result_f)
+    //   rdi      rsi   rdx       <= C calling convention
+    //  gs:...    rax   xmm0      <= where they are
     movq %gs:THREAD_SELF_OFFSET, %rdi
-    movq %rbp, %rsi
-    movq %rax, %rdx
-    movq %xmm0, %rcx
+    movq %rax, %rsi
+    movq %xmm0, %rdx
     call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
 
     // Tear down the alloca.
-    movq %rbx, %rsp
+    movq %rbp, %rsp
     CFI_DEF_CFA_REGISTER(rsp)
 
     // Pending exceptions possible.
@@ -1242,7 +1279,7 @@
     movq %rax, %xmm0
     ret
 .Lentry_error:
-    movq %rbx, %rsp
+    movq %rbp, %rsp
     CFI_DEF_CFA_REGISTER(rsp)
 .Lexception_in_native:
     // TODO: the handle scope contains the this pointer which is used by the debugger for exception
@@ -1294,6 +1331,10 @@
      * Routine that intercepts method calls and returns.
      */
 DEFINE_FUNCTION art_quick_instrumentation_entry
+#if defined(__APPLE__)
+    int3
+    int3
+#else
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
 
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
@@ -1313,6 +1354,7 @@
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
 
     jmp *%rax                     // Tail call to intended method.
+#endif  // __APPLE__
 END_FUNCTION art_quick_instrumentation_entry
 
 DEFINE_FUNCTION art_quick_instrumentation_exit
diff --git a/runtime/arch/x86_64/thread_x86_64.cc b/runtime/arch/x86_64/thread_x86_64.cc
index b7a5c43..6dff2b4 100644
--- a/runtime/arch/x86_64/thread_x86_64.cc
+++ b/runtime/arch/x86_64/thread_x86_64.cc
@@ -21,18 +21,28 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 
+#if defined(__linux__)
 #include <asm/prctl.h>
 #include <sys/prctl.h>
 #include <sys/syscall.h>
+#endif
 
 namespace art {
 
+#if defined(__linux__)
 static void arch_prctl(int code, void* val) {
   syscall(__NR_arch_prctl, code, val);
 }
+#endif
+
 void Thread::InitCpu() {
   MutexLock mu(nullptr, *Locks::modify_ldt_lock_);
+
+#if defined(__linux__)
   arch_prctl(ARCH_SET_GS, this);
+#else
+  UNIMPLEMENTED(FATAL) << "Need to set GS";
+#endif
 
   // Allow easy indirection back to Thread*.
   tlsPtr_.self = this;
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index c0bce84..351de3d 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -63,6 +63,10 @@
   return file_.get();
 }
 
+bool ScopedFlock::HasFile() {
+  return file_.get() != nullptr;
+}
+
 ScopedFlock::ScopedFlock() { }
 
 ScopedFlock::~ScopedFlock() {
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index 26b4eb0..f8ed805 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -40,6 +40,10 @@
 
   // Returns the (locked) file associated with this instance.
   File* GetFile();
+
+  // Returns whether a file is held.
+  bool HasFile();
+
   ~ScopedFlock();
  private:
   std::unique_ptr<File> file_;
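For context: HasFile() is what lets ClassLinker::OpenDexFilesFromOat (further down in this change) take the flock lazily and avoid locking twice. A minimal sketch of that pattern, using only the API shown in this header; the helper name and include path are illustrative:

    #include <string>
    #include "base/scoped_flock.h"

    // Lock the oat location only if no locked file is held yet, mirroring the
    // lazy locking done in OpenDexFilesFromOat below.
    static bool EnsureOatLocationLocked(art::ScopedFlock* flock, const char* oat_location,
                                        std::string* error_msg) {
      if (flock->HasFile()) {
        return true;  // Already locked on an earlier path.
      }
      return flock->Init(oat_location, error_msg);
    }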
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 61f94d4..60453c3 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -634,15 +634,22 @@
 const OatFile* ClassLinker::FindOpenedOatFileForDexFile(const DexFile& dex_file) {
   const char* dex_location = dex_file.GetLocation().c_str();
   uint32_t dex_location_checksum = dex_file.GetLocationChecksum();
-  return FindOpenedOatFileFromDexLocation(dex_location, &dex_location_checksum);
+  return FindOpenedOatFile(nullptr, dex_location, &dex_location_checksum);
 }
 
-const OatFile* ClassLinker::FindOpenedOatFileFromDexLocation(
-    const char* dex_location, const uint32_t* const dex_location_checksum) {
+const OatFile* ClassLinker::FindOpenedOatFile(const char* oat_location, const char* dex_location,
+                                              const uint32_t* const dex_location_checksum) {
   ReaderMutexLock mu(Thread::Current(), dex_lock_);
   for (size_t i = 0; i < oat_files_.size(); i++) {
     const OatFile* oat_file = oat_files_[i];
     DCHECK(oat_file != NULL);
+
+    if (oat_location != nullptr) {
+      if (oat_file->GetLocation() != oat_location) {
+        continue;
+      }
+    }
+
     const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
                                                                       dex_location_checksum,
                                                                       false);
@@ -653,10 +660,229 @@
   return NULL;
 }
 
-const DexFile* ClassLinker::FindDexFileInOatLocation(const char* dex_location,
-                                                     uint32_t dex_location_checksum,
-                                                     const char* oat_location,
-                                                     std::string* error_msg) {
+static std::string GetMultiDexClassesDexName(size_t number, const char* dex_location) {
+  if (number == 0) {
+    return dex_location;
+  } else {
+    return StringPrintf("%s" kMultiDexSeparatorString "classes%zu.dex", dex_location, number + 1);
+  }
+}
+
+static bool LoadMultiDexFilesFromOatFile(const OatFile* oat_file, const char* dex_location,
+                                         bool generated,
+                                         std::vector<std::string>* error_msgs,
+                                         std::vector<const DexFile*>* dex_files) {
+  if (oat_file == nullptr) {
+    return false;
+  }
+
+  size_t old_size = dex_files->size();  // To roll back on error.
+
+  bool success = true;
+  for (size_t i = 0; success; ++i) {
+    std::string next_name_str = GetMultiDexClassesDexName(i, dex_location);
+    const char* next_name = next_name_str.c_str();
+
+    uint32_t dex_location_checksum;
+    uint32_t* dex_location_checksum_pointer = &dex_location_checksum;
+    std::string error_msg;
+    if (!DexFile::GetChecksum(next_name, dex_location_checksum_pointer, &error_msg)) {
+      DCHECK_EQ(false, i == 0 && generated);
+      dex_location_checksum_pointer = nullptr;
+    }
+
+    const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(next_name, nullptr, false);
+
+    if (oat_dex_file == nullptr) {
+      if (i == 0 && generated) {
+        std::string error_msg;
+        error_msg = StringPrintf("\nFailed to find dex file '%s' (checksum 0x%x) in generated "
+                                 "oat file '%s'", dex_location, dex_location_checksum,
+                                 oat_file->GetLocation().c_str());
+        error_msgs->push_back(error_msg);
+      }
+      break;  // Not found, done.
+    }
+
+    // Checksum test. Test must succeed when generated.
+    success = !generated;
+    if (dex_location_checksum_pointer != nullptr) {
+      success = dex_location_checksum == oat_dex_file->GetDexFileLocationChecksum();
+    }
+
+    if (success) {
+      const DexFile* dex_file = oat_dex_file->OpenDexFile(&error_msg);
+      if (dex_file == nullptr) {
+        success = false;
+        error_msgs->push_back(error_msg);
+      } else {
+        dex_files->push_back(dex_file);
+      }
+    }
+
+    // When we generated the file, we expect success, or something is terribly wrong.
+    CHECK_EQ(false, generated && !success)
+        << "dex_location=" << next_name << " oat_location=" << oat_file->GetLocation().c_str()
+        << std::hex << " dex_location_checksum=" << dex_location_checksum
+        << " OatDexFile::GetLocationChecksum()=" << oat_dex_file->GetDexFileLocationChecksum();
+  }
+
+  if (dex_files->size() == old_size) {
+    success = false;  // We did not even find classes.dex
+  }
+
+  if (success) {
+    return true;
+  } else {
+    // Free all the dex files we have loaded.
+    auto it = dex_files->begin() + old_size;
+    auto it_end = dex_files->end();
+    for (; it != it_end; it++) {
+      delete *it;
+    }
+    dex_files->erase(dex_files->begin() + old_size, it_end);
+
+    return false;
+  }
+}
+
+// Multidex files make it possible that some, but not all, dex files are broken or outdated. This
+// complicates loading: we must not load iteratively, because that would register the oat file and
+// the dex files that come before the broken one. Instead, check all multidex files ahead of time.
+bool ClassLinker::OpenDexFilesFromOat(const char* dex_location, const char* oat_location,
+                                      std::vector<std::string>* error_msgs,
+                                      std::vector<const DexFile*>* dex_files) {
+  // 1) Check whether we have an open oat file.
+  // This requires a dex checksum, use the "primary" one.
+  uint32_t dex_location_checksum;
+  uint32_t* dex_location_checksum_pointer = &dex_location_checksum;
+  bool have_checksum = true;
+  std::string checksum_error_msg;
+  if (!DexFile::GetChecksum(dex_location, dex_location_checksum_pointer, &checksum_error_msg)) {
+    dex_location_checksum_pointer = nullptr;
+    have_checksum = false;
+  }
+
+  bool needs_registering = false;
+
+  std::unique_ptr<const OatFile> open_oat_file(FindOpenedOatFile(oat_location, dex_location,
+                                                                 dex_location_checksum_pointer));
+
+  // 2) If we do not have an open one, maybe there's one on disk already.
+
+  // In case the oat file is not open, we play a locking game here so
+  // that if two different processes race to load and register or generate
+  // (or worse, one tries to open a partial generated file) we will be okay.
+  // This is actually common with apps that use DexClassLoader to work
+  // around the dex method reference limit and that have a background
+  // service running in a separate process.
+  ScopedFlock scoped_flock;
+
+  if (open_oat_file.get() == nullptr) {
+    if (oat_location != nullptr) {
+      // We can only do this if we have a checksum; otherwise report an error.
+      if (!have_checksum) {
+        error_msgs->push_back(checksum_error_msg);
+        return false;
+      }
+
+      std::string error_msg;
+
+      // We are loading or creating one in the future. Time to set up the file lock.
+      if (!scoped_flock.Init(oat_location, &error_msg)) {
+        error_msgs->push_back(error_msg);
+        return false;
+      }
+
+      open_oat_file.reset(FindOatFileInOatLocationForDexFile(dex_location, dex_location_checksum,
+                                                             oat_location, &error_msg));
+
+      if (open_oat_file.get() == nullptr) {
+        std::string compound_msg = StringPrintf("Failed to find dex file '%s' in oat location '%s': %s",
+                                                dex_location, oat_location, error_msg.c_str());
+        VLOG(class_linker) << compound_msg;
+        error_msgs->push_back(compound_msg);
+      }
+    } else {
+      // TODO: What to lock here?
+      open_oat_file.reset(FindOatFileContainingDexFileFromDexLocation(dex_location,
+                                                                      dex_location_checksum_pointer,
+                                                                      kRuntimeISA, error_msgs));
+    }
+    needs_registering = true;
+  }
+
+  // 3) If we have an oat file, check all contained multidex files for our dex_location.
+  // Note: LoadMultiDexFilesFromOatFile will check for nullptr in the first argument.
+  bool success = LoadMultiDexFilesFromOatFile(open_oat_file.get(), dex_location, false, error_msgs,
+                                              dex_files);
+  if (success) {
+    const OatFile* oat_file = open_oat_file.release();  // Avoid deleting it.
+    if (needs_registering) {
+      // We opened the oat file, so we must register it.
+      RegisterOatFile(oat_file);
+    }
+    return true;
+  } else {
+    if (needs_registering) {
+      // We opened it, delete it.
+      open_oat_file.reset();
+    } else {
+      open_oat_file.release();  // Do not delete open oat files.
+    }
+  }
+
+  // 4) If it's not the case (either no oat file or mismatches), regenerate and load.
+
+  // A checksum is required from here on; fail if we could not compute one.
+  if (!have_checksum) {
+    error_msgs->push_back(checksum_error_msg);
+    return false;
+  }
+
+  // Look in cache location if no oat_location is given.
+  std::string cache_location;
+  if (oat_location == nullptr) {
+    // Use the dalvik cache.
+    const std::string dalvik_cache(GetDalvikCacheOrDie(GetInstructionSetString(kRuntimeISA)));
+    cache_location = GetDalvikCacheFilenameOrDie(dex_location, dalvik_cache.c_str());
+    oat_location = cache_location.c_str();
+  }
+
+  // Definitely need to lock now.
+  if (!scoped_flock.HasFile()) {
+    std::string error_msg;
+    if (!scoped_flock.Init(oat_location, &error_msg)) {
+      error_msgs->push_back(error_msg);
+      return false;
+    }
+  }
+
+  // Create the oat file.
+  open_oat_file.reset(CreateOatFileForDexLocation(dex_location, scoped_flock.GetFile()->Fd(),
+                                                  oat_location, error_msgs));
+
+  // Failed, bail.
+  if (open_oat_file.get() == nullptr) {
+    return false;
+  }
+
+  // Try to load again, but with stronger checks.
+  success = LoadMultiDexFilesFromOatFile(open_oat_file.get(), dex_location, true, error_msgs,
+                                         dex_files);
+  if (success) {
+    RegisterOatFile(open_oat_file.release());
+    return true;
+  } else {
+    return false;
+  }
+}
+
+const OatFile* ClassLinker::FindOatFileInOatLocationForDexFile(const char* dex_location,
+                                                               uint32_t dex_location_checksum,
+                                                               const char* oat_location,
+                                                               std::string* error_msg) {
   std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_location, oat_location, NULL,
                                             !Runtime::Current()->IsCompiler(),
                                             error_msg));
@@ -699,44 +925,21 @@
                               actual_dex_checksum);
     return nullptr;
   }
-  const DexFile* dex_file = oat_dex_file->OpenDexFile(error_msg);
-  if (dex_file != nullptr) {
-    RegisterOatFile(oat_file.release());
-  }
-  return dex_file;
-}
-
-const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(
-    const char* dex_location,
-    uint32_t dex_location_checksum,
-    const char* oat_location,
-    std::vector<std::string>* error_msgs) {
-  // We play a locking game here so that if two different processes
-  // race to generate (or worse, one tries to open a partial generated
-  // file) we will be okay. This is actually common with apps that use
-  // DexClassLoader to work around the dex method reference limit and
-  // that have a background service running in a separate process.
-  ScopedFlock scoped_flock;
-  std::string error_msg;
-  if (!scoped_flock.Init(oat_location, &error_msg)) {
-    error_msgs->push_back(error_msg);
+  std::unique_ptr<const DexFile> dex_file(oat_dex_file->OpenDexFile(error_msg));
+  if (dex_file.get() != nullptr) {
+    return oat_file.release();
+  } else {
     return nullptr;
   }
+}
 
-  // Check if we already have an up-to-date output file
-  const DexFile* dex_file = FindDexFileInOatLocation(dex_location, dex_location_checksum,
-                                                     oat_location, &error_msg);
-  if (dex_file != nullptr) {
-    return dex_file;
-  }
-  std::string compound_msg = StringPrintf("Failed to find dex file '%s' in oat location '%s': %s",
-                                          dex_location, oat_location, error_msg.c_str());
-  VLOG(class_linker) << compound_msg;
-  error_msgs->push_back(compound_msg);
-
+const OatFile* ClassLinker::CreateOatFileForDexLocation(const char* dex_location,
+                                                        int fd, const char* oat_location,
+                                                        std::vector<std::string>* error_msgs) {
   // Generate the output oat file for the dex file
   VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location;
-  if (!GenerateOatFile(dex_location, scoped_flock.GetFile()->Fd(), oat_location, &error_msg)) {
+  std::string error_msg;
+  if (!GenerateOatFile(dex_location, fd, oat_location, &error_msg)) {
     CHECK(!error_msg.empty());
     error_msgs->push_back(error_msg);
     return nullptr;
@@ -745,27 +948,13 @@
                                             !Runtime::Current()->IsCompiler(),
                                             &error_msg));
   if (oat_file.get() == nullptr) {
-    compound_msg = StringPrintf("\nFailed to open generated oat file '%s': %s",
-                                oat_location, error_msg.c_str());
+    std::string compound_msg = StringPrintf("\nFailed to open generated oat file '%s': %s",
+                                            oat_location, error_msg.c_str());
     error_msgs->push_back(compound_msg);
     return nullptr;
   }
-  const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location,
-                                                                    &dex_location_checksum);
-  if (oat_dex_file == nullptr) {
-    error_msg = StringPrintf("\nFailed to find dex file '%s' (checksum 0x%x) in generated out file "
-                             "'%s'", dex_location, dex_location_checksum, oat_location);
-    error_msgs->push_back(error_msg);
-    return nullptr;
-  }
-  const DexFile* result = oat_dex_file->OpenDexFile(&error_msg);
-  CHECK(result != nullptr) << error_msgs << ", " << error_msg;
-  CHECK_EQ(dex_location_checksum, result->GetLocationChecksum())
-          << "dex_location=" << dex_location << " oat_location=" << oat_location << std::hex
-          << " dex_location_checksum=" << dex_location_checksum
-          << " DexFile::GetLocationChecksum()=" << result->GetLocationChecksum();
-  RegisterOatFile(oat_file.release());
-  return result;
+
+  return oat_file.release();
 }
 
 bool ClassLinker::VerifyOatFileChecksums(const OatFile* oat_file,
@@ -832,17 +1021,17 @@
   return false;
 }
 
-const DexFile* ClassLinker::VerifyAndOpenDexFileFromOatFile(const std::string& oat_file_location,
-                                                            const char* dex_location,
-                                                            std::string* error_msg,
-                                                            bool* open_failed) {
+const OatFile* ClassLinker::LoadOatFileAndVerifyDexFile(const std::string& oat_file_location,
+                                                        const char* dex_location,
+                                                        std::string* error_msg,
+                                                        bool* open_failed) {
   std::unique_ptr<const OatFile> oat_file(FindOatFileFromOatLocation(oat_file_location, error_msg));
   if (oat_file.get() == nullptr) {
     *open_failed = true;
     return nullptr;
   }
   *open_failed = false;
-  const DexFile* dex_file = nullptr;
+  std::unique_ptr<const DexFile> dex_file;
   uint32_t dex_location_checksum;
   if (!DexFile::GetChecksum(dex_location, &dex_location_checksum, error_msg)) {
     // If no classes.dex found in dex_location, it has been stripped or is corrupt, assume oat is
@@ -855,49 +1044,38 @@
                                 error_msg->c_str());
       return nullptr;
     }
-    dex_file = oat_dex_file->OpenDexFile(error_msg);
+    dex_file.reset(oat_dex_file->OpenDexFile(error_msg));
   } else {
     bool verified = VerifyOatFileChecksums(oat_file.get(), dex_location, dex_location_checksum,
                                            kRuntimeISA, error_msg);
     if (!verified) {
       return nullptr;
     }
-    dex_file = oat_file->GetOatDexFile(dex_location,
-                                       &dex_location_checksum)->OpenDexFile(error_msg);
+    dex_file.reset(oat_file->GetOatDexFile(dex_location,
+                                           &dex_location_checksum)->OpenDexFile(error_msg));
   }
-  if (dex_file != nullptr) {
-    RegisterOatFile(oat_file.release());
+
+  if (dex_file.get() != nullptr) {
+    return oat_file.release();
+  } else {
+    return nullptr;
   }
-  return dex_file;
 }
 
-const DexFile* ClassLinker::FindDexFileInOatFileFromDexLocation(
+const OatFile* ClassLinker::FindOatFileContainingDexFileFromDexLocation(
     const char* dex_location,
     const uint32_t* const dex_location_checksum,
     InstructionSet isa,
     std::vector<std::string>* error_msgs) {
-  const OatFile* open_oat_file = FindOpenedOatFileFromDexLocation(dex_location,
-                                                                  dex_location_checksum);
-  if (open_oat_file != nullptr) {
-    const OatFile::OatDexFile* oat_dex_file = open_oat_file->GetOatDexFile(dex_location,
-                                                                           dex_location_checksum);
-    std::string error_msg;
-    const DexFile* ret = oat_dex_file->OpenDexFile(&error_msg);
-    if (ret == nullptr) {
-      error_msgs->push_back(error_msg);
-    }
-    return ret;
-  }
-
   // Look for an existing file next to dex. for example, for
   // /foo/bar/baz.jar, look for /foo/bar/<isa>/baz.odex.
   std::string odex_filename(DexFilenameToOdexFilename(dex_location, isa));
   bool open_failed;
   std::string error_msg;
-  const DexFile* dex_file = VerifyAndOpenDexFileFromOatFile(odex_filename, dex_location,
-                                                            &error_msg, &open_failed);
-  if (dex_file != nullptr) {
-    return dex_file;
+  const OatFile* oat_file = LoadOatFileAndVerifyDexFile(odex_filename, dex_location, &error_msg,
+                                                        &open_failed);
+  if (oat_file != nullptr) {
+    return oat_file;
   }
   if (dex_location_checksum == nullptr) {
     error_msgs->push_back(StringPrintf("Failed to open oat file from %s and no classes.dex found in"
@@ -910,10 +1088,10 @@
   const std::string dalvik_cache(GetDalvikCacheOrDie(GetInstructionSetString(kRuntimeISA)));
   std::string cache_location(GetDalvikCacheFilenameOrDie(dex_location,
                                                          dalvik_cache.c_str()));
-  dex_file = VerifyAndOpenDexFileFromOatFile(cache_location, dex_location, &cache_error_msg,
-                                             &open_failed);
-  if (dex_file != nullptr) {
-    return dex_file;
+  oat_file = LoadOatFileAndVerifyDexFile(cache_location, dex_location, &cache_error_msg,
+                                         &open_failed);
+  if (oat_file != nullptr) {
+    return oat_file;
   }
   if (!open_failed && TEMP_FAILURE_RETRY(unlink(cache_location.c_str())) != 0) {
     PLOG(FATAL) << "Failed to remove obsolete oat file from " << cache_location;
@@ -924,9 +1102,7 @@
   VLOG(class_linker) << compound_msg;
   error_msgs->push_back(compound_msg);
 
-  // Try to generate oat file if it wasn't found or was obsolete.
-  return FindOrCreateOatFileForDexLocation(dex_location, *dex_location_checksum,
-                                           cache_location.c_str(), error_msgs);
+  return nullptr;
 }
 
 const OatFile* ClassLinker::FindOpenedOatFileFromOatLocation(const std::string& oat_location) {
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 7d7bf15..60dad7b 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -273,23 +273,12 @@
                                             std::string* error_msg)
       LOCKS_EXCLUDED(dex_lock_);
 
-  // Finds the oat file for a dex location, generating the oat file if
-  // it is missing or out of date. Returns the DexFile from within the
-  // created oat file.
-  const DexFile* FindOrCreateOatFileForDexLocation(const char* dex_location,
-                                                   uint32_t dex_location_checksum,
-                                                   const char* oat_location,
-                                                   std::vector<std::string>* error_msgs)
+  // Find or create the oat file holding dex_location. Then load all corresponding dex files
+  // (if multidex) into the given vector.
+  bool OpenDexFilesFromOat(const char* dex_location, const char* oat_location,
+                           std::vector<std::string>* error_msgs,
+                           std::vector<const DexFile*>* dex_files)
       LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
-  // Find a DexFile within an OatFile given a DexFile location. Note
-  // that this returns null if the location checksum of the DexFile
-  // does not match the OatFile.
-  const DexFile* FindDexFileInOatFileFromDexLocation(const char* location,
-                                                     const uint32_t* const location_checksum,
-                                                     InstructionSet isa,
-                                                     std::vector<std::string>* error_msgs)
-      LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
-
 
   // Returns true if oat file contains the dex file with the given location and checksum.
   static bool VerifyOatFileChecksums(const OatFile* oat_file,
@@ -545,21 +534,47 @@
   const OatFile* FindOpenedOatFileForDexFile(const DexFile& dex_file)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  const OatFile* FindOpenedOatFileFromDexLocation(const char* dex_location,
-                                                  const uint32_t* const dex_location_checksum)
+
+  // Find an opened oat file that contains dex_location. If oat_location is not nullptr, the file
+  // must have that location, else any oat location is accepted.
+  const OatFile* FindOpenedOatFile(const char* oat_location, const char* dex_location,
+                                   const uint32_t* const dex_location_checksum)
       LOCKS_EXCLUDED(dex_lock_);
   const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location)
       LOCKS_EXCLUDED(dex_lock_);
-  const DexFile* FindDexFileInOatLocation(const char* dex_location,
-                                          uint32_t dex_location_checksum,
-                                          const char* oat_location,
-                                          std::string* error_msg)
+
+  // Note: will not register the oat file.
+  const OatFile* FindOatFileInOatLocationForDexFile(const char* dex_location,
+                                                    uint32_t dex_location_checksum,
+                                                    const char* oat_location,
+                                                    std::string* error_msg)
       LOCKS_EXCLUDED(dex_lock_);
 
-  const DexFile* VerifyAndOpenDexFileFromOatFile(const std::string& oat_file_location,
-                                                 const char* dex_location,
-                                                 std::string* error_msg,
-                                                 bool* open_failed)
+  // Creates the oat file from the dex_location to the oat_location. Needs a file descriptor for
+  // the file to be written, which is assumed to be under a lock.
+  const OatFile* CreateOatFileForDexLocation(const char* dex_location,
+                                             int fd, const char* oat_location,
+                                             std::vector<std::string>* error_msgs)
+      LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
+
+  // Finds an OatFile that contains a DexFile for the given DexFile location.
+  //
+  // Note 1: this will not check open oat files, which are assumed to be stale when this is run.
+  // Note 2: Does not register the oat file. It is the caller's job to register if the file is to
+  //         be kept.
+  const OatFile* FindOatFileContainingDexFileFromDexLocation(const char* location,
+                                                             const uint32_t* const location_checksum,
+                                                             InstructionSet isa,
+                                                             std::vector<std::string>* error_msgs)
+      LOCKS_EXCLUDED(dex_lock_, Locks::mutator_lock_);
+
+  // Find and verify an oat file for the given dex file. Will return nullptr when the oat file
+  // was not found or the dex file could not be verified.
+  // Note: Does not register the oat file.
+  const OatFile* LoadOatFileAndVerifyDexFile(const std::string& oat_file_location,
+                                             const char* dex_location,
+                                             std::string* error_msg,
+                                             bool* open_failed)
       LOCKS_EXCLUDED(dex_lock_);
 
   mirror::ArtMethod* CreateProxyConstructor(Thread* self, Handle<mirror::Class> klass,
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 044d08b..fdbc9c2 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -114,32 +114,42 @@
  public:
   static void SetEnvironmentVariables(std::string& android_data) {
     if (IsHost()) {
-      // $ANDROID_ROOT is set on the device, but not on the host.
-      // We need to set this so that icu4c can find its locale data.
-      std::string root;
-      const char* android_build_top = getenv("ANDROID_BUILD_TOP");
-      if (android_build_top != nullptr) {
-        root += android_build_top;
-      } else {
-        // Not set by build server, so default to current directory
-        char* cwd = getcwd(nullptr, 0);
-        setenv("ANDROID_BUILD_TOP", cwd, 1);
-        root += cwd;
-        free(cwd);
-      }
+      // $ANDROID_ROOT is set on the device, but not necessarily on the host.
+      // But it needs to be set so that icu4c can find its locale data.
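+      // Resolution order: an existing ANDROID_ROOT wins, then ANDROID_HOST_OUT, then a path
+      // derived from ANDROID_BUILD_TOP (or the current directory) plus out/host/<os>.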
+      const char* android_root_from_env = getenv("ANDROID_ROOT");
+      if (android_root_from_env == nullptr) {
+        // Use ANDROID_HOST_OUT for ANDROID_ROOT if it is set.
+        const char* android_host_out = getenv("ANDROID_HOST_OUT");
+        if (android_host_out != nullptr) {
+          setenv("ANDROID_ROOT", android_host_out, 1);
+        } else {
+          // Build it from ANDROID_BUILD_TOP or cwd
+          std::string root;
+          const char* android_build_top = getenv("ANDROID_BUILD_TOP");
+          if (android_build_top != nullptr) {
+            root += android_build_top;
+          } else {
+            // Not set by build server, so default to current directory
+            char* cwd = getcwd(nullptr, 0);
+            setenv("ANDROID_BUILD_TOP", cwd, 1);
+            root += cwd;
+            free(cwd);
+          }
 #if defined(__linux__)
-      root += "/out/host/linux-x86";
+          root += "/out/host/linux-x86";
 #elif defined(__APPLE__)
-      root += "/out/host/darwin-x86";
+          root += "/out/host/darwin-x86";
 #else
 #error unsupported OS
 #endif
-      setenv("ANDROID_ROOT", root.c_str(), 1);
+          setenv("ANDROID_ROOT", root.c_str(), 1);
+        }
+      }
       setenv("LD_LIBRARY_PATH", ":", 0);  // Required by java.lang.System.<clinit>.
 
       // Not set by build server, so default
       if (getenv("ANDROID_HOST_OUT") == nullptr) {
-        setenv("ANDROID_HOST_OUT", root.c_str(), 1);
+        setenv("ANDROID_HOST_OUT", getenv("ANDROID_ROOT"), 1);
       }
     }
 
@@ -156,6 +166,18 @@
     return !kIsTargetBuild;
   }
 
+  const DexFile* LoadExpectSingleDexFile(const char* location) {
+    std::vector<const DexFile*> dex_files;
+    std::string error_msg;
+    if (!DexFile::Open(location, location, &error_msg, &dex_files)) {
+      LOG(FATAL) << "Could not open .dex file '" << location << "': " << error_msg << "\n";
+      return nullptr;
+    } else {
+      CHECK_EQ(1U, dex_files.size()) << "Expected only one dex file in " << location;
+      return dex_files[0];
+    }
+  }
+
   virtual void SetUp() {
     SetEnvironmentVariables(android_data_);
     dalvik_cache_.append(android_data_.c_str());
@@ -164,12 +186,7 @@
     ASSERT_EQ(mkdir_result, 0);
 
     std::string error_msg;
-    java_lang_dex_file_ = DexFile::Open(GetLibCoreDexFileName().c_str(),
-                                        GetLibCoreDexFileName().c_str(), &error_msg);
-    if (java_lang_dex_file_ == nullptr) {
-      LOG(FATAL) << "Could not open .dex file '" << GetLibCoreDexFileName() << "': "
-          << error_msg << "\n";
-    }
+    java_lang_dex_file_ = LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str());
     boot_class_path_.push_back(java_lang_dex_file_);
 
     std::string min_heap_string(StringPrintf("-Xms%zdm", gc::Heap::kDefaultInitialSize / MB));
@@ -233,7 +250,7 @@
     // There's a function to clear the array, but it's not public...
     typedef void (*IcuCleanupFn)();
     void* sym = dlsym(RTLD_DEFAULT, "u_cleanup_" U_ICU_VERSION_SHORT);
-    CHECK(sym != nullptr);
+    CHECK(sym != nullptr) << dlerror();
     IcuCleanupFn icu_cleanup_fn = reinterpret_cast<IcuCleanupFn>(sym);
     (*icu_cleanup_fn)();
 
@@ -264,7 +281,8 @@
     return GetAndroidRoot();
   }
 
-  const DexFile* OpenTestDexFile(const char* name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  std::vector<const DexFile*> OpenTestDexFiles(const char* name)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(name != nullptr);
     std::string filename;
     if (IsHost()) {
@@ -277,26 +295,36 @@
     filename += name;
     filename += ".jar";
     std::string error_msg;
-    const DexFile* dex_file = DexFile::Open(filename.c_str(), filename.c_str(), &error_msg);
-    CHECK(dex_file != nullptr) << "Failed to open '" << filename << "': " << error_msg;
-    CHECK_EQ(PROT_READ, dex_file->GetPermissions());
-    CHECK(dex_file->IsReadOnly());
-    opened_dex_files_.push_back(dex_file);
-    return dex_file;
+    std::vector<const DexFile*> dex_files;
+    bool success = DexFile::Open(filename.c_str(), filename.c_str(), &error_msg, &dex_files);
+    CHECK(success) << "Failed to open '" << filename << "': " << error_msg;
+    for (const DexFile* dex_file : dex_files) {
+      CHECK_EQ(PROT_READ, dex_file->GetPermissions());
+      CHECK(dex_file->IsReadOnly());
+    }
+    opened_dex_files_.insert(opened_dex_files_.end(), dex_files.begin(), dex_files.end());
+    return dex_files;
+  }
+
+  const DexFile* OpenTestDexFile(const char* name)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    std::vector<const DexFile*> vector = OpenTestDexFiles(name);
+    EXPECT_EQ(1U, vector.size());
+    return vector[0];
   }
 
   jobject LoadDex(const char* dex_name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile* dex_file = OpenTestDexFile(dex_name);
-    CHECK(dex_file != nullptr);
-    class_linker_->RegisterDexFile(*dex_file);
-    std::vector<const DexFile*> class_path;
-    class_path.push_back(dex_file);
+    std::vector<const DexFile*> dex_files = OpenTestDexFiles(dex_name);
+    CHECK_NE(0U, dex_files.size());
+    for (const DexFile* dex_file : dex_files) {
+      class_linker_->RegisterDexFile(*dex_file);
+    }
     ScopedObjectAccessUnchecked soa(Thread::Current());
     ScopedLocalRef<jobject> class_loader_local(soa.Env(),
         soa.Env()->AllocObject(WellKnownClasses::dalvik_system_PathClassLoader));
     jobject class_loader = soa.Env()->NewGlobalRef(class_loader_local.get());
     soa.Self()->SetClassLoaderOverride(soa.Decode<mirror::ClassLoader*>(class_loader_local.get()));
-    Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path);
+    Runtime::Current()->SetCompileTimeClassPath(class_loader, dex_files);
     return class_loader;
   }
 
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 10f34d9..e5bc7c8 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -87,7 +87,21 @@
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
   CHECK(checksum != NULL);
   uint32_t magic;
-  ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
+
+  // Strip the ":<entry name>" multidex suffix (if any) from the location to get the file part.
+  const char* zip_entry_name = kClassesDex;
+  const char* file_part = filename;
+  std::unique_ptr<const char> file_part_ptr;
+
+  if (IsMultiDexLocation(filename)) {
+    std::pair<const char*, const char*> pair = SplitMultiDexLocation(filename);
+    file_part_ptr.reset(pair.first);
+    file_part = pair.first;
+    zip_entry_name = pair.second;
+  }
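+  // For example, "/foo/bar/baz.jar:classes2.dex" yields file_part "/foo/bar/baz.jar" and
+  // zip_entry_name "classes2.dex".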
+
+  ScopedFd fd(OpenAndReadMagic(file_part, &magic, error_msg));
   if (fd.get() == -1) {
     DCHECK(!error_msg->empty());
     return false;
@@ -95,13 +109,13 @@
   if (IsZipMagic(magic)) {
     std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd.release(), filename, error_msg));
     if (zip_archive.get() == NULL) {
-      *error_msg = StringPrintf("Failed to open zip archive '%s'", filename);
+      *error_msg = StringPrintf("Failed to open zip archive '%s'", file_part);
       return false;
     }
-    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(kClassesDex, error_msg));
+    std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(zip_entry_name, error_msg));
     if (zip_entry.get() == NULL) {
-      *error_msg = StringPrintf("Zip archive '%s' doesn't contain %s (error msg: %s)", filename,
-                                kClassesDex, error_msg->c_str());
+      *error_msg = StringPrintf("Zip archive '%s' doesn't contain %s (error msg: %s)", file_part,
+                                zip_entry_name, error_msg->c_str());
       return false;
     }
     *checksum = zip_entry->GetCrc32();
@@ -119,23 +133,29 @@
   return false;
 }
 
-const DexFile* DexFile::Open(const char* filename,
-                             const char* location,
-                             std::string* error_msg) {
+bool DexFile::Open(const char* filename, const char* location, std::string* error_msg,
+                   std::vector<const DexFile*>* dex_files) {
   uint32_t magic;
   ScopedFd fd(OpenAndReadMagic(filename, &magic, error_msg));
   if (fd.get() == -1) {
     DCHECK(!error_msg->empty());
-    return NULL;
+    return false;
   }
   if (IsZipMagic(magic)) {
-    return DexFile::OpenZip(fd.release(), location, error_msg);
+    return DexFile::OpenZip(fd.release(), location, error_msg, dex_files);
   }
   if (IsDexMagic(magic)) {
-    return DexFile::OpenFile(fd.release(), location, true, error_msg);
+    std::unique_ptr<const DexFile> dex_file(DexFile::OpenFile(fd.release(), location, true,
+                                                              error_msg));
+    if (dex_file.get() != nullptr) {
+      dex_files->push_back(dex_file.release());
+      return true;
+    } else {
+      return false;
+    }
   }
   *error_msg = StringPrintf("Expected valid zip or dex file: '%s'", filename);
-  return nullptr;
+  return false;
 }
 
 int DexFile::GetPermissions() const {
@@ -217,13 +237,14 @@
 
 const char* DexFile::kClassesDex = "classes.dex";
 
-const DexFile* DexFile::OpenZip(int fd, const std::string& location, std::string* error_msg) {
+bool DexFile::OpenZip(int fd, const std::string& location, std::string* error_msg,
+                      std::vector<const  DexFile*>* dex_files) {
   std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(fd, location.c_str(), error_msg));
   if (zip_archive.get() == nullptr) {
     DCHECK(!error_msg->empty());
-    return nullptr;
+    return false;
   }
-  return DexFile::Open(*zip_archive, location, error_msg);
+  return DexFile::OpenFromZip(*zip_archive, location, error_msg, dex_files);
 }
 
 const DexFile* DexFile::OpenMemory(const std::string& location,
@@ -238,17 +259,20 @@
                     error_msg);
 }
 
-const DexFile* DexFile::Open(const ZipArchive& zip_archive, const std::string& location,
-                             std::string* error_msg) {
+const DexFile* DexFile::Open(const ZipArchive& zip_archive, const char* entry_name,
+                             const std::string& location, std::string* error_msg,
+                             ZipOpenErrorCode* error_code) {
   CHECK(!location.empty());
-  std::unique_ptr<ZipEntry> zip_entry(zip_archive.Find(kClassesDex, error_msg));
+  std::unique_ptr<ZipEntry> zip_entry(zip_archive.Find(entry_name, error_msg));
   if (zip_entry.get() == NULL) {
+    *error_code = ZipOpenErrorCode::kEntryNotFound;
     return nullptr;
   }
-  std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(location.c_str(), kClassesDex, error_msg));
+  std::unique_ptr<MemMap> map(zip_entry->ExtractToMemMap(location.c_str(), entry_name, error_msg));
   if (map.get() == NULL) {
-    *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", kClassesDex, location.c_str(),
+    *error_msg = StringPrintf("Failed to extract '%s' from '%s': %s", entry_name, location.c_str(),
                               error_msg->c_str());
+    *error_code = ZipOpenErrorCode::kExtractToMemoryError;
     return nullptr;
   }
   std::unique_ptr<const DexFile> dex_file(OpenMemory(location, zip_entry->GetCrc32(), map.release(),
@@ -256,20 +280,63 @@
   if (dex_file.get() == nullptr) {
     *error_msg = StringPrintf("Failed to open dex file '%s' from memory: %s", location.c_str(),
                               error_msg->c_str());
+    *error_code = ZipOpenErrorCode::kDexFileError;
     return nullptr;
   }
   if (!dex_file->DisableWrite()) {
     *error_msg = StringPrintf("Failed to make dex file '%s' read only", location.c_str());
+    *error_code = ZipOpenErrorCode::kMakeReadOnlyError;
     return nullptr;
   }
   CHECK(dex_file->IsReadOnly()) << location;
   if (!DexFileVerifier::Verify(dex_file.get(), dex_file->Begin(), dex_file->Size(),
                                location.c_str(), error_msg)) {
+    *error_code = ZipOpenErrorCode::kVerifyError;
     return nullptr;
   }
+  *error_code = ZipOpenErrorCode::kNoError;
   return dex_file.release();
 }
 
+bool DexFile::OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
+                          std::string* error_msg, std::vector<const DexFile*>* dex_files) {
+  ZipOpenErrorCode error_code;
+  std::unique_ptr<const DexFile> dex_file(Open(zip_archive, kClassesDex, location, error_msg,
+                                               &error_code));
+  if (dex_file.get() == nullptr) {
+    return false;
+  } else {
+    // Had at least classes.dex.
+    dex_files->push_back(dex_file.release());
+
+    // Now try some more.
+    size_t i = 2;
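+    // Multidex archives name further entries classes2.dex, classes3.dex, ... (there is no
+    // classes1.dex), so probing starts at 2; the loop below stops after entry 99.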
+
+    // We could try to avoid std::string allocations by working on a char array directly. As we
+    // do not expect a lot of iterations, this seems too involved and brittle.
+
+    while (i < 100) {
+      std::string name = StringPrintf("classes%zu.dex", i);
+      std::string fake_location = location + ":" + name;
+      std::unique_ptr<const DexFile> next_dex_file(Open(zip_archive, name.c_str(), fake_location,
+                                                        error_msg, &error_code));
+      if (next_dex_file.get() == nullptr) {
+        if (error_code != ZipOpenErrorCode::kEntryNotFound) {
+          LOG(WARNING) << *error_msg;
+        }
+        break;
+      } else {
+        dex_files->push_back(next_dex_file.release());
+      }
+
+      i++;
+    }
+
+    return true;
+  }
+}
+
+
 const DexFile* DexFile::OpenMemory(const byte* base,
                                    size_t size,
                                    const std::string& location,
@@ -865,6 +932,25 @@
   }
 }
 
+bool DexFile::IsMultiDexLocation(const char* location) {
+  return strrchr(location, kMultiDexSeparator) != nullptr;
+}
+
+std::pair<const char*, const char*> DexFile::SplitMultiDexLocation(
+    const char* location) {
+  const char* colon_ptr = strrchr(location, kMultiDexSeparator);
+
+  // Check it's synthetic.
+  CHECK_NE(colon_ptr, static_cast<const char*>(nullptr));
+
+  size_t colon_index = colon_ptr - location;
+  char* tmp = new char[colon_index + 1];
+  strncpy(tmp, location, colon_index);
+  tmp[colon_index] = 0;
+
+  return std::make_pair(tmp, colon_ptr + 1);
+}
+
 std::ostream& operator<<(std::ostream& os, const DexFile& dex_file) {
   os << StringPrintf("[DexFile: %s dex-checksum=%08x location-checksum=%08x %p-%p]",
                      dex_file.GetLocation().c_str(),
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 8270a2b..04f1cc1 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -63,6 +63,13 @@
   // The value of an invalid index.
   static const uint16_t kDexNoIndex16 = 0xFFFF;
 
+  // The separator character in MultiDex locations.
+  static constexpr char kMultiDexSeparator = ':';
+
+  // A string version of the previous constant. This is a define so that adjacent string literals
+  // can be merged at compile time.
+  #define kMultiDexSeparatorString ":"
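+  // For example (illustrative), "base.apk" kMultiDexSeparatorString "classes2.dex" is merged into
+  // the single literal "base.apk:classes2.dex".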
+
   // Raw header_item.
   struct Header {
     uint8_t magic_[8];
@@ -352,8 +359,9 @@
   // Return true if the checksum could be found, false otherwise.
   static bool GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg);
 
-  // Opens .dex file, guessing the container format based on file extension
-  static const DexFile* Open(const char* filename, const char* location, std::string* error_msg);
+  // Opens .dex files found in the container, guessing the container format based on the file magic.
+  static bool Open(const char* filename, const char* location, std::string* error_msg,
+                   std::vector<const DexFile*>* dex_files);
 
   // Opens .dex file, backed by existing memory
   static const DexFile* Open(const uint8_t* base, size_t size,
@@ -363,9 +371,9 @@
     return OpenMemory(base, size, location, location_checksum, NULL, error_msg);
   }
 
-  // Opens .dex file from the classes.dex in a zip archive
-  static const DexFile* Open(const ZipArchive& zip_archive, const std::string& location,
-                             std::string* error_msg);
+  // Open all classesXXX.dex files from a zip archive.
+  static bool OpenFromZip(const ZipArchive& zip_archive, const std::string& location,
+                          std::string* error_msg, std::vector<const DexFile*>* dex_files);
 
   // Closes a .dex file.
   virtual ~DexFile();
@@ -823,8 +831,24 @@
   // Opens a .dex file
   static const DexFile* OpenFile(int fd, const char* location, bool verify, std::string* error_msg);
 
-  // Opens a dex file from within a .jar, .zip, or .apk file
-  static const DexFile* OpenZip(int fd, const std::string& location, std::string* error_msg);
+  // Opens dex files from within a .jar, .zip, or .apk file
+  static bool OpenZip(int fd, const std::string& location, std::string* error_msg,
+                      std::vector<const DexFile*>* dex_files);
+
+  enum class ZipOpenErrorCode {  // private
+    kNoError,
+    kEntryNotFound,
+    kExtractToMemoryError,
+    kDexFileError,
+    kMakeReadOnlyError,
+    kVerifyError
+  };
+
+  // Opens a .dex file from the entry_name in a zip archive. error_code is undefined when a
+  // non-null pointer is returned.
+  static const DexFile* Open(const ZipArchive& zip_archive, const char* entry_name,
+                             const std::string& location, std::string* error_msg,
+                             ZipOpenErrorCode* error_code);
 
   // Opens a .dex file at the given address backed by a MemMap
   static const DexFile* OpenMemory(const std::string& location,
@@ -855,6 +879,18 @@
       DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
       void* context, const byte* stream, LocalInfo* local_in_reg) const;
 
+  // Check whether a location denotes a multidex dex file. This is a very simple check: returns
+  // whether the string contains the separator character.
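+  // For example, "/system/app/Foo.apk:classes2.dex" is a multidex location;
+  // "/system/app/Foo.apk" is not.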
+  static bool IsMultiDexLocation(const char* location);
+
+  // Splits a multidex location at the last separator character. The second component is a pointer
+  // to the character after the separator. The first is a copy of the substring up to the separator.
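+  // For example, "/foo/bar/baz.jar:classes2.dex" splits into "/foo/bar/baz.jar" (newly allocated)
+  // and "classes2.dex" (a pointer into the original string).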
+  //
+  // Note: It's the caller's job to free the first component of the returned pair.
+  // Bug 15313523: gcc/libc++ don't allow a unique_ptr for the first component
+  static std::pair<const char*, const char*> SplitMultiDexLocation(const char* location);
+
+
   // The base address of the memory mapping.
   const byte* const begin_;
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index a814c34..c1e00fc 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -146,8 +146,11 @@
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
   std::string error_msg;
-  const DexFile* dex_file = DexFile::Open(location, location, &error_msg);
-  CHECK(dex_file != nullptr) << error_msg;
+  std::vector<const DexFile*> tmp;
+  bool success = DexFile::Open(location, location, &error_msg, &tmp);
+  CHECK(success) << error_msg;
+  EXPECT_EQ(1U, tmp.size());
+  const DexFile* dex_file = tmp[0];
   EXPECT_EQ(PROT_READ, dex_file->GetPermissions());
   EXPECT_TRUE(dex_file->IsReadOnly());
   return dex_file;
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index d0ce00f..93faeae 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -115,7 +115,14 @@
 
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
-  return DexFile::Open(location, location, error_msg);
+  std::vector<const DexFile*> tmp;
+  bool success = DexFile::Open(location, location, error_msg, &tmp);
+  CHECK(success) << error_msg;
+  EXPECT_EQ(1U, tmp.size());
+  const DexFile* dex_file = tmp[0];
+  EXPECT_EQ(PROT_READ, dex_file->GetPermissions());
+  EXPECT_TRUE(dex_file->IsReadOnly());
+  return dex_file;
 }
 
 
@@ -170,7 +177,15 @@
 
   // read dex file
   ScopedObjectAccess soa(Thread::Current());
-  return DexFile::Open(location, location, error_msg);
+  std::vector<const DexFile*> tmp;
+  if (!DexFile::Open(location, location, error_msg, &tmp)) {
+    return nullptr;
+  }
+  EXPECT_EQ(1U, tmp.size());
+  const DexFile* dex_file = tmp[0];
+  EXPECT_EQ(PROT_READ, dex_file->GetPermissions());
+  EXPECT_TRUE(dex_file->IsReadOnly());
+  return dex_file;
 }
 
 static bool ModifyAndLoad(const char* location, size_t offset, uint8_t new_val,
diff --git a/runtime/dex_method_iterator_test.cc b/runtime/dex_method_iterator_test.cc
index 5e2d89e..0d00cc3 100644
--- a/runtime/dex_method_iterator_test.cc
+++ b/runtime/dex_method_iterator_test.cc
@@ -21,26 +21,15 @@
 namespace art {
 
 class DexMethodIteratorTest : public CommonRuntimeTest {
- public:
-  const DexFile* OpenDexFile(const std::string& partial_filename) {
-    std::string dfn = GetDexFileName(partial_filename);
-    std::string error_msg;
-    const DexFile* dexfile = DexFile::Open(dfn.c_str(), dfn.c_str(), &error_msg);
-    if (dexfile == nullptr) {
-      LG << "Failed to open '" << dfn << "': " << error_msg;
-    }
-    return dexfile;
-  }
 };
 
 TEST_F(DexMethodIteratorTest, Basic) {
   ScopedObjectAccess soa(Thread::Current());
   std::vector<const DexFile*> dex_files;
-  dex_files.push_back(OpenDexFile("core-libart"));
-  dex_files.push_back(OpenDexFile("conscrypt"));
-  dex_files.push_back(OpenDexFile("okhttp"));
-  dex_files.push_back(OpenDexFile("core-junit"));
-  dex_files.push_back(OpenDexFile("bouncycastle"));
+  const char* jars[] = { "core-libart", "conscrypt", "okhttp", "core-junit", "bouncycastle" };
+  for (size_t i = 0; i < 5; ++i) {
+    dex_files.push_back(LoadExpectSingleDexFile(GetDexFileName(jars[i]).c_str()));
+  }
   DexMethodIterator it(dex_files);
   while (it.HasNext()) {
     const DexFile& dex_file = it.GetDexFile();
diff --git a/runtime/dwarf.h b/runtime/dwarf.h
new file mode 100644
index 0000000..370ad95
--- /dev/null
+++ b/runtime/dwarf.h
@@ -0,0 +1,662 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DWARF_H_
+#define ART_RUNTIME_DWARF_H_
+
+namespace art {
+
+// Based on the Dwarf 4 specification at dwarfstd.org and issues marked
+// for inclusion in Dwarf 5 on the same site. Values not specified in the Dwarf 4
+// standard might change or be removed in the future and may differ from the
+// values currently used by other implementations for the same trait;
+// use at your own risk.
+
+enum Tag {
+  DW_TAG_array_type = 0x01,
+  DW_TAG_class_type = 0x02,
+  DW_TAG_entry_point = 0x03,
+  DW_TAG_enumeration_type = 0x04,
+  DW_TAG_formal_parameter = 0x05,
+  DW_TAG_imported_declaration = 0x08,
+  DW_TAG_label = 0x0a,
+  DW_TAG_lexical_block = 0x0b,
+  DW_TAG_member = 0x0d,
+  DW_TAG_pointer_type = 0x0f,
+  DW_TAG_reference_type = 0x10,
+  DW_TAG_compile_unit = 0x11,
+  DW_TAG_string_type = 0x12,
+  DW_TAG_structure_type = 0x13,
+  DW_TAG_subroutine_type = 0x15,
+  DW_TAG_typedef = 0x16,
+  DW_TAG_union_type = 0x17,
+  DW_TAG_unspecified_parameters = 0x18,
+  DW_TAG_variant = 0x19,
+  DW_TAG_common_block = 0x1a,
+  DW_TAG_common_inclusion = 0x1b,
+  DW_TAG_inheritance = 0x1c,
+  DW_TAG_inlined_subroutine = 0x1d,
+  DW_TAG_module = 0x1e,
+  DW_TAG_ptr_to_member_type = 0x1f,
+  DW_TAG_set_type = 0x20,
+  DW_TAG_subrange_type = 0x21,
+  DW_TAG_with_stmt = 0x22,
+  DW_TAG_access_declaration = 0x23,
+  DW_TAG_base_type = 0x24,
+  DW_TAG_catch_block = 0x25,
+  DW_TAG_const_type = 0x26,
+  DW_TAG_constant = 0x27,
+  DW_TAG_enumerator = 0x28,
+  DW_TAG_file_type = 0x29,
+  DW_TAG_friend = 0x2a,
+  DW_TAG_namelist = 0x2b,
+  DW_TAG_namelist_item = 0x2c,
+  DW_TAG_packed_type = 0x2d,
+  DW_TAG_subprogram = 0x2e,
+  DW_TAG_template_type_parameter = 0x2f,
+  DW_TAG_template_value_parameter = 0x30,
+  DW_TAG_thrown_type = 0x31,
+  DW_TAG_try_block = 0x32,
+  DW_TAG_variant_part = 0x33,
+  DW_TAG_variable = 0x34,
+  DW_TAG_volatile_type = 0x35,
+  DW_TAG_dwarf_procedure = 0x36,
+  DW_TAG_restrict_type = 0x37,
+  DW_TAG_interface_type = 0x38,
+  DW_TAG_namespace = 0x39,
+  DW_TAG_imported_module = 0x3a,
+  DW_TAG_unspecified_type = 0x3b,
+  DW_TAG_partial_unit = 0x3c,
+  DW_TAG_imported_unit = 0x3d,
+  DW_TAG_condition = 0x3f,
+  DW_TAG_shared_type = 0x40,
+  DW_TAG_type_unit = 0x41,
+  DW_TAG_rvalue_reference_type = 0x42,
+  DW_TAG_template_alias = 0x43,
+#ifdef INCLUDE_DWARF5_VALUES
+  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
+  // may be different than other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_TAG_coarray_type = 0x44,
+  DW_TAG_call_site = 0x45,
+  DW_TAG_call_site_parameter = 0x46,
+  DW_TAG_generic_subrange = 0x47,
+  DW_TAG_atomic_type = 0x48,
+  DW_TAG_dynamic_type = 0x49,
+  DW_TAG_aligned_type = 0x50,
+#endif
+  DW_TAG_lo_user = 0x4080,
+  DW_TAG_hi_user = 0xffff
+};
+
+enum Children : uint8_t {
+  DW_CHILDREN_no = 0x00,
+  DW_CHILDREN_yes = 0x01
+};
+
+enum Attribute {
+  DW_AT_sibling = 0x01,
+  DW_AT_location = 0x02,
+  DW_AT_name = 0x03,
+  DW_AT_ordering = 0x09,
+  DW_AT_byte_size = 0x0b,
+  DW_AT_bit_offset = 0x0c,
+  DW_AT_bit_size = 0x0d,
+  DW_AT_stmt_list = 0x10,
+  DW_AT_low_pc = 0x11,
+  DW_AT_high_pc = 0x12,
+  DW_AT_language = 0x13,
+  DW_AT_discr = 0x15,
+  DW_AT_discr_value = 0x16,
+  DW_AT_visibility = 0x17,
+  DW_AT_import = 0x18,
+  DW_AT_string_length = 0x19,
+  DW_AT_common_reference = 0x1a,
+  DW_AT_comp_dir = 0x1b,
+  DW_AT_const_value = 0x1c,
+  DW_AT_containing_type = 0x1d,
+  DW_AT_default_value = 0x1e,
+  DW_AT_inline = 0x20,
+  DW_AT_is_optional = 0x21,
+  DW_AT_lower_bound = 0x22,
+  DW_AT_producer = 0x25,
+  DW_AT_prototyped = 0x27,
+  DW_AT_return_addr = 0x2a,
+  DW_AT_start_scope = 0x2c,
+  DW_AT_bit_stride = 0x2e,
+  DW_AT_upper_bound = 0x2f,
+  DW_AT_abstract_origin = 0x31,
+  DW_AT_accessibility = 0x32,
+  DW_AT_address_class = 0x33,
+  DW_AT_artificial = 0x34,
+  DW_AT_base_types = 0x35,
+  DW_AT_calling_convention = 0x36,
+  DW_AT_count = 0x37,
+  DW_AT_data_member_location = 0x38,
+  DW_AT_decl_column = 0x39,
+  DW_AT_decl_file = 0x3a,
+  DW_AT_decl_line = 0x3b,
+  DW_AT_declaration = 0x3c,
+  DW_AT_discr_list = 0x3d,
+  DW_AT_encoding = 0x3e,
+  DW_AT_external = 0x3f,
+  DW_AT_frame_base = 0x40,
+  DW_AT_friend = 0x41,
+  DW_AT_identifier_case = 0x42,
+  DW_AT_macro_info = 0x43,
+  DW_AT_namelist_item = 0x44,
+  DW_AT_priority = 0x45,
+  DW_AT_segment = 0x46,
+  DW_AT_specification = 0x47,
+  DW_AT_static_link = 0x48,
+  DW_AT_type = 0x49,
+  DW_AT_use_location = 0x4a,
+  DW_AT_variable_parameter = 0x4b,
+  DW_AT_virtuality = 0x4c,
+  DW_AT_vtable_elem_location = 0x4d,
+  DW_AT_allocated = 0x4e,
+  DW_AT_associated = 0x4f,
+  DW_AT_data_location = 0x50,
+  DW_AT_byte_stride = 0x51,
+  DW_AT_entry_pc = 0x52,
+  DW_AT_use_UTF8 = 0x53,
+  DW_AT_extension = 0x54,
+  DW_AT_ranges = 0x55,
+  DW_AT_trampoline = 0x56,
+  DW_AT_call_column = 0x57,
+  DW_AT_call_file = 0x58,
+  DW_AT_call_line = 0x59,
+  DW_AT_description = 0x5a,
+  DW_AT_binary_scale = 0x5b,
+  DW_AT_decimal_scale = 0x5c,
+  DW_AT_small = 0x5d,
+  DW_AT_decimal_sign = 0x5e,
+  DW_AT_digit_count = 0x5f,
+  DW_AT_picture_string = 0x60,
+  DW_AT_mutable = 0x61,
+  DW_AT_threads_scaled = 0x62,
+  DW_AT_explicit = 0x63,
+  DW_AT_object_pointer = 0x64,
+  DW_AT_endianity = 0x65,
+  DW_AT_elemental = 0x66,
+  DW_AT_pure = 0x67,
+  DW_AT_recursive = 0x68,
+  DW_AT_signature = 0x69,
+  DW_AT_main_subprogram = 0x6a,
+  DW_AT_data_bit_offset = 0x6b,
+  DW_AT_const_expr = 0x6c,
+  DW_AT_enum_class = 0x6d,
+#ifdef INCLUDE_DWARF5_VALUES
+  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
+  // may be different than other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_AT_linkage_name = 0x6e,
+  DW_AT_call_site_value = 0x6f,
+  DW_AT_call_site_data_value = 0x70,
+  DW_AT_call_site_target = 0x71,
+  DW_AT_call_site_target_clobbered = 0x72,
+  DW_AT_tail_call = 0x73,
+  DW_AT_all_tail_call_sites = 0x74,
+  DW_AT_all_call_sites = 0x75,
+  DW_AT_all_source_call_sites = 0x76,
+  DW_AT_call_site_parameter = 0x77,
+  DW_AT_rank = 0x7c,
+  DW_AT_string_bitsize = 0x7d,
+  DW_AT_string_byte_size = 0x7e,
+  DW_AT_reference = 0x7f,
+  DW_AT_rvalue_reference = 0x80,
+  DW_AT_noreturn = 0x81,
+  DW_AT_alignment = 0x82,
+#endif
+  DW_AT_lo_user = 0x2000,
+  DW_AT_hi_user = 0xffff
+};
+
+enum Form : uint8_t {
+  DW_FORM_addr = 0x01,
+  DW_FORM_block2 = 0x03,
+  DW_FORM_block4 = 0x04,
+  DW_FORM_data2 = 0x05,
+  DW_FORM_data4 = 0x06,
+  DW_FORM_data8 = 0x07,
+  DW_FORM_string = 0x08,
+  DW_FORM_block = 0x09,
+  DW_FORM_block1 = 0x0a,
+  DW_FORM_data1 = 0x0b,
+  DW_FORM_flag = 0x0c,
+  DW_FORM_sdata = 0x0d,
+  DW_FORM_strp = 0x0e,
+  DW_FORM_udata = 0x0f,
+  DW_FORM_ref_addr = 0x10,
+  DW_FORM_ref1 = 0x11,
+  DW_FORM_ref2 = 0x12,
+  DW_FORM_ref4 = 0x13,
+  DW_FORM_ref8 = 0x14,
+  DW_FORM_ref_udata = 0x15,
+  DW_FORM_indirect = 0x16,
+  DW_FORM_sec_offset = 0x17,
+  DW_FORM_exprloc = 0x18,
+  DW_FORM_flag_present = 0x19,
+  DW_FORM_ref_sig8 = 0x20
+};
+
+enum Operation : uint16_t {
+  DW_OP_addr = 0x03,
+  DW_OP_deref = 0x06,
+  DW_OP_const1u = 0x08,
+  DW_OP_const1s = 0x09,
+  DW_OP_const2u = 0x0a,
+  DW_OP_const2s = 0x0b,
+  DW_OP_const4u = 0x0c,
+  DW_OP_const4s = 0x0d,
+  DW_OP_const8u = 0x0e,
+  DW_OP_const8s = 0x0f,
+  DW_OP_constu = 0x10,
+  DW_OP_consts = 0x11,
+  DW_OP_dup = 0x12,
+  DW_OP_drop = 0x13,
+  DW_OP_over = 0x14,
+  DW_OP_pick = 0x15,
+  DW_OP_swap = 0x16,
+  DW_OP_rot = 0x17,
+  DW_OP_xderef = 0x18,
+  DW_OP_abs = 0x19,
+  DW_OP_and = 0x1a,
+  DW_OP_div = 0x1b,
+  DW_OP_minus = 0x1c,
+  DW_OP_mod = 0x1d,
+  DW_OP_mul = 0x1e,
+  DW_OP_neg = 0x1f,
+  DW_OP_not = 0x20,
+  DW_OP_or = 0x21,
+  DW_OP_plus = 0x22,
+  DW_OP_plus_uconst = 0x23,
+  DW_OP_shl = 0x24,
+  DW_OP_shr = 0x25,
+  DW_OP_shra = 0x26,
+  DW_OP_xor = 0x27,
+  DW_OP_skip = 0x2f,
+  DW_OP_bra = 0x28,
+  DW_OP_eq = 0x29,
+  DW_OP_ge = 0x2a,
+  DW_OP_gt = 0x2b,
+  DW_OP_le = 0x2c,
+  DW_OP_lt = 0x2d,
+  DW_OP_ne = 0x2e,
+  DW_OP_lit0 = 0x30,
+  DW_OP_lit1 = 0x31,
+  DW_OP_lit2 = 0x32,
+  DW_OP_lit3 = 0x33,
+  DW_OP_lit4 = 0x34,
+  DW_OP_lit5 = 0x35,
+  DW_OP_lit6 = 0x36,
+  DW_OP_lit7 = 0x37,
+  DW_OP_lit8 = 0x38,
+  DW_OP_lit9 = 0x39,
+  DW_OP_lit10 = 0x3a,
+  DW_OP_lit11 = 0x3b,
+  DW_OP_lit12 = 0x3c,
+  DW_OP_lit13 = 0x3d,
+  DW_OP_lit14 = 0x3e,
+  DW_OP_lit15 = 0x3f,
+  DW_OP_lit16 = 0x40,
+  DW_OP_lit17 = 0x41,
+  DW_OP_lit18 = 0x42,
+  DW_OP_lit19 = 0x43,
+  DW_OP_lit20 = 0x44,
+  DW_OP_lit21 = 0x45,
+  DW_OP_lit22 = 0x46,
+  DW_OP_lit23 = 0x47,
+  DW_OP_lit24 = 0x48,
+  DW_OP_lit25 = 0x49,
+  DW_OP_lit26 = 0x4a,
+  DW_OP_lit27 = 0x4b,
+  DW_OP_lit28 = 0x4c,
+  DW_OP_lit29 = 0x4d,
+  DW_OP_lit30 = 0x4e,
+  DW_OP_lit31 = 0x4f,
+  DW_OP_reg0 = 0x50,
+  DW_OP_reg1 = 0x51,
+  DW_OP_reg2 = 0x52,
+  DW_OP_reg3 = 0x53,
+  DW_OP_reg4 = 0x54,
+  DW_OP_reg5 = 0x55,
+  DW_OP_reg6 = 0x56,
+  DW_OP_reg7 = 0x57,
+  DW_OP_reg8 = 0x58,
+  DW_OP_reg9 = 0x59,
+  DW_OP_reg10 = 0x5a,
+  DW_OP_reg11 = 0x5b,
+  DW_OP_reg12 = 0x5c,
+  DW_OP_reg13 = 0x5d,
+  DW_OP_reg14 = 0x5e,
+  DW_OP_reg15 = 0x5f,
+  DW_OP_reg16 = 0x60,
+  DW_OP_reg17 = 0x61,
+  DW_OP_reg18 = 0x62,
+  DW_OP_reg19 = 0x63,
+  DW_OP_reg20 = 0x64,
+  DW_OP_reg21 = 0x65,
+  DW_OP_reg22 = 0x66,
+  DW_OP_reg23 = 0x67,
+  DW_OP_reg24 = 0x68,
+  DW_OP_reg25 = 0x69,
+  DW_OP_reg26 = 0x6a,
+  DW_OP_reg27 = 0x6b,
+  DW_OP_reg28 = 0x6c,
+  DW_OP_reg29 = 0x6d,
+  DW_OP_reg30 = 0x6e,
+  DW_OP_reg31 = 0x6f,
+  DW_OP_breg0 = 0x70,
+  DW_OP_breg1 = 0x71,
+  DW_OP_breg2 = 0x72,
+  DW_OP_breg3 = 0x73,
+  DW_OP_breg4 = 0x74,
+  DW_OP_breg5 = 0x75,
+  DW_OP_breg6 = 0x76,
+  DW_OP_breg7 = 0x77,
+  DW_OP_breg8 = 0x78,
+  DW_OP_breg9 = 0x79,
+  DW_OP_breg10 = 0x7a,
+  DW_OP_breg11 = 0x7b,
+  DW_OP_breg12 = 0x7c,
+  DW_OP_breg13 = 0x7d,
+  DW_OP_breg14 = 0x7e,
+  DW_OP_breg15 = 0x7f,
+  DW_OP_breg16 = 0x80,
+  DW_OP_breg17 = 0x81,
+  DW_OP_breg18 = 0x82,
+  DW_OP_breg19 = 0x83,
+  DW_OP_breg20 = 0x84,
+  DW_OP_breg21 = 0x85,
+  DW_OP_breg22 = 0x86,
+  DW_OP_breg23 = 0x87,
+  DW_OP_breg24 = 0x88,
+  DW_OP_breg25 = 0x89,
+  DW_OP_breg26 = 0x8a,
+  DW_OP_breg27 = 0x8b,
+  DW_OP_breg28 = 0x8c,
+  DW_OP_breg29 = 0x8d,
+  DW_OP_breg30 = 0x8e,
+  DW_OP_breg31 = 0x8f,
+  DW_OP_regx = 0x90,
+  DW_OP_fbreg = 0x91,
+  DW_OP_bregx = 0x92,
+  DW_OP_piece = 0x93,
+  DW_OP_deref_size = 0x94,
+  DW_OP_xderef_size = 0x95,
+  DW_OP_nop = 0x96,
+  DW_OP_push_object_address = 0x97,
+  DW_OP_call2 = 0x98,
+  DW_OP_call4 = 0x99,
+  DW_OP_call_ref = 0x9a,
+  DW_OP_form_tls_address = 0x9b,
+  DW_OP_call_frame_cfa = 0x9c,
+  DW_OP_bit_piece = 0x9d,
+  DW_OP_implicit_value = 0x9e,
+  DW_OP_stack_value = 0x9f,
+#ifdef INCLUDE_DWARF5_VALUES
+  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
+  // may be different than other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_OP_entry_value = 0xa0,
+  DW_OP_const_type = 0xa1,
+  DW_OP_regval_type = 0xa2,
+  DW_OP_deref_type = 0xa3,
+  DW_OP_xderef_type = 0xa4,
+  DW_OP_convert = 0xa5,
+  DW_OP_reinterpret = 0xa6,
+#endif
+  DW_OP_lo_user = 0xe0,
+  DW_OP_hi_user = 0xff
+};
+
+enum BaseTypeEncoding : uint8_t {
+  DW_ATE_address = 0x01,
+  DW_ATE_boolean = 0x02,
+  DW_ATE_complex_float = 0x03,
+  DW_ATE_float = 0x04,
+  DW_ATE_signed = 0x05,
+  DW_ATE_signed_char = 0x06,
+  DW_ATE_unsigned = 0x07,
+  DW_ATE_unsigned_char = 0x08,
+  DW_ATE_imaginary_float = 0x09,
+  DW_ATE_packed_decimal = 0x0a,
+  DW_ATE_numeric_string = 0x0b,
+  DW_ATE_edited = 0x0c,
+  DW_ATE_signed_fixed = 0x0d,
+  DW_ATE_unsigned_fixed = 0x0e,
+  DW_ATE_decimal_float = 0x0f,
+  DW_ATE_UTF = 0x10,
+  DW_ATE_lo_user = 0x80,
+  DW_ATE_hi_user = 0xff
+};
+
+enum DecimalSign : uint8_t {
+  DW_DS_unsigned = 0x01,
+  DW_DS_leading_overpunch = 0x02,
+  DW_DS_trailing_overpunch = 0x03,
+  DW_DS_leading_separate = 0x04,
+  DW_DS_trailing_separate = 0x05
+};
+
+enum Endianity : uint8_t {
+  DW_END_default = 0x00,
+  DW_END_big = 0x01,
+  DW_END_little = 0x02,
+  DW_END_lo_user = 0x40,
+  DW_END_hi_user = 0xff
+};
+
+enum Accessibility : uint8_t {
+  DW_ACCESS_public = 0x01,
+  DW_ACCESS_protected = 0x02,
+  DW_ACCESS_private = 0x03
+};
+
+enum Visibility : uint8_t {
+  DW_VIS_local = 0x01,
+  DW_VIS_exported = 0x02,
+  DW_VIS_qualified = 0x03
+};
+
+enum Virtuality : uint8_t {
+  DW_VIRTUALITY_none = 0x00,
+  DW_VIRTUALITY_virtual = 0x01,
+  DW_VIRTUALITY_pure_virtual = 0x02
+};
+
+enum Language {
+  DW_LANG_C89 = 0x01,
+  DW_LANG_C = 0x02,
+  DW_LANG_Ada83 = 0x03,
+  DW_LANG_C_plus_plus = 0x04,
+  DW_LANG_Cobol74 = 0x05,
+  DW_LANG_Cobol85 = 0x06,
+  DW_LANG_Fortran77 = 0x07,
+  DW_LANG_Fortran90 = 0x08,
+  DW_LANG_Pascal83 = 0x09,
+  DW_LANG_Modula2 = 0x0a,
+  DW_LANG_Java = 0x0b,
+  DW_LANG_C99 = 0x0c,
+  DW_LANG_Ada95 = 0x0d,
+  DW_LANG_Fortran95 = 0x0e,
+  DW_LANG_PLI = 0x0f,
+  DW_LANG_ObjC = 0x10,
+  DW_LANG_ObjC_plus_plus = 0x11,
+  DW_LANG_UPC = 0x12,
+  DW_LANG_D = 0x13,
+  DW_LANG_Python = 0x14,
+#ifdef INCLUDE_DWARF5_VALUES
+  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
+  // may be different than other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_LANG_OpenCL = 0x15,
+  DW_LANG_Go = 0x16,
+  DW_LANG_Modula3 = 0x17,
+  DW_LANG_Haskell = 0x18,
+  DW_LANG_C_plus_plus_03 = 0x19,
+  DW_LANG_C_plus_plus_11 = 0x1a,
+  DW_LANG_OCaml = 0x1b,
+  DW_LANG_Rust = 0x1c,
+  DW_LANG_C11 = 0x1d,
+  DW_LANG_Swift = 0x1e,
+  DW_LANG_Julia = 0x1f,
+#endif
+  DW_LANG_lo_user = 0x8000,
+  DW_LANG_hi_user = 0xffff
+};
+
+enum Identifier : uint8_t {
+  DW_ID_case_sensitive = 0x00,
+  DW_ID_up_case = 0x01,
+  DW_ID_down_case = 0x02,
+  DW_ID_case_insensitive = 0x03
+};
+
+enum CallingConvention : uint8_t {
+  DW_CC_normal = 0x01,
+  DW_CC_program = 0x02,
+  DW_CC_nocall = 0x03,
+  DW_CC_lo_user = 0x40,
+  DW_CC_hi_user = 0xff
+};
+
+enum Inline : uint8_t {
+  DW_INL_not_inlined = 0x00,
+  DW_INL_inlined = 0x01,
+  DW_INL_declared_not_inlined = 0x02,
+  DW_INL_declared_inlined = 0x03
+};
+
+enum ArrayOrdering : uint8_t {
+  DW_ORD_row_major = 0x00,
+  DW_ORD_col_major = 0x01
+};
+
+enum DiscriminantList : uint8_t {
+  DW_DSC_label = 0x00,
+  DW_DSC_range = 0x01
+};
+
+enum LineNumberOpcode : uint8_t {
+  DW_LNS_copy = 0x01,
+  DW_LNS_advance_pc = 0x02,
+  DW_LNS_advance_line = 0x03,
+  DW_LNS_set_file = 0x04,
+  DW_LNS_set_column = 0x05,
+  DW_LNS_negate_stmt = 0x06,
+  DW_LNS_set_basic_block = 0x07,
+  DW_LNS_const_add_pc = 0x08,
+  DW_LNS_fixed_advance_pc = 0x09,
+  DW_LNS_set_prologue_end = 0x0a,
+  DW_LNS_set_epilogue_begin = 0x0b,
+  DW_LNS_set_isa = 0x0c
+};
+
+enum LineNumberExtendedOpcode : uint8_t {
+  DW_LNE_end_sequence = 0x01,
+  DW_LNE_set_address = 0x02,
+  DW_LNE_define_file = 0x03,
+  DW_LNE_set_discriminator = 0x04,
+  DW_LNE_lo_user = 0x80,
+  DW_LNE_hi_user = 0xff
+};
+
+#ifdef INCLUDE_DWARF5_VALUES
+enum LineNumberFormat : uint8_t {
+  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
+  // may be different than other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  //
+  DW_LNF_path = 0x1,
+  DW_LNF_include_index = 0x2,
+  DW_LNF_timestamp = 0x3,
+  DW_LNF_size = 0x4,
+  DW_LNF_MD5 = 0x5,
+  DW_LNF_lo_user = 0x2000,
+  DW_LNF_hi_user = 0x3fff
+};
+#endif
+
+enum MacroInfo : uint8_t {
+  DW_MACINFO_define = 0x01,
+  DW_MACINFO_undef = 0x02,
+  DW_MACINFO_start_file = 0x03,
+  DW_MACINFO_end_file = 0x04,
+  DW_MACINFO_vendor_ext = 0xff
+};
+
+#ifdef INCLUDE_DWARF5_VALUES
+enum Macro : uint8_t {
+  // Values to be added in Dwarf 5. Final value not yet specified. Values listed
+  // may be different than other implementations. Use with caution.
+  // TODO Update these values when Dwarf 5 is released.
+  DW_MACRO_define = 0x01,
+  DW_MACRO_undef = 0x02,
+  DW_MACRO_start_file = 0x03,
+  DW_MACRO_end_file = 0x04,
+  DW_MACRO_define_indirect = 0x05,
+  DW_MACRO_undef_indirect = 0x06,
+  DW_MACRO_transparent_include = 0x07,
+  DW_MACRO_define_indirectx = 0x0b,
+  DW_MACRO_undef_indirectx = 0x0c,
+  DW_MACRO_lo_user = 0xe0,
+  DW_MACRO_hi_user = 0xff
+};
+#endif
+
+const uint32_t CIE_ID_32 = 0xffffffff;
+const uint64_t CIE_ID_64 = 0xffffffffffffffff;
+
+enum CallFrameInstruction : uint8_t {
+  DW_CFA_advance_loc = 0x40,
+  DW_CFA_offset = 0x80,
+  DW_CFA_restore = 0xc0,
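+  // The three opcodes above encode their operand in the low 6 bits; the opcode itself is in the
+  // high 2 bits. All following opcodes use the full byte.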
+  DW_CFA_nop = 0x00,
+  DW_CFA_set_loc = 0x01,
+  DW_CFA_advance_loc1 = 0x02,
+  DW_CFA_advance_loc2 = 0x03,
+  DW_CFA_advance_loc4 = 0x04,
+  DW_CFA_offset_extended = 0x05,
+  DW_CFA_restore_extended = 0x06,
+  DW_CFA_undefined = 0x07,
+  DW_CFA_same_value = 0x08,
+  DW_CFA_register = 0x09,
+  DW_CFA_remember_state = 0x0a,
+  DW_CFA_restore_state = 0x0b,
+  DW_CFA_def_cfa = 0x0c,
+  DW_CFA_def_cfa_register = 0x0d,
+  DW_CFA_def_cfa_offset = 0x0e,
+  DW_CFA_def_cfa_expression = 0x0f,
+  DW_CFA_expression = 0x10,
+  DW_CFA_offset_extended_sf = 0x11,
+  DW_CFA_def_cfa_sf = 0x12,
+  DW_CFA_def_cfa_offset_sf = 0x13,
+  DW_CFA_val_offset = 0x14,
+  DW_CFA_val_offset_sf = 0x15,
+  DW_CFA_val_expression = 0x16,
+  DW_CFA_lo_user = 0x1c,
+  DW_CFA_hi_user = 0x3f
+};
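+
+// Editor's sketch (not part of the original header; the helper name is
+// illustrative): per the DWARF specification, the three "primary" call frame
+// opcodes above (DW_CFA_advance_loc, DW_CFA_offset and DW_CFA_restore) keep
+// the opcode in the high two bits of the byte and embed their first operand in
+// the low six bits, so an encoder simply ORs the operand into the opcode.
+static inline uint8_t EncodeCfaAdvanceLoc(uint8_t delta) {
+  // Only deltas below 64 fit in the opcode byte; larger deltas fall back to
+  // DW_CFA_advance_loc1/2/4 with an explicit operand.
+  return static_cast<uint8_t>(DW_CFA_advance_loc | (delta & 0x3f));
+}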
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DWARF_H_
diff --git a/runtime/elf.h b/runtime/elf.h
new file mode 100644
index 0000000..6e007a2
--- /dev/null
+++ b/runtime/elf.h
@@ -0,0 +1,1856 @@
+//===-- llvm/Support/ELF.h - ELF constants and data structures --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+// Version 1.2, May 1995. The ELF64 stuff is based on ELF-64 Object File Format
+// Version 1.5, Draft 2, May 1998 as well as OpenBSD header files.
+//
+//===----------------------------------------------------------------------===//
+
+// BEGIN android-changed
+#ifndef ART_RUNTIME_ELF_H_
+#define ART_RUNTIME_ELF_H_
+// END android-changed
+
+// BEGIN android-changed
+#include <stdint.h>
+#include <string.h>
+// END android-changed
+
+typedef uint32_t Elf32_Addr; // Program address
+typedef uint32_t Elf32_Off;  // File offset
+typedef uint16_t Elf32_Half;
+typedef uint32_t Elf32_Word;
+typedef int32_t  Elf32_Sword;
+
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+typedef uint16_t Elf64_Half;
+typedef uint32_t Elf64_Word;
+typedef int32_t  Elf64_Sword;
+typedef uint64_t Elf64_Xword;
+typedef int64_t  Elf64_Sxword;
+
+// Object file magic string.
+static const char ElfMagic[] = { 0x7f, 'E', 'L', 'F', '\0' };
+
+// e_ident size and indices.
+enum {
+  EI_MAG0       = 0,          // File identification index.
+  EI_MAG1       = 1,          // File identification index.
+  EI_MAG2       = 2,          // File identification index.
+  EI_MAG3       = 3,          // File identification index.
+  EI_CLASS      = 4,          // File class.
+  EI_DATA       = 5,          // Data encoding.
+  EI_VERSION    = 6,          // File version.
+  EI_OSABI      = 7,          // OS/ABI identification.
+  EI_ABIVERSION = 8,          // ABI version.
+  EI_PAD        = 9,          // Start of padding bytes.
+  EI_NIDENT     = 16          // Number of bytes in e_ident.
+};
+
+// BEGIN android-added for <elf.h> compat
+const char ELFMAG0 = ElfMagic[EI_MAG0];
+const char ELFMAG1 = ElfMagic[EI_MAG1];
+const char ELFMAG2 = ElfMagic[EI_MAG2];
+const char ELFMAG3 = ElfMagic[EI_MAG3];
+// END android-added for <elf.h> compat
+
+struct Elf32_Ehdr {
+  unsigned char e_ident[EI_NIDENT]; // ELF Identification bytes
+  Elf32_Half    e_type;      // Type of file (see ET_* below)
+  Elf32_Half    e_machine;   // Required architecture for this file (see EM_*)
+  Elf32_Word    e_version;   // Must be equal to 1
+  Elf32_Addr    e_entry;     // Address to jump to in order to start program
+  Elf32_Off     e_phoff;     // Program header table's file offset, in bytes
+  Elf32_Off     e_shoff;     // Section header table's file offset, in bytes
+  Elf32_Word    e_flags;     // Processor-specific flags
+  Elf32_Half    e_ehsize;    // Size of ELF header, in bytes
+  Elf32_Half    e_phentsize; // Size of an entry in the program header table
+  Elf32_Half    e_phnum;     // Number of entries in the program header table
+  Elf32_Half    e_shentsize; // Size of an entry in the section header table
+  Elf32_Half    e_shnum;     // Number of entries in the section header table
+  Elf32_Half    e_shstrndx;  // Sect hdr table index of sect name string table
+  bool checkMagic() const {
+    return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0;
+  }
+  unsigned char getFileClass() const { return e_ident[EI_CLASS]; }
+  unsigned char getDataEncoding() const { return e_ident[EI_DATA]; }
+};
+
+// 64-bit ELF header. Fields are the same as for ELF32, but with different
+// types (see above).
+struct Elf64_Ehdr {
+  unsigned char e_ident[EI_NIDENT];
+  Elf64_Half    e_type;
+  Elf64_Half    e_machine;
+  Elf64_Word    e_version;
+  Elf64_Addr    e_entry;
+  Elf64_Off     e_phoff;
+  Elf64_Off     e_shoff;
+  Elf64_Word    e_flags;
+  Elf64_Half    e_ehsize;
+  Elf64_Half    e_phentsize;
+  Elf64_Half    e_phnum;
+  Elf64_Half    e_shentsize;
+  Elf64_Half    e_shnum;
+  Elf64_Half    e_shstrndx;
+  bool checkMagic() const {
+    return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0;
+  }
+  unsigned char getFileClass() const { return e_ident[EI_CLASS]; }
+  unsigned char getDataEncoding() const { return e_ident[EI_DATA]; }
+};
+
+// File types
+enum {
+  ET_NONE   = 0,      // No file type
+  ET_REL    = 1,      // Relocatable file
+  ET_EXEC   = 2,      // Executable file
+  ET_DYN    = 3,      // Shared object file
+  ET_CORE   = 4,      // Core file
+  ET_LOPROC = 0xff00, // Beginning of processor-specific codes
+  ET_HIPROC = 0xffff  // Processor-specific
+};
+
+// Versioning
+enum {
+  EV_NONE = 0,
+  EV_CURRENT = 1
+};
+
+// Machine architectures
+enum {
+  EM_NONE          = 0, // No machine
+  EM_M32           = 1, // AT&T WE 32100
+  EM_SPARC         = 2, // SPARC
+  EM_386           = 3, // Intel 386
+  EM_68K           = 4, // Motorola 68000
+  EM_88K           = 5, // Motorola 88000
+  EM_486           = 6, // Intel 486 (deprecated)
+  EM_860           = 7, // Intel 80860
+  EM_MIPS          = 8, // MIPS R3000
+  EM_S370          = 9, // IBM System/370
+  EM_MIPS_RS3_LE   = 10, // MIPS RS3000 Little-endian
+  EM_PARISC        = 15, // Hewlett-Packard PA-RISC
+  EM_VPP500        = 17, // Fujitsu VPP500
+  EM_SPARC32PLUS   = 18, // Enhanced instruction set SPARC
+  EM_960           = 19, // Intel 80960
+  EM_PPC           = 20, // PowerPC
+  EM_PPC64         = 21, // PowerPC64
+  EM_S390          = 22, // IBM System/390
+  EM_SPU           = 23, // IBM SPU/SPC
+  EM_V800          = 36, // NEC V800
+  EM_FR20          = 37, // Fujitsu FR20
+  EM_RH32          = 38, // TRW RH-32
+  EM_RCE           = 39, // Motorola RCE
+  EM_ARM           = 40, // ARM
+  EM_ALPHA         = 41, // DEC Alpha
+  EM_SH            = 42, // Hitachi SH
+  EM_SPARCV9       = 43, // SPARC V9
+  EM_TRICORE       = 44, // Siemens TriCore
+  EM_ARC           = 45, // Argonaut RISC Core
+  EM_H8_300        = 46, // Hitachi H8/300
+  EM_H8_300H       = 47, // Hitachi H8/300H
+  EM_H8S           = 48, // Hitachi H8S
+  EM_H8_500        = 49, // Hitachi H8/500
+  EM_IA_64         = 50, // Intel IA-64 processor architecture
+  EM_MIPS_X        = 51, // Stanford MIPS-X
+  EM_COLDFIRE      = 52, // Motorola ColdFire
+  EM_68HC12        = 53, // Motorola M68HC12
+  EM_MMA           = 54, // Fujitsu MMA Multimedia Accelerator
+  EM_PCP           = 55, // Siemens PCP
+  EM_NCPU          = 56, // Sony nCPU embedded RISC processor
+  EM_NDR1          = 57, // Denso NDR1 microprocessor
+  EM_STARCORE      = 58, // Motorola Star*Core processor
+  EM_ME16          = 59, // Toyota ME16 processor
+  EM_ST100         = 60, // STMicroelectronics ST100 processor
+  EM_TINYJ         = 61, // Advanced Logic Corp. TinyJ embedded processor family
+  EM_X86_64        = 62, // AMD x86-64 architecture
+  EM_PDSP          = 63, // Sony DSP Processor
+  EM_PDP10         = 64, // Digital Equipment Corp. PDP-10
+  EM_PDP11         = 65, // Digital Equipment Corp. PDP-11
+  EM_FX66          = 66, // Siemens FX66 microcontroller
+  EM_ST9PLUS       = 67, // STMicroelectronics ST9+ 8/16 bit microcontroller
+  EM_ST7           = 68, // STMicroelectronics ST7 8-bit microcontroller
+  EM_68HC16        = 69, // Motorola MC68HC16 Microcontroller
+  EM_68HC11        = 70, // Motorola MC68HC11 Microcontroller
+  EM_68HC08        = 71, // Motorola MC68HC08 Microcontroller
+  EM_68HC05        = 72, // Motorola MC68HC05 Microcontroller
+  EM_SVX           = 73, // Silicon Graphics SVx
+  EM_ST19          = 74, // STMicroelectronics ST19 8-bit microcontroller
+  EM_VAX           = 75, // Digital VAX
+  EM_CRIS          = 76, // Axis Communications 32-bit embedded processor
+  EM_JAVELIN       = 77, // Infineon Technologies 32-bit embedded processor
+  EM_FIREPATH      = 78, // Element 14 64-bit DSP Processor
+  EM_ZSP           = 79, // LSI Logic 16-bit DSP Processor
+  EM_MMIX          = 80, // Donald Knuth's educational 64-bit processor
+  EM_HUANY         = 81, // Harvard University machine-independent object files
+  EM_PRISM         = 82, // SiTera Prism
+  EM_AVR           = 83, // Atmel AVR 8-bit microcontroller
+  EM_FR30          = 84, // Fujitsu FR30
+  EM_D10V          = 85, // Mitsubishi D10V
+  EM_D30V          = 86, // Mitsubishi D30V
+  EM_V850          = 87, // NEC v850
+  EM_M32R          = 88, // Mitsubishi M32R
+  EM_MN10300       = 89, // Matsushita MN10300
+  EM_MN10200       = 90, // Matsushita MN10200
+  EM_PJ            = 91, // picoJava
+  EM_OPENRISC      = 92, // OpenRISC 32-bit embedded processor
+  EM_ARC_COMPACT   = 93, // ARC International ARCompact processor (old
+                         // spelling/synonym: EM_ARC_A5)
+  EM_XTENSA        = 94, // Tensilica Xtensa Architecture
+  EM_VIDEOCORE     = 95, // Alphamosaic VideoCore processor
+  EM_TMM_GPP       = 96, // Thompson Multimedia General Purpose Processor
+  EM_NS32K         = 97, // National Semiconductor 32000 series
+  EM_TPC           = 98, // Tenor Network TPC processor
+  EM_SNP1K         = 99, // Trebia SNP 1000 processor
+  EM_ST200         = 100, // STMicroelectronics (www.st.com) ST200
+  EM_IP2K          = 101, // Ubicom IP2xxx microcontroller family
+  EM_MAX           = 102, // MAX Processor
+  EM_CR            = 103, // National Semiconductor CompactRISC microprocessor
+  EM_F2MC16        = 104, // Fujitsu F2MC16
+  EM_MSP430        = 105, // Texas Instruments embedded microcontroller msp430
+  EM_BLACKFIN      = 106, // Analog Devices Blackfin (DSP) processor
+  EM_SE_C33        = 107, // S1C33 Family of Seiko Epson processors
+  EM_SEP           = 108, // Sharp embedded microprocessor
+  EM_ARCA          = 109, // Arca RISC Microprocessor
+  EM_UNICORE       = 110, // Microprocessor series from PKU-Unity Ltd. and MPRC
+                          // of Peking University
+  EM_EXCESS        = 111, // eXcess: 16/32/64-bit configurable embedded CPU
+  EM_DXP           = 112, // Icera Semiconductor Inc. Deep Execution Processor
+  EM_ALTERA_NIOS2  = 113, // Altera Nios II soft-core processor
+  EM_CRX           = 114, // National Semiconductor CompactRISC CRX
+  EM_XGATE         = 115, // Motorola XGATE embedded processor
+  EM_C166          = 116, // Infineon C16x/XC16x processor
+  EM_M16C          = 117, // Renesas M16C series microprocessors
+  EM_DSPIC30F      = 118, // Microchip Technology dsPIC30F Digital Signal
+                          // Controller
+  EM_CE            = 119, // Freescale Communication Engine RISC core
+  EM_M32C          = 120, // Renesas M32C series microprocessors
+  EM_TSK3000       = 131, // Altium TSK3000 core
+  EM_RS08          = 132, // Freescale RS08 embedded processor
+  EM_SHARC         = 133, // Analog Devices SHARC family of 32-bit DSP
+                          // processors
+  EM_ECOG2         = 134, // Cyan Technology eCOG2 microprocessor
+  EM_SCORE7        = 135, // Sunplus S+core7 RISC processor
+  EM_DSP24         = 136, // New Japan Radio (NJR) 24-bit DSP Processor
+  EM_VIDEOCORE3    = 137, // Broadcom VideoCore III processor
+  EM_LATTICEMICO32 = 138, // RISC processor for Lattice FPGA architecture
+  EM_SE_C17        = 139, // Seiko Epson C17 family
+  EM_TI_C6000      = 140, // The Texas Instruments TMS320C6000 DSP family
+  EM_TI_C2000      = 141, // The Texas Instruments TMS320C2000 DSP family
+  EM_TI_C5500      = 142, // The Texas Instruments TMS320C55x DSP family
+  EM_MMDSP_PLUS    = 160, // STMicroelectronics 64bit VLIW Data Signal Processor
+  EM_CYPRESS_M8C   = 161, // Cypress M8C microprocessor
+  EM_R32C          = 162, // Renesas R32C series microprocessors
+  EM_TRIMEDIA      = 163, // NXP Semiconductors TriMedia architecture family
+  EM_HEXAGON       = 164, // Qualcomm Hexagon processor
+  EM_8051          = 165, // Intel 8051 and variants
+  EM_STXP7X        = 166, // STMicroelectronics STxP7x family of configurable
+                          // and extensible RISC processors
+  EM_NDS32         = 167, // Andes Technology compact code size embedded RISC
+                          // processor family
+  EM_ECOG1         = 168, // Cyan Technology eCOG1X family
+  EM_ECOG1X        = 168, // Cyan Technology eCOG1X family
+  EM_MAXQ30        = 169, // Dallas Semiconductor MAXQ30 Core Micro-controllers
+  EM_XIMO16        = 170, // New Japan Radio (NJR) 16-bit DSP Processor
+  EM_MANIK         = 171, // M2000 Reconfigurable RISC Microprocessor
+  EM_CRAYNV2       = 172, // Cray Inc. NV2 vector architecture
+  EM_RX            = 173, // Renesas RX family
+  EM_METAG         = 174, // Imagination Technologies META processor
+                          // architecture
+  EM_MCST_ELBRUS   = 175, // MCST Elbrus general purpose hardware architecture
+  EM_ECOG16        = 176, // Cyan Technology eCOG16 family
+  EM_CR16          = 177, // National Semiconductor CompactRISC CR16 16-bit
+                          // microprocessor
+  EM_ETPU          = 178, // Freescale Extended Time Processing Unit
+  EM_SLE9X         = 179, // Infineon Technologies SLE9X core
+  EM_L10M          = 180, // Intel L10M
+  EM_K10M          = 181, // Intel K10M
+  EM_AARCH64       = 183, // ARM AArch64
+  EM_AVR32         = 185, // Atmel Corporation 32-bit microprocessor family
+  EM_STM8          = 186, // STMicroelectronics STM8 8-bit microcontroller
+  EM_TILE64        = 187, // Tilera TILE64 multicore architecture family
+  EM_TILEPRO       = 188, // Tilera TILEPro multicore architecture family
+  EM_CUDA          = 190, // NVIDIA CUDA architecture
+  EM_TILEGX        = 191, // Tilera TILE-Gx multicore architecture family
+  EM_CLOUDSHIELD   = 192, // CloudShield architecture family
+  EM_COREA_1ST     = 193, // KIPO-KAIST Core-A 1st generation processor family
+  EM_COREA_2ND     = 194, // KIPO-KAIST Core-A 2nd generation processor family
+  EM_ARC_COMPACT2  = 195, // Synopsys ARCompact V2
+  EM_OPEN8         = 196, // Open8 8-bit RISC soft processor core
+  EM_RL78          = 197, // Renesas RL78 family
+  EM_VIDEOCORE5    = 198, // Broadcom VideoCore V processor
+  EM_78KOR         = 199, // Renesas 78KOR family
+  EM_56800EX       = 200  // Freescale 56800EX Digital Signal Controller (DSC)
+};
+
+// Object file classes.
+enum {
+  ELFCLASSNONE = 0,
+  ELFCLASS32 = 1, // 32-bit object file
+  ELFCLASS64 = 2  // 64-bit object file
+};
+
+// Object file byte orderings.
+enum {
+  ELFDATANONE = 0, // Invalid data encoding.
+  ELFDATA2LSB = 1, // Little-endian object file
+  ELFDATA2MSB = 2  // Big-endian object file
+};
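+
+// Editor's sketch (not part of the original header; the helper name is
+// illustrative): a typical loader check built from Elf32_Ehdr::checkMagic()
+// and the class/encoding values above -- verify the magic bytes, then confirm
+// the image is a little-endian 32-bit file of the current ELF version before
+// trusting any other field.
+static inline bool IsLittleEndianElf32(const Elf32_Ehdr& ehdr) {
+  return ehdr.checkMagic() &&
+         ehdr.getFileClass() == ELFCLASS32 &&
+         ehdr.getDataEncoding() == ELFDATA2LSB &&
+         ehdr.e_version == EV_CURRENT;
+}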
+
+// OS ABI identification.
+enum {
+  ELFOSABI_NONE = 0,          // UNIX System V ABI
+  ELFOSABI_HPUX = 1,          // HP-UX operating system
+  ELFOSABI_NETBSD = 2,        // NetBSD
+  ELFOSABI_GNU = 3,           // GNU/Linux
+  ELFOSABI_LINUX = 3,         // Historical alias for ELFOSABI_GNU.
+  ELFOSABI_HURD = 4,          // GNU/Hurd
+  ELFOSABI_SOLARIS = 6,       // Solaris
+  ELFOSABI_AIX = 7,           // AIX
+  ELFOSABI_IRIX = 8,          // IRIX
+  ELFOSABI_FREEBSD = 9,       // FreeBSD
+  ELFOSABI_TRU64 = 10,        // TRU64 UNIX
+  ELFOSABI_MODESTO = 11,      // Novell Modesto
+  ELFOSABI_OPENBSD = 12,      // OpenBSD
+  ELFOSABI_OPENVMS = 13,      // OpenVMS
+  ELFOSABI_NSK = 14,          // Hewlett-Packard Non-Stop Kernel
+  ELFOSABI_AROS = 15,         // AROS
+  ELFOSABI_FENIXOS = 16,      // FenixOS
+  ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000
+  ELFOSABI_C6000_LINUX = 65,  // Linux TMS320C6000
+  ELFOSABI_ARM = 97,          // ARM
+  ELFOSABI_STANDALONE = 255   // Standalone (embedded) application
+};
+
+// X86_64 relocations.
+enum {
+  R_X86_64_NONE       = 0,
+  R_X86_64_64         = 1,
+  R_X86_64_PC32       = 2,
+  R_X86_64_GOT32      = 3,
+  R_X86_64_PLT32      = 4,
+  R_X86_64_COPY       = 5,
+  R_X86_64_GLOB_DAT   = 6,
+  R_X86_64_JUMP_SLOT  = 7,
+  R_X86_64_RELATIVE   = 8,
+  R_X86_64_GOTPCREL   = 9,
+  R_X86_64_32         = 10,
+  R_X86_64_32S        = 11,
+  R_X86_64_16         = 12,
+  R_X86_64_PC16       = 13,
+  R_X86_64_8          = 14,
+  R_X86_64_PC8        = 15,
+  R_X86_64_DTPMOD64   = 16,
+  R_X86_64_DTPOFF64   = 17,
+  R_X86_64_TPOFF64    = 18,
+  R_X86_64_TLSGD      = 19,
+  R_X86_64_TLSLD      = 20,
+  R_X86_64_DTPOFF32   = 21,
+  R_X86_64_GOTTPOFF   = 22,
+  R_X86_64_TPOFF32    = 23,
+  R_X86_64_PC64       = 24,
+  R_X86_64_GOTOFF64   = 25,
+  R_X86_64_GOTPC32    = 26,
+  R_X86_64_GOT64      = 27,
+  R_X86_64_GOTPCREL64 = 28,
+  R_X86_64_GOTPC64    = 29,
+  R_X86_64_GOTPLT64   = 30,
+  R_X86_64_PLTOFF64   = 31,
+  R_X86_64_SIZE32     = 32,
+  R_X86_64_SIZE64     = 33,
+  R_X86_64_GOTPC32_TLSDESC = 34,
+  R_X86_64_TLSDESC_CALL    = 35,
+  R_X86_64_TLSDESC    = 36,
+  R_X86_64_IRELATIVE  = 37
+};
+
+// i386 relocations.
+// TODO: this is just a subset
+enum {
+  R_386_NONE          = 0,
+  R_386_32            = 1,
+  R_386_PC32          = 2,
+  R_386_GOT32         = 3,
+  R_386_PLT32         = 4,
+  R_386_COPY          = 5,
+  R_386_GLOB_DAT      = 6,
+  R_386_JUMP_SLOT     = 7,
+  R_386_RELATIVE      = 8,
+  R_386_GOTOFF        = 9,
+  R_386_GOTPC         = 10,
+  R_386_32PLT         = 11,
+  R_386_TLS_TPOFF     = 14,
+  R_386_TLS_IE        = 15,
+  R_386_TLS_GOTIE     = 16,
+  R_386_TLS_LE        = 17,
+  R_386_TLS_GD        = 18,
+  R_386_TLS_LDM       = 19,
+  R_386_16            = 20,
+  R_386_PC16          = 21,
+  R_386_8             = 22,
+  R_386_PC8           = 23,
+  R_386_TLS_GD_32     = 24,
+  R_386_TLS_GD_PUSH   = 25,
+  R_386_TLS_GD_CALL   = 26,
+  R_386_TLS_GD_POP    = 27,
+  R_386_TLS_LDM_32    = 28,
+  R_386_TLS_LDM_PUSH  = 29,
+  R_386_TLS_LDM_CALL  = 30,
+  R_386_TLS_LDM_POP   = 31,
+  R_386_TLS_LDO_32    = 32,
+  R_386_TLS_IE_32     = 33,
+  R_386_TLS_LE_32     = 34,
+  R_386_TLS_DTPMOD32  = 35,
+  R_386_TLS_DTPOFF32  = 36,
+  R_386_TLS_TPOFF32   = 37,
+  R_386_TLS_GOTDESC   = 39,
+  R_386_TLS_DESC_CALL = 40,
+  R_386_TLS_DESC      = 41,
+  R_386_IRELATIVE     = 42,
+  R_386_NUM           = 43
+};
+
+// ELF Relocation types for PPC32
+enum {
+  R_PPC_NONE                  = 0,      /* No relocation. */
+  R_PPC_ADDR32                = 1,
+  R_PPC_ADDR24                = 2,
+  R_PPC_ADDR16                = 3,
+  R_PPC_ADDR16_LO             = 4,
+  R_PPC_ADDR16_HI             = 5,
+  R_PPC_ADDR16_HA             = 6,
+  R_PPC_ADDR14                = 7,
+  R_PPC_ADDR14_BRTAKEN        = 8,
+  R_PPC_ADDR14_BRNTAKEN       = 9,
+  R_PPC_REL24                 = 10,
+  R_PPC_REL14                 = 11,
+  R_PPC_REL14_BRTAKEN         = 12,
+  R_PPC_REL14_BRNTAKEN        = 13,
+  R_PPC_GOT16                 = 14,
+  R_PPC_GOT16_LO              = 15,
+  R_PPC_GOT16_HI              = 16,
+  R_PPC_GOT16_HA              = 17,
+  R_PPC_REL32                 = 26,
+  R_PPC_TLS                   = 67,
+  R_PPC_DTPMOD32              = 68,
+  R_PPC_TPREL16               = 69,
+  R_PPC_TPREL16_LO            = 70,
+  R_PPC_TPREL16_HI            = 71,
+  R_PPC_TPREL16_HA            = 72,
+  R_PPC_TPREL32               = 73,
+  R_PPC_DTPREL16              = 74,
+  R_PPC_DTPREL16_LO           = 75,
+  R_PPC_DTPREL16_HI           = 76,
+  R_PPC_DTPREL16_HA           = 77,
+  R_PPC_DTPREL32              = 78,
+  R_PPC_GOT_TLSGD16           = 79,
+  R_PPC_GOT_TLSGD16_LO        = 80,
+  R_PPC_GOT_TLSGD16_HI        = 81,
+  R_PPC_GOT_TLSGD16_HA        = 82,
+  R_PPC_GOT_TLSLD16           = 83,
+  R_PPC_GOT_TLSLD16_LO        = 84,
+  R_PPC_GOT_TLSLD16_HI        = 85,
+  R_PPC_GOT_TLSLD16_HA        = 86,
+  R_PPC_GOT_TPREL16           = 87,
+  R_PPC_GOT_TPREL16_LO        = 88,
+  R_PPC_GOT_TPREL16_HI        = 89,
+  R_PPC_GOT_TPREL16_HA        = 90,
+  R_PPC_GOT_DTPREL16          = 91,
+  R_PPC_GOT_DTPREL16_LO       = 92,
+  R_PPC_GOT_DTPREL16_HI       = 93,
+  R_PPC_GOT_DTPREL16_HA       = 94,
+  R_PPC_TLSGD                 = 95,
+  R_PPC_TLSLD                 = 96,
+  R_PPC_REL16                 = 249,
+  R_PPC_REL16_LO              = 250,
+  R_PPC_REL16_HI              = 251,
+  R_PPC_REL16_HA              = 252
+};
+
+// ELF Relocation types for PPC64
+enum {
+  R_PPC64_NONE                = 0,
+  R_PPC64_ADDR32              = 1,
+  R_PPC64_ADDR24              = 2,
+  R_PPC64_ADDR16              = 3,
+  R_PPC64_ADDR16_LO           = 4,
+  R_PPC64_ADDR16_HI           = 5,
+  R_PPC64_ADDR16_HA           = 6,
+  R_PPC64_ADDR14              = 7,
+  R_PPC64_ADDR14_BRTAKEN      = 8,
+  R_PPC64_ADDR14_BRNTAKEN     = 9,
+  R_PPC64_REL24               = 10,
+  R_PPC64_REL14               = 11,
+  R_PPC64_REL14_BRTAKEN       = 12,
+  R_PPC64_REL14_BRNTAKEN      = 13,
+  R_PPC64_GOT16               = 14,
+  R_PPC64_GOT16_LO            = 15,
+  R_PPC64_GOT16_HI            = 16,
+  R_PPC64_GOT16_HA            = 17,
+  R_PPC64_REL32               = 26,
+  R_PPC64_ADDR64              = 38,
+  R_PPC64_ADDR16_HIGHER       = 39,
+  R_PPC64_ADDR16_HIGHERA      = 40,
+  R_PPC64_ADDR16_HIGHEST      = 41,
+  R_PPC64_ADDR16_HIGHESTA     = 42,
+  R_PPC64_REL64               = 44,
+  R_PPC64_TOC16               = 47,
+  R_PPC64_TOC16_LO            = 48,
+  R_PPC64_TOC16_HI            = 49,
+  R_PPC64_TOC16_HA            = 50,
+  R_PPC64_TOC                 = 51,
+  R_PPC64_ADDR16_DS           = 56,
+  R_PPC64_ADDR16_LO_DS        = 57,
+  R_PPC64_GOT16_DS            = 58,
+  R_PPC64_GOT16_LO_DS         = 59,
+  R_PPC64_TOC16_DS            = 63,
+  R_PPC64_TOC16_LO_DS         = 64,
+  R_PPC64_TLS                 = 67,
+  R_PPC64_DTPMOD64            = 68,
+  R_PPC64_TPREL16             = 69,
+  R_PPC64_TPREL16_LO          = 70,
+  R_PPC64_TPREL16_HI          = 71,
+  R_PPC64_TPREL16_HA          = 72,
+  R_PPC64_TPREL64             = 73,
+  R_PPC64_DTPREL16            = 74,
+  R_PPC64_DTPREL16_LO         = 75,
+  R_PPC64_DTPREL16_HI         = 76,
+  R_PPC64_DTPREL16_HA         = 77,
+  R_PPC64_DTPREL64            = 78,
+  R_PPC64_GOT_TLSGD16         = 79,
+  R_PPC64_GOT_TLSGD16_LO      = 80,
+  R_PPC64_GOT_TLSGD16_HI      = 81,
+  R_PPC64_GOT_TLSGD16_HA      = 82,
+  R_PPC64_GOT_TLSLD16         = 83,
+  R_PPC64_GOT_TLSLD16_LO      = 84,
+  R_PPC64_GOT_TLSLD16_HI      = 85,
+  R_PPC64_GOT_TLSLD16_HA      = 86,
+  R_PPC64_GOT_TPREL16_DS      = 87,
+  R_PPC64_GOT_TPREL16_LO_DS   = 88,
+  R_PPC64_GOT_TPREL16_HI      = 89,
+  R_PPC64_GOT_TPREL16_HA      = 90,
+  R_PPC64_GOT_DTPREL16_DS     = 91,
+  R_PPC64_GOT_DTPREL16_LO_DS  = 92,
+  R_PPC64_GOT_DTPREL16_HI     = 93,
+  R_PPC64_GOT_DTPREL16_HA     = 94,
+  R_PPC64_TPREL16_DS          = 95,
+  R_PPC64_TPREL16_LO_DS       = 96,
+  R_PPC64_TPREL16_HIGHER      = 97,
+  R_PPC64_TPREL16_HIGHERA     = 98,
+  R_PPC64_TPREL16_HIGHEST     = 99,
+  R_PPC64_TPREL16_HIGHESTA    = 100,
+  R_PPC64_DTPREL16_DS         = 101,
+  R_PPC64_DTPREL16_LO_DS      = 102,
+  R_PPC64_DTPREL16_HIGHER     = 103,
+  R_PPC64_DTPREL16_HIGHERA    = 104,
+  R_PPC64_DTPREL16_HIGHEST    = 105,
+  R_PPC64_DTPREL16_HIGHESTA   = 106,
+  R_PPC64_TLSGD               = 107,
+  R_PPC64_TLSLD               = 108,
+  R_PPC64_REL16               = 249,
+  R_PPC64_REL16_LO            = 250,
+  R_PPC64_REL16_HI            = 251,
+  R_PPC64_REL16_HA            = 252
+};
+
+// ELF Relocation types for AArch64
+
+enum {
+  R_AARCH64_NONE                        = 0x100,
+
+  R_AARCH64_ABS64                       = 0x101,
+  R_AARCH64_ABS32                       = 0x102,
+  R_AARCH64_ABS16                       = 0x103,
+  R_AARCH64_PREL64                      = 0x104,
+  R_AARCH64_PREL32                      = 0x105,
+  R_AARCH64_PREL16                      = 0x106,
+
+  R_AARCH64_MOVW_UABS_G0                = 0x107,
+  R_AARCH64_MOVW_UABS_G0_NC             = 0x108,
+  R_AARCH64_MOVW_UABS_G1                = 0x109,
+  R_AARCH64_MOVW_UABS_G1_NC             = 0x10a,
+  R_AARCH64_MOVW_UABS_G2                = 0x10b,
+  R_AARCH64_MOVW_UABS_G2_NC             = 0x10c,
+  R_AARCH64_MOVW_UABS_G3                = 0x10d,
+  R_AARCH64_MOVW_SABS_G0                = 0x10e,
+  R_AARCH64_MOVW_SABS_G1                = 0x10f,
+  R_AARCH64_MOVW_SABS_G2                = 0x110,
+
+  R_AARCH64_LD_PREL_LO19                = 0x111,
+  R_AARCH64_ADR_PREL_LO21               = 0x112,
+  R_AARCH64_ADR_PREL_PG_HI21            = 0x113,
+  R_AARCH64_ADD_ABS_LO12_NC             = 0x115,
+  R_AARCH64_LDST8_ABS_LO12_NC           = 0x116,
+
+  R_AARCH64_TSTBR14                     = 0x117,
+  R_AARCH64_CONDBR19                    = 0x118,
+  R_AARCH64_JUMP26                      = 0x11a,
+  R_AARCH64_CALL26                      = 0x11b,
+
+  R_AARCH64_LDST16_ABS_LO12_NC          = 0x11c,
+  R_AARCH64_LDST32_ABS_LO12_NC          = 0x11d,
+  R_AARCH64_LDST64_ABS_LO12_NC          = 0x11e,
+
+  R_AARCH64_LDST128_ABS_LO12_NC         = 0x12b,
+
+  R_AARCH64_ADR_GOT_PAGE                = 0x137,
+  R_AARCH64_LD64_GOT_LO12_NC            = 0x138,
+
+  R_AARCH64_TLSLD_MOVW_DTPREL_G2        = 0x20b,
+  R_AARCH64_TLSLD_MOVW_DTPREL_G1        = 0x20c,
+  R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC     = 0x20d,
+  R_AARCH64_TLSLD_MOVW_DTPREL_G0        = 0x20e,
+  R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC     = 0x20f,
+  R_AARCH64_TLSLD_ADD_DTPREL_HI12       = 0x210,
+  R_AARCH64_TLSLD_ADD_DTPREL_LO12       = 0x211,
+  R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC    = 0x212,
+  R_AARCH64_TLSLD_LDST8_DTPREL_LO12     = 0x213,
+  R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC  = 0x214,
+  R_AARCH64_TLSLD_LDST16_DTPREL_LO12    = 0x215,
+  R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC = 0x216,
+  R_AARCH64_TLSLD_LDST32_DTPREL_LO12    = 0x217,
+  R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC = 0x218,
+  R_AARCH64_TLSLD_LDST64_DTPREL_LO12    = 0x219,
+  R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC = 0x21a,
+
+  R_AARCH64_TLSIE_MOVW_GOTTPREL_G1      = 0x21b,
+  R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC   = 0x21c,
+  R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21   = 0x21d,
+  R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 0x21e,
+  R_AARCH64_TLSIE_LD_GOTTPREL_PREL19    = 0x21f,
+
+  R_AARCH64_TLSLE_MOVW_TPREL_G2         = 0x220,
+  R_AARCH64_TLSLE_MOVW_TPREL_G1         = 0x221,
+  R_AARCH64_TLSLE_MOVW_TPREL_G1_NC      = 0x222,
+  R_AARCH64_TLSLE_MOVW_TPREL_G0         = 0x223,
+  R_AARCH64_TLSLE_MOVW_TPREL_G0_NC      = 0x224,
+  R_AARCH64_TLSLE_ADD_TPREL_HI12        = 0x225,
+  R_AARCH64_TLSLE_ADD_TPREL_LO12        = 0x226,
+  R_AARCH64_TLSLE_ADD_TPREL_LO12_NC     = 0x227,
+  R_AARCH64_TLSLE_LDST8_TPREL_LO12      = 0x228,
+  R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC   = 0x229,
+  R_AARCH64_TLSLE_LDST16_TPREL_LO12     = 0x22a,
+  R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC  = 0x22b,
+  R_AARCH64_TLSLE_LDST32_TPREL_LO12     = 0x22c,
+  R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC  = 0x22d,
+  R_AARCH64_TLSLE_LDST64_TPREL_LO12     = 0x22e,
+  R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC  = 0x22f,
+
+  R_AARCH64_TLSDESC_ADR_PAGE            = 0x232,
+  R_AARCH64_TLSDESC_LD64_LO12_NC        = 0x233,
+  R_AARCH64_TLSDESC_ADD_LO12_NC         = 0x234,
+
+  R_AARCH64_TLSDESC_CALL                = 0x239
+};
+
+// ARM Specific e_flags
+enum : unsigned {
+  EF_ARM_SOFT_FLOAT =     0x00000200U,
+  EF_ARM_VFP_FLOAT =      0x00000400U,
+  EF_ARM_EABI_UNKNOWN =   0x00000000U,
+  EF_ARM_EABI_VER1 =      0x01000000U,
+  EF_ARM_EABI_VER2 =      0x02000000U,
+  EF_ARM_EABI_VER3 =      0x03000000U,
+  EF_ARM_EABI_VER4 =      0x04000000U,
+  EF_ARM_EABI_VER5 =      0x05000000U,
+  EF_ARM_EABIMASK =       0xFF000000U
+};
+
+// ELF Relocation types for ARM
+// Meets 2.08 ABI Specs.
+
+enum {
+  R_ARM_NONE                  = 0x00,
+  R_ARM_PC24                  = 0x01,
+  R_ARM_ABS32                 = 0x02,
+  R_ARM_REL32                 = 0x03,
+  R_ARM_LDR_PC_G0             = 0x04,
+  R_ARM_ABS16                 = 0x05,
+  R_ARM_ABS12                 = 0x06,
+  R_ARM_THM_ABS5              = 0x07,
+  R_ARM_ABS8                  = 0x08,
+  R_ARM_SBREL32               = 0x09,
+  R_ARM_THM_CALL              = 0x0a,
+  R_ARM_THM_PC8               = 0x0b,
+  R_ARM_BREL_ADJ              = 0x0c,
+  R_ARM_TLS_DESC              = 0x0d,
+  R_ARM_THM_SWI8              = 0x0e,
+  R_ARM_XPC25                 = 0x0f,
+  R_ARM_THM_XPC22             = 0x10,
+  R_ARM_TLS_DTPMOD32          = 0x11,
+  R_ARM_TLS_DTPOFF32          = 0x12,
+  R_ARM_TLS_TPOFF32           = 0x13,
+  R_ARM_COPY                  = 0x14,
+  R_ARM_GLOB_DAT              = 0x15,
+  R_ARM_JUMP_SLOT             = 0x16,
+  R_ARM_RELATIVE              = 0x17,
+  R_ARM_GOTOFF32              = 0x18,
+  R_ARM_BASE_PREL             = 0x19,
+  R_ARM_GOT_BREL              = 0x1a,
+  R_ARM_PLT32                 = 0x1b,
+  R_ARM_CALL                  = 0x1c,
+  R_ARM_JUMP24                = 0x1d,
+  R_ARM_THM_JUMP24            = 0x1e,
+  R_ARM_BASE_ABS              = 0x1f,
+  R_ARM_ALU_PCREL_7_0         = 0x20,
+  R_ARM_ALU_PCREL_15_8        = 0x21,
+  R_ARM_ALU_PCREL_23_15       = 0x22,
+  R_ARM_LDR_SBREL_11_0_NC     = 0x23,
+  R_ARM_ALU_SBREL_19_12_NC    = 0x24,
+  R_ARM_ALU_SBREL_27_20_CK    = 0x25,
+  R_ARM_TARGET1               = 0x26,
+  R_ARM_SBREL31               = 0x27,
+  R_ARM_V4BX                  = 0x28,
+  R_ARM_TARGET2               = 0x29,
+  R_ARM_PREL31                = 0x2a,
+  R_ARM_MOVW_ABS_NC           = 0x2b,
+  R_ARM_MOVT_ABS              = 0x2c,
+  R_ARM_MOVW_PREL_NC          = 0x2d,
+  R_ARM_MOVT_PREL             = 0x2e,
+  R_ARM_THM_MOVW_ABS_NC       = 0x2f,
+  R_ARM_THM_MOVT_ABS          = 0x30,
+  R_ARM_THM_MOVW_PREL_NC      = 0x31,
+  R_ARM_THM_MOVT_PREL         = 0x32,
+  R_ARM_THM_JUMP19            = 0x33,
+  R_ARM_THM_JUMP6             = 0x34,
+  R_ARM_THM_ALU_PREL_11_0     = 0x35,
+  R_ARM_THM_PC12              = 0x36,
+  R_ARM_ABS32_NOI             = 0x37,
+  R_ARM_REL32_NOI             = 0x38,
+  R_ARM_ALU_PC_G0_NC          = 0x39,
+  R_ARM_ALU_PC_G0             = 0x3a,
+  R_ARM_ALU_PC_G1_NC          = 0x3b,
+  R_ARM_ALU_PC_G1             = 0x3c,
+  R_ARM_ALU_PC_G2             = 0x3d,
+  R_ARM_LDR_PC_G1             = 0x3e,
+  R_ARM_LDR_PC_G2             = 0x3f,
+  R_ARM_LDRS_PC_G0            = 0x40,
+  R_ARM_LDRS_PC_G1            = 0x41,
+  R_ARM_LDRS_PC_G2            = 0x42,
+  R_ARM_LDC_PC_G0             = 0x43,
+  R_ARM_LDC_PC_G1             = 0x44,
+  R_ARM_LDC_PC_G2             = 0x45,
+  R_ARM_ALU_SB_G0_NC          = 0x46,
+  R_ARM_ALU_SB_G0             = 0x47,
+  R_ARM_ALU_SB_G1_NC          = 0x48,
+  R_ARM_ALU_SB_G1             = 0x49,
+  R_ARM_ALU_SB_G2             = 0x4a,
+  R_ARM_LDR_SB_G0             = 0x4b,
+  R_ARM_LDR_SB_G1             = 0x4c,
+  R_ARM_LDR_SB_G2             = 0x4d,
+  R_ARM_LDRS_SB_G0            = 0x4e,
+  R_ARM_LDRS_SB_G1            = 0x4f,
+  R_ARM_LDRS_SB_G2            = 0x50,
+  R_ARM_LDC_SB_G0             = 0x51,
+  R_ARM_LDC_SB_G1             = 0x52,
+  R_ARM_LDC_SB_G2             = 0x53,
+  R_ARM_MOVW_BREL_NC          = 0x54,
+  R_ARM_MOVT_BREL             = 0x55,
+  R_ARM_MOVW_BREL             = 0x56,
+  R_ARM_THM_MOVW_BREL_NC      = 0x57,
+  R_ARM_THM_MOVT_BREL         = 0x58,
+  R_ARM_THM_MOVW_BREL         = 0x59,
+  R_ARM_TLS_GOTDESC           = 0x5a,
+  R_ARM_TLS_CALL              = 0x5b,
+  R_ARM_TLS_DESCSEQ           = 0x5c,
+  R_ARM_THM_TLS_CALL          = 0x5d,
+  R_ARM_PLT32_ABS             = 0x5e,
+  R_ARM_GOT_ABS               = 0x5f,
+  R_ARM_GOT_PREL              = 0x60,
+  R_ARM_GOT_BREL12            = 0x61,
+  R_ARM_GOTOFF12              = 0x62,
+  R_ARM_GOTRELAX              = 0x63,
+  R_ARM_GNU_VTENTRY           = 0x64,
+  R_ARM_GNU_VTINHERIT         = 0x65,
+  R_ARM_THM_JUMP11            = 0x66,
+  R_ARM_THM_JUMP8             = 0x67,
+  R_ARM_TLS_GD32              = 0x68,
+  R_ARM_TLS_LDM32             = 0x69,
+  R_ARM_TLS_LDO32             = 0x6a,
+  R_ARM_TLS_IE32              = 0x6b,
+  R_ARM_TLS_LE32              = 0x6c,
+  R_ARM_TLS_LDO12             = 0x6d,
+  R_ARM_TLS_LE12              = 0x6e,
+  R_ARM_TLS_IE12GP            = 0x6f,
+  R_ARM_PRIVATE_0             = 0x70,
+  R_ARM_PRIVATE_1             = 0x71,
+  R_ARM_PRIVATE_2             = 0x72,
+  R_ARM_PRIVATE_3             = 0x73,
+  R_ARM_PRIVATE_4             = 0x74,
+  R_ARM_PRIVATE_5             = 0x75,
+  R_ARM_PRIVATE_6             = 0x76,
+  R_ARM_PRIVATE_7             = 0x77,
+  R_ARM_PRIVATE_8             = 0x78,
+  R_ARM_PRIVATE_9             = 0x79,
+  R_ARM_PRIVATE_10            = 0x7a,
+  R_ARM_PRIVATE_11            = 0x7b,
+  R_ARM_PRIVATE_12            = 0x7c,
+  R_ARM_PRIVATE_13            = 0x7d,
+  R_ARM_PRIVATE_14            = 0x7e,
+  R_ARM_PRIVATE_15            = 0x7f,
+  R_ARM_ME_TOO                = 0x80,
+  R_ARM_THM_TLS_DESCSEQ16     = 0x81,
+  R_ARM_THM_TLS_DESCSEQ32     = 0x82
+};
+
+// Mips Specific e_flags
+enum : unsigned {
+  EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions
+  EF_MIPS_PIC       = 0x00000002, // Position independent code
+  EF_MIPS_CPIC      = 0x00000004, // Call object with Position independent code
+  EF_MIPS_ABI2      = 0x00000020,
+  EF_MIPS_32BITMODE = 0x00000100,
+  EF_MIPS_NAN2008   = 0x00000400, // Uses IEEE 754-2008 NaN encoding
+  EF_MIPS_ABI_O32   = 0x00001000, // This file follows the first MIPS 32 bit ABI
+
+  //ARCH_ASE
+  EF_MIPS_MICROMIPS = 0x02000000, // microMIPS
+  EF_MIPS_ARCH_ASE_M16 =
+                      0x04000000, // Has Mips-16 ISA extensions
+  //ARCH
+  EF_MIPS_ARCH_1    = 0x00000000, // MIPS1 instruction set
+  EF_MIPS_ARCH_2    = 0x10000000, // MIPS2 instruction set
+  EF_MIPS_ARCH_3    = 0x20000000, // MIPS3 instruction set
+  EF_MIPS_ARCH_4    = 0x30000000, // MIPS4 instruction set
+  EF_MIPS_ARCH_5    = 0x40000000, // MIPS5 instruction set
+  EF_MIPS_ARCH_32   = 0x50000000, // MIPS32 instruction set, per Linux not elf.h
+  EF_MIPS_ARCH_64   = 0x60000000, // MIPS64 instruction set, per Linux not elf.h
+  EF_MIPS_ARCH_32R2 = 0x70000000, // mips32r2
+  EF_MIPS_ARCH_64R2 = 0x80000000, // mips64r2
+  EF_MIPS_ARCH_32R6 = 0x90000000, // mips32r6
+  EF_MIPS_ARCH_64R6 = 0xa0000000, // mips64r6
+  EF_MIPS_ARCH      = 0xf0000000  // Mask for applying EF_MIPS_ARCH_ variant
+};
+
+// ELF Relocation types for Mips
+enum {
+  R_MIPS_NONE              =  0,
+  R_MIPS_16                =  1,
+  R_MIPS_32                =  2,
+  R_MIPS_REL32             =  3,
+  R_MIPS_26                =  4,
+  R_MIPS_HI16              =  5,
+  R_MIPS_LO16              =  6,
+  R_MIPS_GPREL16           =  7,
+  R_MIPS_LITERAL           =  8,
+  R_MIPS_GOT16             =  9,
+  R_MIPS_PC16              = 10,
+  R_MIPS_CALL16            = 11,
+  R_MIPS_GPREL32           = 12,
+  R_MIPS_UNUSED1           = 13,
+  R_MIPS_UNUSED2           = 14,
+  R_MIPS_SHIFT5            = 16,
+  R_MIPS_SHIFT6            = 17,
+  R_MIPS_64                = 18,
+  R_MIPS_GOT_DISP          = 19,
+  R_MIPS_GOT_PAGE          = 20,
+  R_MIPS_GOT_OFST          = 21,
+  R_MIPS_GOT_HI16          = 22,
+  R_MIPS_GOT_LO16          = 23,
+  R_MIPS_SUB               = 24,
+  R_MIPS_INSERT_A          = 25,
+  R_MIPS_INSERT_B          = 26,
+  R_MIPS_DELETE            = 27,
+  R_MIPS_HIGHER            = 28,
+  R_MIPS_HIGHEST           = 29,
+  R_MIPS_CALL_HI16         = 30,
+  R_MIPS_CALL_LO16         = 31,
+  R_MIPS_SCN_DISP          = 32,
+  R_MIPS_REL16             = 33,
+  R_MIPS_ADD_IMMEDIATE     = 34,
+  R_MIPS_PJUMP             = 35,
+  R_MIPS_RELGOT            = 36,
+  R_MIPS_JALR              = 37,
+  R_MIPS_TLS_DTPMOD32      = 38,
+  R_MIPS_TLS_DTPREL32      = 39,
+  R_MIPS_TLS_DTPMOD64      = 40,
+  R_MIPS_TLS_DTPREL64      = 41,
+  R_MIPS_TLS_GD            = 42,
+  R_MIPS_TLS_LDM           = 43,
+  R_MIPS_TLS_DTPREL_HI16   = 44,
+  R_MIPS_TLS_DTPREL_LO16   = 45,
+  R_MIPS_TLS_GOTTPREL      = 46,
+  R_MIPS_TLS_TPREL32       = 47,
+  R_MIPS_TLS_TPREL64       = 48,
+  R_MIPS_TLS_TPREL_HI16    = 49,
+  R_MIPS_TLS_TPREL_LO16    = 50,
+  R_MIPS_GLOB_DAT          = 51,
+  R_MIPS_PC21_S2           = 60,
+  R_MIPS_PC26_S2           = 61,
+  R_MIPS_PC18_S3           = 62,
+  R_MIPS_PC19_S2           = 63,
+  R_MIPS_PCHI16            = 64,
+  R_MIPS_PCLO16            = 65,
+  R_MIPS16_GOT16           = 102,
+  R_MIPS16_HI16            = 104,
+  R_MIPS16_LO16            = 105,
+  R_MIPS_COPY              = 126,
+  R_MIPS_JUMP_SLOT         = 127,
+  R_MICROMIPS_26_S1        = 133,
+  R_MICROMIPS_HI16         = 134,
+  R_MICROMIPS_LO16         = 135,
+  R_MICROMIPS_GOT16        = 138,
+  R_MICROMIPS_PC16_S1      = 141,
+  R_MICROMIPS_CALL16       = 142,
+  R_MICROMIPS_GOT_DISP     = 145,
+  R_MICROMIPS_GOT_PAGE     = 146,
+  R_MICROMIPS_GOT_OFST     = 147,
+  R_MICROMIPS_TLS_GD          = 162,
+  R_MICROMIPS_TLS_LDM         = 163,
+  R_MICROMIPS_TLS_DTPREL_HI16 = 164,
+  R_MICROMIPS_TLS_DTPREL_LO16 = 165,
+  R_MICROMIPS_TLS_TPREL_HI16  = 169,
+  R_MICROMIPS_TLS_TPREL_LO16  = 170,
+  R_MIPS_NUM               = 218,
+  R_MIPS_PC32              = 248
+};
+
+// Special values for the st_other field in the symbol table entry for MIPS.
+enum {
+  STO_MIPS_OPTIONAL        = 0x04,  // Symbol whose definition is optional
+  STO_MIPS_PLT             = 0x08,  // PLT entry related dynamic table record
+  STO_MIPS_PIC             = 0x20,  // PIC func in an object mixes PIC/non-PIC
+  STO_MIPS_MICROMIPS       = 0x80,  // MIPS Specific ISA for MicroMips
+  STO_MIPS_MIPS16          = 0xf0   // MIPS Specific ISA for Mips16
+};
+
+// Hexagon Specific e_flags
+// Release 5 ABI
+enum {
+  // Object processor version flags, bits[3:0]
+  EF_HEXAGON_MACH_V2      = 0x00000001,   // Hexagon V2
+  EF_HEXAGON_MACH_V3      = 0x00000002,   // Hexagon V3
+  EF_HEXAGON_MACH_V4      = 0x00000003,   // Hexagon V4
+  EF_HEXAGON_MACH_V5      = 0x00000004,   // Hexagon V5
+
+  // Highest ISA version flags
+  EF_HEXAGON_ISA_MACH     = 0x00000000,   // Same as specified in bits[3:0]
+                                          // of e_flags
+  EF_HEXAGON_ISA_V2       = 0x00000010,   // Hexagon V2 ISA
+  EF_HEXAGON_ISA_V3       = 0x00000020,   // Hexagon V3 ISA
+  EF_HEXAGON_ISA_V4       = 0x00000030,   // Hexagon V4 ISA
+  EF_HEXAGON_ISA_V5       = 0x00000040    // Hexagon V5 ISA
+};
+
+// Hexagon specific Section indexes for common small data
+// Release 5 ABI
+enum {
+  SHN_HEXAGON_SCOMMON     = 0xff00,       // Other access sizes
+  SHN_HEXAGON_SCOMMON_1   = 0xff01,       // Byte-sized access
+  SHN_HEXAGON_SCOMMON_2   = 0xff02,       // Half-word-sized access
+  SHN_HEXAGON_SCOMMON_4   = 0xff03,       // Word-sized access
+  SHN_HEXAGON_SCOMMON_8   = 0xff04        // Double-word-size access
+};
+
+// ELF Relocation types for Hexagon
+// Release 5 ABI
+enum {
+  R_HEX_NONE              =  0,
+  R_HEX_B22_PCREL         =  1,
+  R_HEX_B15_PCREL         =  2,
+  R_HEX_B7_PCREL          =  3,
+  R_HEX_LO16              =  4,
+  R_HEX_HI16              =  5,
+  R_HEX_32                =  6,
+  R_HEX_16                =  7,
+  R_HEX_8                 =  8,
+  R_HEX_GPREL16_0         =  9,
+  R_HEX_GPREL16_1         =  10,
+  R_HEX_GPREL16_2         =  11,
+  R_HEX_GPREL16_3         =  12,
+  R_HEX_HL16              =  13,
+  R_HEX_B13_PCREL         =  14,
+  R_HEX_B9_PCREL          =  15,
+  R_HEX_B32_PCREL_X       =  16,
+  R_HEX_32_6_X            =  17,
+  R_HEX_B22_PCREL_X       =  18,
+  R_HEX_B15_PCREL_X       =  19,
+  R_HEX_B13_PCREL_X       =  20,
+  R_HEX_B9_PCREL_X        =  21,
+  R_HEX_B7_PCREL_X        =  22,
+  R_HEX_16_X              =  23,
+  R_HEX_12_X              =  24,
+  R_HEX_11_X              =  25,
+  R_HEX_10_X              =  26,
+  R_HEX_9_X               =  27,
+  R_HEX_8_X               =  28,
+  R_HEX_7_X               =  29,
+  R_HEX_6_X               =  30,
+  R_HEX_32_PCREL          =  31,
+  R_HEX_COPY              =  32,
+  R_HEX_GLOB_DAT          =  33,
+  R_HEX_JMP_SLOT          =  34,
+  R_HEX_RELATIVE          =  35,
+  R_HEX_PLT_B22_PCREL     =  36,
+  R_HEX_GOTREL_LO16       =  37,
+  R_HEX_GOTREL_HI16       =  38,
+  R_HEX_GOTREL_32         =  39,
+  R_HEX_GOT_LO16          =  40,
+  R_HEX_GOT_HI16          =  41,
+  R_HEX_GOT_32            =  42,
+  R_HEX_GOT_16            =  43,
+  R_HEX_DTPMOD_32         =  44,
+  R_HEX_DTPREL_LO16       =  45,
+  R_HEX_DTPREL_HI16       =  46,
+  R_HEX_DTPREL_32         =  47,
+  R_HEX_DTPREL_16         =  48,
+  R_HEX_GD_PLT_B22_PCREL  =  49,
+  R_HEX_GD_GOT_LO16       =  50,
+  R_HEX_GD_GOT_HI16       =  51,
+  R_HEX_GD_GOT_32         =  52,
+  R_HEX_GD_GOT_16         =  53,
+  R_HEX_IE_LO16           =  54,
+  R_HEX_IE_HI16           =  55,
+  R_HEX_IE_32             =  56,
+  R_HEX_IE_GOT_LO16       =  57,
+  R_HEX_IE_GOT_HI16       =  58,
+  R_HEX_IE_GOT_32         =  59,
+  R_HEX_IE_GOT_16         =  60,
+  R_HEX_TPREL_LO16        =  61,
+  R_HEX_TPREL_HI16        =  62,
+  R_HEX_TPREL_32          =  63,
+  R_HEX_TPREL_16          =  64,
+  R_HEX_6_PCREL_X         =  65,
+  R_HEX_GOTREL_32_6_X     =  66,
+  R_HEX_GOTREL_16_X       =  67,
+  R_HEX_GOTREL_11_X       =  68,
+  R_HEX_GOT_32_6_X        =  69,
+  R_HEX_GOT_16_X          =  70,
+  R_HEX_GOT_11_X          =  71,
+  R_HEX_DTPREL_32_6_X     =  72,
+  R_HEX_DTPREL_16_X       =  73,
+  R_HEX_DTPREL_11_X       =  74,
+  R_HEX_GD_GOT_32_6_X     =  75,
+  R_HEX_GD_GOT_16_X       =  76,
+  R_HEX_GD_GOT_11_X       =  77,
+  R_HEX_IE_32_6_X         =  78,
+  R_HEX_IE_16_X           =  79,
+  R_HEX_IE_GOT_32_6_X     =  80,
+  R_HEX_IE_GOT_16_X       =  81,
+  R_HEX_IE_GOT_11_X       =  82,
+  R_HEX_TPREL_32_6_X      =  83,
+  R_HEX_TPREL_16_X        =  84,
+  R_HEX_TPREL_11_X        =  85
+};
+
+// ELF Relocation types for S390/zSeries
+enum {
+  R_390_NONE        =  0,
+  R_390_8           =  1,
+  R_390_12          =  2,
+  R_390_16          =  3,
+  R_390_32          =  4,
+  R_390_PC32        =  5,
+  R_390_GOT12       =  6,
+  R_390_GOT32       =  7,
+  R_390_PLT32       =  8,
+  R_390_COPY        =  9,
+  R_390_GLOB_DAT    = 10,
+  R_390_JMP_SLOT    = 11,
+  R_390_RELATIVE    = 12,
+  R_390_GOTOFF      = 13,
+  R_390_GOTPC       = 14,
+  R_390_GOT16       = 15,
+  R_390_PC16        = 16,
+  R_390_PC16DBL     = 17,
+  R_390_PLT16DBL    = 18,
+  R_390_PC32DBL     = 19,
+  R_390_PLT32DBL    = 20,
+  R_390_GOTPCDBL    = 21,
+  R_390_64          = 22,
+  R_390_PC64        = 23,
+  R_390_GOT64       = 24,
+  R_390_PLT64       = 25,
+  R_390_GOTENT      = 26,
+  R_390_GOTOFF16    = 27,
+  R_390_GOTOFF64    = 28,
+  R_390_GOTPLT12    = 29,
+  R_390_GOTPLT16    = 30,
+  R_390_GOTPLT32    = 31,
+  R_390_GOTPLT64    = 32,
+  R_390_GOTPLTENT   = 33,
+  R_390_PLTOFF16    = 34,
+  R_390_PLTOFF32    = 35,
+  R_390_PLTOFF64    = 36,
+  R_390_TLS_LOAD    = 37,
+  R_390_TLS_GDCALL  = 38,
+  R_390_TLS_LDCALL  = 39,
+  R_390_TLS_GD32    = 40,
+  R_390_TLS_GD64    = 41,
+  R_390_TLS_GOTIE12 = 42,
+  R_390_TLS_GOTIE32 = 43,
+  R_390_TLS_GOTIE64 = 44,
+  R_390_TLS_LDM32   = 45,
+  R_390_TLS_LDM64   = 46,
+  R_390_TLS_IE32    = 47,
+  R_390_TLS_IE64    = 48,
+  R_390_TLS_IEENT   = 49,
+  R_390_TLS_LE32    = 50,
+  R_390_TLS_LE64    = 51,
+  R_390_TLS_LDO32   = 52,
+  R_390_TLS_LDO64   = 53,
+  R_390_TLS_DTPMOD  = 54,
+  R_390_TLS_DTPOFF  = 55,
+  R_390_TLS_TPOFF   = 56,
+  R_390_20          = 57,
+  R_390_GOT20       = 58,
+  R_390_GOTPLT20    = 59,
+  R_390_TLS_GOTIE20 = 60,
+  R_390_IRELATIVE   = 61
+};
+
+// ELF Relocation type for Sparc.
+enum {
+  R_SPARC_NONE        = 0,
+  R_SPARC_8           = 1,
+  R_SPARC_16          = 2,
+  R_SPARC_32          = 3,
+  R_SPARC_DISP8       = 4,
+  R_SPARC_DISP16      = 5,
+  R_SPARC_DISP32      = 6,
+  R_SPARC_WDISP30     = 7,
+  R_SPARC_WDISP22     = 8,
+  R_SPARC_HI22        = 9,
+  R_SPARC_22          = 10,
+  R_SPARC_13          = 11,
+  R_SPARC_LO10        = 12,
+  R_SPARC_GOT10       = 13,
+  R_SPARC_GOT13       = 14,
+  R_SPARC_GOT22       = 15,
+  R_SPARC_PC10        = 16,
+  R_SPARC_PC22        = 17,
+  R_SPARC_WPLT30      = 18,
+  R_SPARC_COPY        = 19,
+  R_SPARC_GLOB_DAT    = 20,
+  R_SPARC_JMP_SLOT    = 21,
+  R_SPARC_RELATIVE    = 22,
+  R_SPARC_UA32        = 23,
+  R_SPARC_PLT32       = 24,
+  R_SPARC_HIPLT22     = 25,
+  R_SPARC_LOPLT10     = 26,
+  R_SPARC_PCPLT32     = 27,
+  R_SPARC_PCPLT22     = 28,
+  R_SPARC_PCPLT10     = 29,
+  R_SPARC_10          = 30,
+  R_SPARC_11          = 31,
+  R_SPARC_64          = 32,
+  R_SPARC_OLO10       = 33,
+  R_SPARC_HH22        = 34,
+  R_SPARC_HM10        = 35,
+  R_SPARC_LM22        = 36,
+  R_SPARC_PC_HH22     = 37,
+  R_SPARC_PC_HM10     = 38,
+  R_SPARC_PC_LM22     = 39,
+  R_SPARC_WDISP16     = 40,
+  R_SPARC_WDISP19     = 41,
+  R_SPARC_7           = 43,
+  R_SPARC_5           = 44,
+  R_SPARC_6           = 45,
+  R_SPARC_DISP64      = 46,
+  R_SPARC_PLT64       = 47,
+  R_SPARC_HIX22       = 48,
+  R_SPARC_LOX10       = 49,
+  R_SPARC_H44         = 50,
+  R_SPARC_M44         = 51,
+  R_SPARC_L44         = 52,
+  R_SPARC_REGISTER    = 53,
+  R_SPARC_UA64        = 54,
+  R_SPARC_UA16        = 55,
+  R_SPARC_TLS_GD_HI22   = 56,
+  R_SPARC_TLS_GD_LO10   = 57,
+  R_SPARC_TLS_GD_ADD    = 58,
+  R_SPARC_TLS_GD_CALL   = 59,
+  R_SPARC_TLS_LDM_HI22  = 60,
+  R_SPARC_TLS_LDM_LO10  = 61,
+  R_SPARC_TLS_LDM_ADD   = 62,
+  R_SPARC_TLS_LDM_CALL  = 63,
+  R_SPARC_TLS_LDO_HIX22 = 64,
+  R_SPARC_TLS_LDO_LOX10 = 65,
+  R_SPARC_TLS_LDO_ADD   = 66,
+  R_SPARC_TLS_IE_HI22   = 67,
+  R_SPARC_TLS_IE_LO10   = 68,
+  R_SPARC_TLS_IE_LD     = 69,
+  R_SPARC_TLS_IE_LDX    = 70,
+  R_SPARC_TLS_IE_ADD    = 71,
+  R_SPARC_TLS_LE_HIX22  = 72,
+  R_SPARC_TLS_LE_LOX10  = 73,
+  R_SPARC_TLS_DTPMOD32  = 74,
+  R_SPARC_TLS_DTPMOD64  = 75,
+  R_SPARC_TLS_DTPOFF32  = 76,
+  R_SPARC_TLS_DTPOFF64  = 77,
+  R_SPARC_TLS_TPOFF32   = 78,
+  R_SPARC_TLS_TPOFF64   = 79,
+  R_SPARC_GOTDATA_HIX22 = 80,
+  R_SPARC_GOTDATA_LOX22 = 81,
+  R_SPARC_GOTDATA_OP_HIX22 = 82,
+  R_SPARC_GOTDATA_OP_LOX22 = 83,
+  R_SPARC_GOTDATA_OP    = 84
+};
+
+// Section header.
+struct Elf32_Shdr {
+  Elf32_Word sh_name;      // Section name (index into string table)
+  Elf32_Word sh_type;      // Section type (SHT_*)
+  Elf32_Word sh_flags;     // Section flags (SHF_*)
+  Elf32_Addr sh_addr;      // Address where section is to be loaded
+  Elf32_Off  sh_offset;    // File offset of section data, in bytes
+  Elf32_Word sh_size;      // Size of section, in bytes
+  Elf32_Word sh_link;      // Section type-specific header table index link
+  Elf32_Word sh_info;      // Section type-specific extra information
+  Elf32_Word sh_addralign; // Section address alignment
+  Elf32_Word sh_entsize;   // Size of records contained within the section
+};
+
+// Section header for ELF64 - same fields as ELF32, different types.
+struct Elf64_Shdr {
+  Elf64_Word  sh_name;
+  Elf64_Word  sh_type;
+  Elf64_Xword sh_flags;
+  Elf64_Addr  sh_addr;
+  Elf64_Off   sh_offset;
+  Elf64_Xword sh_size;
+  Elf64_Word  sh_link;
+  Elf64_Word  sh_info;
+  Elf64_Xword sh_addralign;
+  Elf64_Xword sh_entsize;
+};
+
+// Special section indices.
+enum {
+  SHN_UNDEF     = 0,      // Undefined, missing, irrelevant, or meaningless
+  SHN_LORESERVE = 0xff00, // Lowest reserved index
+  SHN_LOPROC    = 0xff00, // Lowest processor-specific index
+  SHN_HIPROC    = 0xff1f, // Highest processor-specific index
+  SHN_LOOS      = 0xff20, // Lowest operating system-specific index
+  SHN_HIOS      = 0xff3f, // Highest operating system-specific index
+  SHN_ABS       = 0xfff1, // Symbol has absolute value; does not need relocation
+  SHN_COMMON    = 0xfff2, // FORTRAN COMMON or C external global variables
+  SHN_XINDEX    = 0xffff, // Mark that the index is >= SHN_LORESERVE
+  SHN_HIRESERVE = 0xffff  // Highest reserved index
+};
+
+// Section types.
+enum : unsigned {
+  SHT_NULL          = 0,  // No associated section (inactive entry).
+  SHT_PROGBITS      = 1,  // Program-defined contents.
+  SHT_SYMTAB        = 2,  // Symbol table.
+  SHT_STRTAB        = 3,  // String table.
+  SHT_RELA          = 4,  // Relocation entries; explicit addends.
+  SHT_HASH          = 5,  // Symbol hash table.
+  SHT_DYNAMIC       = 6,  // Information for dynamic linking.
+  SHT_NOTE          = 7,  // Information about the file.
+  SHT_NOBITS        = 8,  // Data occupies no space in the file.
+  SHT_REL           = 9,  // Relocation entries; no explicit addends.
+  SHT_SHLIB         = 10, // Reserved.
+  SHT_DYNSYM        = 11, // Symbol table.
+  SHT_INIT_ARRAY    = 14, // Pointers to initialization functions.
+  SHT_FINI_ARRAY    = 15, // Pointers to termination functions.
+  SHT_PREINIT_ARRAY = 16, // Pointers to pre-init functions.
+  SHT_GROUP         = 17, // Section group.
+  SHT_SYMTAB_SHNDX  = 18, // Indices for SHN_XINDEX entries.
+  SHT_LOOS          = 0x60000000, // Lowest operating system-specific type.
+  SHT_GNU_ATTRIBUTES= 0x6ffffff5, // Object attributes.
+  SHT_GNU_HASH      = 0x6ffffff6, // GNU-style hash table.
+  SHT_GNU_verdef    = 0x6ffffffd, // GNU version definitions.
+  SHT_GNU_verneed   = 0x6ffffffe, // GNU version references.
+  SHT_GNU_versym    = 0x6fffffff, // GNU symbol versions table.
+  SHT_HIOS          = 0x6fffffff, // Highest operating system-specific type.
+  SHT_LOPROC        = 0x70000000, // Lowest processor arch-specific type.
+  // Fixme: All this is duplicated in MCSectionELF. Why??
+  // Exception Index table
+  SHT_ARM_EXIDX           = 0x70000001U,
+  // BPABI DLL dynamic linking pre-emption map
+  SHT_ARM_PREEMPTMAP      = 0x70000002U,
+  //  Object file compatibility attributes
+  SHT_ARM_ATTRIBUTES      = 0x70000003U,
+  SHT_ARM_DEBUGOVERLAY    = 0x70000004U,
+  SHT_ARM_OVERLAYSECTION  = 0x70000005U,
+  SHT_HEX_ORDERED         = 0x70000000, // Link editor is to sort the entries in
+                                        // this section based on their sizes
+  SHT_X86_64_UNWIND       = 0x70000001, // Unwind information
+
+  SHT_MIPS_REGINFO        = 0x70000006, // Register usage information
+  SHT_MIPS_OPTIONS        = 0x7000000d, // General options
+
+  SHT_HIPROC        = 0x7fffffff, // Highest processor arch-specific type.
+  SHT_LOUSER        = 0x80000000, // Lowest type reserved for applications.
+  SHT_HIUSER        = 0xffffffff  // Highest type reserved for applications.
+};
+
+// Section flags.
+enum : unsigned {
+  // Section data should be writable during execution.
+  SHF_WRITE = 0x1,
+
+  // Section occupies memory during program execution.
+  SHF_ALLOC = 0x2,
+
+  // Section contains executable machine instructions.
+  SHF_EXECINSTR = 0x4,
+
+  // The data in this section may be merged.
+  SHF_MERGE = 0x10,
+
+  // The data in this section is null-terminated strings.
+  SHF_STRINGS = 0x20,
+
+  // A field in this section holds a section header table index.
+  SHF_INFO_LINK = 0x40U,
+
+  // Adds special ordering requirements for link editors.
+  SHF_LINK_ORDER = 0x80U,
+
+  // This section requires special OS-specific processing to avoid incorrect
+  // behavior.
+  SHF_OS_NONCONFORMING = 0x100U,
+
+  // This section is a member of a section group.
+  SHF_GROUP = 0x200U,
+
+  // This section holds Thread-Local Storage.
+  SHF_TLS = 0x400U,
+
+  // This section is excluded from the final executable or shared library.
+  SHF_EXCLUDE = 0x80000000U,
+
+  // Start of target-specific flags.
+
+  /// XCORE_SHF_CP_SECTION - All sections with the "c" flag are grouped
+  /// together by the linker to form the constant pool and the cp register is
+  /// set to the start of the constant pool by the boot code.
+  XCORE_SHF_CP_SECTION = 0x800U,
+
+  /// XCORE_SHF_DP_SECTION - All sections with the "d" flag are grouped
+  /// together by the linker to form the data section and the dp register is
+  /// set to the start of the section by the boot code.
+  XCORE_SHF_DP_SECTION = 0x1000U,
+
+  SHF_MASKOS   = 0x0ff00000,
+
+  // Bits indicating processor-specific flags.
+  SHF_MASKPROC = 0xf0000000,
+
+  // If an object file section does not have this flag set, then it may not hold
+  // more than 2GB and can be freely referred to in objects using smaller code
+  // models. Otherwise, only objects using larger code models can refer to them.
+  // For example, a medium code model object can refer to data in a section that
+  // sets this flag besides being able to refer to data in a section that does
+  // not set it; likewise, a small code model object can refer only to code in a
+  // section that does not set this flag.
+  SHF_X86_64_LARGE = 0x10000000,
+
+  // All sections with the GPREL flag are grouped into a global data area
+  // for faster accesses
+  SHF_HEX_GPREL = 0x10000000,
+
+  // Section contains text/data which may be replicated in other sections.
+  // Linker must retain only one copy.
+  SHF_MIPS_NODUPES = 0x01000000,
+
+  // Linker must generate implicit hidden weak names.
+  SHF_MIPS_NAMES   = 0x02000000,
+
+  // Section data local to process.
+  SHF_MIPS_LOCAL   = 0x04000000,
+
+  // Do not strip this section.
+  SHF_MIPS_NOSTRIP = 0x08000000,
+
+  // Section must be part of global data area.
+  SHF_MIPS_GPREL   = 0x10000000,
+
+  // This section should be merged.
+  SHF_MIPS_MERGE   = 0x20000000,
+
+  // Address size to be inferred from section entry size.
+  SHF_MIPS_ADDR    = 0x40000000,
+
+  // Section data is string data by default.
+  SHF_MIPS_STRING  = 0x80000000
+};
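+
+// Editor's sketch (not part of the original header; the helper name is
+// illustrative): sh_flags is a bit mask, so consumers test individual SHF_*
+// bits with bitwise AND. The text section of a typical image, for example, is
+// both loadable and executable.
+static inline bool IsExecutableSection(const Elf32_Shdr& shdr) {
+  return (shdr.sh_flags & SHF_ALLOC) != 0 &&
+         (shdr.sh_flags & SHF_EXECINSTR) != 0;
+}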
+
+// Section Group Flags
+enum : unsigned {
+  GRP_COMDAT = 0x1,
+  GRP_MASKOS = 0x0ff00000,
+  GRP_MASKPROC = 0xf0000000
+};
+
+// Symbol table entries for ELF32.
+struct Elf32_Sym {
+  Elf32_Word    st_name;  // Symbol name (index into string table)
+  Elf32_Addr    st_value; // Value or address associated with the symbol
+  Elf32_Word    st_size;  // Size of the symbol
+  unsigned char st_info;  // Symbol's type and binding attributes
+  unsigned char st_other; // Must be zero; reserved
+  Elf32_Half    st_shndx; // Which section (header table index) it's defined in
+
+  // These accessors and mutators correspond to the ELF32_ST_BIND,
+  // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
+  unsigned char getBinding() const { return st_info >> 4; }
+  unsigned char getType() const { return st_info & 0x0f; }
+  void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
+  void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
+  void setBindingAndType(unsigned char b, unsigned char t) {
+    st_info = (b << 4) + (t & 0x0f);
+  }
+};
+
+// BEGIN android-added for <elf.h> compat
+static inline unsigned char ELF32_ST_TYPE(unsigned char st_info) { return st_info & 0x0f; }
+// END android-added for <elf.h> compat
+
+// Symbol table entries for ELF64.
+struct Elf64_Sym {
+  Elf64_Word      st_name;  // Symbol name (index into string table)
+  unsigned char   st_info;  // Symbol's type and binding attributes
+  unsigned char   st_other; // Must be zero; reserved
+  Elf64_Half      st_shndx; // Which section (header tbl index) it's defined in
+  Elf64_Addr      st_value; // Value or address associated with the symbol
+  Elf64_Xword     st_size;  // Size of the symbol
+
+  // These accessors and mutators are identical to those defined for ELF32
+  // symbol table entries.
+  unsigned char getBinding() const { return st_info >> 4; }
+  unsigned char getType() const { return st_info & 0x0f; }
+  void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
+  void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
+  void setBindingAndType(unsigned char b, unsigned char t) {
+    st_info = (b << 4) + (t & 0x0f);
+  }
+};
+
+// The size (in bytes) of symbol table entries.
+enum {
+  SYMENTRY_SIZE32 = 16, // 32-bit symbol entry size
+  SYMENTRY_SIZE64 = 24  // 64-bit symbol entry size.
+};
+
+// Symbol bindings.
+enum {
+  STB_LOCAL = 0,   // Local symbol, not visible outside obj file containing def
+  STB_GLOBAL = 1,  // Global symbol, visible to all object files being combined
+  STB_WEAK = 2,    // Weak symbol, like global but lower-precedence
+  STB_LOOS   = 10, // Lowest operating system-specific binding type
+  STB_HIOS   = 12, // Highest operating system-specific binding type
+  STB_LOPROC = 13, // Lowest processor-specific binding type
+  STB_HIPROC = 15  // Highest processor-specific binding type
+};
+
+// Symbol types.
+enum {
+  STT_NOTYPE  = 0,   // Symbol's type is not specified
+  STT_OBJECT  = 1,   // Symbol is a data object (variable, array, etc.)
+  STT_FUNC    = 2,   // Symbol is executable code (function, etc.)
+  STT_SECTION = 3,   // Symbol refers to a section
+  STT_FILE    = 4,   // Local, absolute symbol that refers to a file
+  STT_COMMON  = 5,   // An uninitialized common block
+  STT_TLS     = 6,   // Thread local data object
+  STT_LOOS    = 7,   // Lowest operating system-specific symbol type
+  STT_HIOS    = 8,   // Highest operating system-specific symbol type
+  STT_GNU_IFUNC = 10, // GNU indirect function
+  STT_LOPROC  = 13,  // Lowest processor-specific symbol type
+  STT_HIPROC  = 15   // Highest processor-specific symbol type
+};
+
+enum {
+  STV_DEFAULT   = 0,  // Visibility is specified by binding type
+  STV_INTERNAL  = 1,  // Defined by processor supplements
+  STV_HIDDEN    = 2,  // Not visible to other components
+  STV_PROTECTED = 3   // Visible in other components but not preemptable
+};
+
+// Symbol number.
+enum {
+  STN_UNDEF = 0
+};
+
+// Relocation entry, without explicit addend.
+struct Elf32_Rel {
+  Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr)
+  Elf32_Word r_info;   // Symbol table index and type of relocation to apply
+
+  // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
+  // and ELF32_R_INFO macros defined in the ELF specification:
+  Elf32_Word getSymbol() const { return (r_info >> 8); }
+  unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+  void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); }
+  void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+  void setSymbolAndType(Elf32_Word s, unsigned char t) {
+    r_info = (s << 8) + t;
+  }
+};
+
+// Relocation entry with explicit addend.
+struct Elf32_Rela {
+  Elf32_Addr  r_offset; // Location (file byte offset, or program virtual addr)
+  Elf32_Word  r_info;   // Symbol table index and type of relocation to apply
+  Elf32_Sword r_addend; // Compute value for relocatable field by adding this
+
+  // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
+  // and ELF32_R_INFO macros defined in the ELF specification:
+  Elf32_Word getSymbol() const { return (r_info >> 8); }
+  unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+  void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); }
+  void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+  void setSymbolAndType(Elf32_Word s, unsigned char t) {
+    r_info = (s << 8) + t;
+  }
+};
+
+// Relocation entry, without explicit addend.
+struct Elf64_Rel {
+  Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr).
+  Elf64_Xword r_info;   // Symbol table index and type of relocation to apply.
+
+  // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
+  // and ELF64_R_INFO macros defined in the ELF specification:
+  Elf64_Word getSymbol() const { return (r_info >> 32); }
+  Elf64_Word getType() const {
+    return (Elf64_Word) (r_info & 0xffffffffL);
+  }
+  void setSymbol(Elf64_Word s) { setSymbolAndType(s, getType()); }
+  void setType(Elf64_Word t) { setSymbolAndType(getSymbol(), t); }
+  void setSymbolAndType(Elf64_Word s, Elf64_Word t) {
+    r_info = ((Elf64_Xword)s << 32) + (t&0xffffffffL);
+  }
+};
+
+// Relocation entry with explicit addend.
+struct Elf64_Rela {
+  Elf64_Addr  r_offset; // Location (file byte offset, or program virtual addr).
+  Elf64_Xword  r_info;   // Symbol table index and type of relocation to apply.
+  Elf64_Sxword r_addend; // Compute value for relocatable field by adding this.
+
+  // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
+  // and ELF64_R_INFO macros defined in the ELF specification:
+  Elf64_Word getSymbol() const { return (r_info >> 32); }
+  Elf64_Word getType() const {
+    return (Elf64_Word) (r_info & 0xffffffffL);
+  }
+  void setSymbol(Elf64_Word s) { setSymbolAndType(s, getType()); }
+  void setType(Elf64_Word t) { setSymbolAndType(getSymbol(), t); }
+  void setSymbolAndType(Elf64_Word s, Elf64_Word t) {
+    r_info = ((Elf64_Xword)s << 32) + (t&0xffffffffL);
+  }
+};
+
+// Program header for ELF32.
+struct Elf32_Phdr {
+  Elf32_Word p_type;   // Type of segment
+  Elf32_Off  p_offset; // File offset where segment is located, in bytes
+  Elf32_Addr p_vaddr;  // Virtual address of beginning of segment
+  Elf32_Addr p_paddr;  // Physical address of beginning of segment (OS-specific)
+  Elf32_Word p_filesz; // Num. of bytes in file image of segment (may be zero)
+  Elf32_Word p_memsz;  // Num. of bytes in mem image of segment (may be zero)
+  Elf32_Word p_flags;  // Segment flags
+  Elf32_Word p_align;  // Segment alignment constraint
+};
+
+// Program header for ELF64.
+struct Elf64_Phdr {
+  Elf64_Word   p_type;   // Type of segment
+  Elf64_Word   p_flags;  // Segment flags
+  Elf64_Off    p_offset; // File offset where segment is located, in bytes
+  Elf64_Addr   p_vaddr;  // Virtual address of beginning of segment
+  Elf64_Addr   p_paddr;  // Physical addr of beginning of segment (OS-specific)
+  Elf64_Xword  p_filesz; // Num. of bytes in file image of segment (may be zero)
+  Elf64_Xword  p_memsz;  // Num. of bytes in mem image of segment (may be zero)
+  Elf64_Xword  p_align;  // Segment alignment constraint
+};
+
+// Segment types.
+enum {
+  PT_NULL    = 0, // Unused segment.
+  PT_LOAD    = 1, // Loadable segment.
+  PT_DYNAMIC = 2, // Dynamic linking information.
+  PT_INTERP  = 3, // Interpreter pathname.
+  PT_NOTE    = 4, // Auxiliary information.
+  PT_SHLIB   = 5, // Reserved.
+  PT_PHDR    = 6, // The program header table itself.
+  PT_TLS     = 7, // The thread-local storage template.
+  PT_LOOS    = 0x60000000, // Lowest operating system-specific pt entry type.
+  PT_HIOS    = 0x6fffffff, // Highest operating system-specific pt entry type.
+  PT_LOPROC  = 0x70000000, // Lowest processor-specific program hdr entry type.
+  PT_HIPROC  = 0x7fffffff, // Highest processor-specific program hdr entry type.
+
+  // x86-64 program header types.
+  // These all contain stack unwind tables.
+  PT_GNU_EH_FRAME  = 0x6474e550,
+  PT_SUNW_EH_FRAME = 0x6474e550,
+  PT_SUNW_UNWIND   = 0x6464e550,
+
+  PT_GNU_STACK  = 0x6474e551, // Indicates stack executability.
+  PT_GNU_RELRO  = 0x6474e552, // Read-only after relocation.
+
+  // ARM program header types.
+  PT_ARM_ARCHEXT = 0x70000000, // Platform architecture compatibility info
+  // These all contain stack unwind tables.
+  PT_ARM_EXIDX   = 0x70000001,
+  PT_ARM_UNWIND  = 0x70000001,
+
+  // MIPS program header types.
+  PT_MIPS_REGINFO  = 0x70000000,  // Register usage information.
+  PT_MIPS_RTPROC   = 0x70000001,  // Runtime procedure table.
+  PT_MIPS_OPTIONS  = 0x70000002   // Options segment.
+};
+
+// Segment flag bits.
+enum : unsigned {
+  PF_X        = 1,         // Execute
+  PF_W        = 2,         // Write
+  PF_R        = 4,         // Read
+  PF_MASKOS   = 0x0ff00000,// Bits for operating system-specific semantics.
+  PF_MASKPROC = 0xf0000000 // Bits for processor-specific semantics.
+};
+
+// Dynamic table entry for ELF32.
+struct Elf32_Dyn
+{
+  Elf32_Sword d_tag;            // Type of dynamic table entry.
+  union
+  {
+      Elf32_Word d_val;         // Integer value of entry.
+      Elf32_Addr d_ptr;         // Pointer value of entry.
+  } d_un;
+};
+
+// Dynamic table entry for ELF64.
+struct Elf64_Dyn
+{
+  Elf64_Sxword d_tag;           // Type of dynamic table entry.
+  union
+  {
+      Elf64_Xword d_val;        // Integer value of entry.
+      Elf64_Addr  d_ptr;        // Pointer value of entry.
+  } d_un;
+};
+
+// Dynamic table entry tags.
+enum {
+  DT_NULL         = 0,        // Marks end of dynamic array.
+  DT_NEEDED       = 1,        // String table offset of needed library.
+  DT_PLTRELSZ     = 2,        // Size of relocation entries in PLT.
+  DT_PLTGOT       = 3,        // Address associated with linkage table.
+  DT_HASH         = 4,        // Address of symbolic hash table.
+  DT_STRTAB       = 5,        // Address of dynamic string table.
+  DT_SYMTAB       = 6,        // Address of dynamic symbol table.
+  DT_RELA         = 7,        // Address of relocation table (Rela entries).
+  DT_RELASZ       = 8,        // Size of Rela relocation table.
+  DT_RELAENT      = 9,        // Size of a Rela relocation entry.
+  DT_STRSZ        = 10,       // Total size of the string table.
+  DT_SYMENT       = 11,       // Size of a symbol table entry.
+  DT_INIT         = 12,       // Address of initialization function.
+  DT_FINI         = 13,       // Address of termination function.
+  DT_SONAME       = 14,       // String table offset of a shared object's name.
+  DT_RPATH        = 15,       // String table offset of library search path.
+  DT_SYMBOLIC     = 16,       // Changes symbol resolution algorithm.
+  DT_REL          = 17,       // Address of relocation table (Rel entries).
+  DT_RELSZ        = 18,       // Size of Rel relocation table.
+  DT_RELENT       = 19,       // Size of a Rel relocation entry.
+  DT_PLTREL       = 20,       // Type of relocation entry used for linking.
+  DT_DEBUG        = 21,       // Reserved for debugger.
+  DT_TEXTREL      = 22,       // Relocations exist for non-writable segments.
+  DT_JMPREL       = 23,       // Address of relocations associated with PLT.
+  DT_BIND_NOW     = 24,       // Process all relocations before execution.
+  DT_INIT_ARRAY   = 25,       // Pointer to array of initialization functions.
+  DT_FINI_ARRAY   = 26,       // Pointer to array of termination functions.
+  DT_INIT_ARRAYSZ = 27,       // Size of DT_INIT_ARRAY.
+  DT_FINI_ARRAYSZ = 28,       // Size of DT_FINI_ARRAY.
+  DT_RUNPATH      = 29,       // String table offset of lib search path.
+  DT_FLAGS        = 30,       // Flags.
+  DT_ENCODING     = 32,       // Values from here to DT_LOOS follow the rules
+                              // for the interpretation of the d_un union.
+
+  DT_PREINIT_ARRAY = 32,      // Pointer to array of preinit functions.
+  DT_PREINIT_ARRAYSZ = 33,    // Size of the DT_PREINIT_ARRAY array.
+
+  DT_LOOS         = 0x60000000, // Start of environment specific tags.
+  DT_HIOS         = 0x6FFFFFFF, // End of environment specific tags.
+  DT_LOPROC       = 0x70000000, // Start of processor specific tags.
+  DT_HIPROC       = 0x7FFFFFFF, // End of processor specific tags.
+
+  DT_GNU_HASH     = 0x6FFFFEF5, // Reference to the GNU hash table.
+  DT_RELACOUNT    = 0x6FFFFFF9, // ELF32_Rela count.
+  DT_RELCOUNT     = 0x6FFFFFFA, // ELF32_Rel count.
+
+  DT_FLAGS_1      = 0x6FFFFFFB, // Flags_1.
+  DT_VERSYM       = 0x6FFFFFF0, // The address of the .gnu.version section.
+  DT_VERDEF       = 0x6FFFFFFC, // The address of the version definition table.
+  DT_VERDEFNUM    = 0x6FFFFFFD, // The number of entries in DT_VERDEF.
+  DT_VERNEED      = 0x6FFFFFFE, // The address of the version dependency table.
+  DT_VERNEEDNUM   = 0x6FFFFFFF, // The number of entries in DT_VERNEED.
+
+  // Mips specific dynamic table entry tags.
+  DT_MIPS_RLD_VERSION   = 0x70000001, // 32 bit version number for runtime
+                                      // linker interface.
+  DT_MIPS_TIME_STAMP    = 0x70000002, // Time stamp.
+  DT_MIPS_ICHECKSUM     = 0x70000003, // Checksum of external strings
+                                      // and common sizes.
+  DT_MIPS_IVERSION      = 0x70000004, // Index of version string
+                                      // in string table.
+  DT_MIPS_FLAGS         = 0x70000005, // 32 bits of flags.
+  DT_MIPS_BASE_ADDRESS  = 0x70000006, // Base address of the segment.
+  DT_MIPS_MSYM          = 0x70000007, // Address of .msym section.
+  DT_MIPS_CONFLICT      = 0x70000008, // Address of .conflict section.
+  DT_MIPS_LIBLIST       = 0x70000009, // Address of .liblist section.
+  DT_MIPS_LOCAL_GOTNO   = 0x7000000a, // Number of local global offset
+                                      // table entries.
+  DT_MIPS_CONFLICTNO    = 0x7000000b, // Number of entries
+                                      // in the .conflict section.
+  DT_MIPS_LIBLISTNO     = 0x70000010, // Number of entries
+                                      // in the .liblist section.
+  DT_MIPS_SYMTABNO      = 0x70000011, // Number of entries
+                                      // in the .dynsym section.
+  DT_MIPS_UNREFEXTNO    = 0x70000012, // Index of first external dynamic symbol
+                                      // not referenced locally.
+  DT_MIPS_GOTSYM        = 0x70000013, // Index of first dynamic symbol
+                                      // in global offset table.
+  DT_MIPS_HIPAGENO      = 0x70000014, // Number of page table entries
+                                      // in global offset table.
+  DT_MIPS_RLD_MAP       = 0x70000016, // Address of run time loader map,
+                                      // used for debugging.
+  DT_MIPS_DELTA_CLASS       = 0x70000017, // Delta C++ class definition.
+  DT_MIPS_DELTA_CLASS_NO    = 0x70000018, // Number of entries
+                                          // in DT_MIPS_DELTA_CLASS.
+  DT_MIPS_DELTA_INSTANCE    = 0x70000019, // Delta C++ class instances.
+  DT_MIPS_DELTA_INSTANCE_NO = 0x7000001A, // Number of entries
+                                          // in DT_MIPS_DELTA_INSTANCE.
+  DT_MIPS_DELTA_RELOC       = 0x7000001B, // Delta relocations.
+  DT_MIPS_DELTA_RELOC_NO    = 0x7000001C, // Number of entries
+                                          // in DT_MIPS_DELTA_RELOC.
+  DT_MIPS_DELTA_SYM         = 0x7000001D, // Delta symbols that Delta
+                                          // relocations refer to.
+  DT_MIPS_DELTA_SYM_NO      = 0x7000001E, // Number of entries
+                                          // in DT_MIPS_DELTA_SYM.
+  DT_MIPS_DELTA_CLASSSYM    = 0x70000020, // Delta symbols that hold
+                                          // class declarations.
+  DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021, // Number of entries
+                                          // in DT_MIPS_DELTA_CLASSSYM.
+  DT_MIPS_CXX_FLAGS         = 0x70000022, // Flags indicating information
+                                          // about C++ flavor.
+  DT_MIPS_PIXIE_INIT        = 0x70000023, // Pixie information.
+  DT_MIPS_SYMBOL_LIB        = 0x70000024, // Address of .MIPS.symlib
+  DT_MIPS_LOCALPAGE_GOTIDX  = 0x70000025, // The GOT index of the first PTE
+                                          // for a segment
+  DT_MIPS_LOCAL_GOTIDX      = 0x70000026, // The GOT index of the first PTE
+                                          // for a local symbol
+  DT_MIPS_HIDDEN_GOTIDX     = 0x70000027, // The GOT index of the first PTE
+                                          // for a hidden symbol
+  DT_MIPS_PROTECTED_GOTIDX  = 0x70000028, // The GOT index of the first PTE
+                                          // for a protected symbol
+  DT_MIPS_OPTIONS           = 0x70000029, // Address of `.MIPS.options'.
+  DT_MIPS_INTERFACE         = 0x7000002A, // Address of `.interface'.
+  DT_MIPS_DYNSTR_ALIGN      = 0x7000002B, // Unknown.
+  DT_MIPS_INTERFACE_SIZE    = 0x7000002C, // Size of the .interface section.
+  DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002D, // Size of rld_text_resolve
+                                              // function stored in the GOT.
+  DT_MIPS_PERF_SUFFIX       = 0x7000002E, // Default suffix of DSO to be added
+                                          // by rld on dlopen() calls.
+  DT_MIPS_COMPACT_SIZE      = 0x7000002F, // Size of compact relocation
+                                          // section (O32).
+  DT_MIPS_GP_VALUE          = 0x70000030, // GP value for auxiliary GOTs.
+  DT_MIPS_AUX_DYNAMIC       = 0x70000031, // Address of auxiliary .dynamic.
+  DT_MIPS_PLTGOT            = 0x70000032, // Address of the base of the PLTGOT.
+  DT_MIPS_RWPLT             = 0x70000034  // Points to the base
+                                          // of a writable PLT.
+};
+
+// DT_FLAGS values.
+enum {
+  DF_ORIGIN     = 0x01, // The object may reference $ORIGIN.
+  DF_SYMBOLIC   = 0x02, // Search the shared lib before searching the exe.
+  DF_TEXTREL    = 0x04, // Relocations may modify a non-writable segment.
+  DF_BIND_NOW   = 0x08, // Process all relocations on load.
+  DF_STATIC_TLS = 0x10  // Reject attempts to load dynamically.
+};
+
+// State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1 entry.
+enum {
+  DF_1_NOW        = 0x00000001, // Set RTLD_NOW for this object.
+  DF_1_GLOBAL     = 0x00000002, // Set RTLD_GLOBAL for this object.
+  DF_1_GROUP      = 0x00000004, // Set RTLD_GROUP for this object.
+  DF_1_NODELETE   = 0x00000008, // Set RTLD_NODELETE for this object.
+  DF_1_LOADFLTR   = 0x00000010, // Trigger filtee loading at runtime.
+  DF_1_INITFIRST  = 0x00000020, // Set RTLD_INITFIRST for this object.
+  DF_1_NOOPEN     = 0x00000040, // Set RTLD_NOOPEN for this object.
+  DF_1_ORIGIN     = 0x00000080, // $ORIGIN must be handled.
+  DF_1_DIRECT     = 0x00000100, // Direct binding enabled.
+  DF_1_TRANS      = 0x00000200,
+  DF_1_INTERPOSE  = 0x00000400, // Object is used to interpose.
+  DF_1_NODEFLIB   = 0x00000800, // Ignore default lib search path.
+  DF_1_NODUMP     = 0x00001000, // Object can't be dldump'ed.
+  DF_1_CONFALT    = 0x00002000, // Configuration alternative created.
+  DF_1_ENDFILTEE  = 0x00004000, // Filtee terminates filters search.
+  DF_1_DISPRELDNE = 0x00008000, // Disp reloc applied at build time.
+  DF_1_DISPRELPND = 0x00010000  // Disp reloc applied at run-time.
+};
+
+// DT_MIPS_FLAGS values.
+enum {
+  RHF_NONE                    = 0x00000000, // No flags.
+  RHF_QUICKSTART              = 0x00000001, // Uses shortcut pointers.
+  RHF_NOTPOT                  = 0x00000002, // Hash size is not a power of two.
+  RHS_NO_LIBRARY_REPLACEMENT  = 0x00000004, // Ignore LD_LIBRARY_PATH.
+  RHF_NO_MOVE                 = 0x00000008, // DSO address may not be relocated.
+  RHF_SGI_ONLY                = 0x00000010, // SGI specific features.
+  RHF_GUARANTEE_INIT          = 0x00000020, // Guarantee that .init will finish
+                                            // executing before any non-init
+                                            // code in DSO is called.
+  RHF_DELTA_C_PLUS_PLUS       = 0x00000040, // Contains Delta C++ code.
+  RHF_GUARANTEE_START_INIT    = 0x00000080, // Guarantee that .init will start
+                                            // executing before any non-init
+                                            // code in DSO is called.
+  RHF_PIXIE                   = 0x00000100, // Generated by pixie.
+  RHF_DEFAULT_DELAY_LOAD      = 0x00000200, // Delay-load DSO by default.
+  RHF_REQUICKSTART            = 0x00000400, // Object may be requickstarted
+  RHF_REQUICKSTARTED          = 0x00000800, // Object has been requickstarted
+  RHF_CORD                    = 0x00001000, // Generated by cord.
+  RHF_NO_UNRES_UNDEF          = 0x00002000, // Object contains no unresolved
+                                            // undef symbols.
+  RHF_RLD_ORDER_SAFE          = 0x00004000  // Symbol table is in a safe order.
+};
+
+// ElfXX_VerDef structure version (GNU versioning)
+enum {
+  VER_DEF_NONE    = 0,
+  VER_DEF_CURRENT = 1
+};
+
+// VerDef Flags (ElfXX_VerDef::vd_flags)
+enum {
+  VER_FLG_BASE = 0x1,
+  VER_FLG_WEAK = 0x2,
+  VER_FLG_INFO = 0x4
+};
+
+// Special constants for the version table. (SHT_GNU_versym/.gnu.version)
+enum {
+  VER_NDX_LOCAL  = 0,      // Unversioned local symbol
+  VER_NDX_GLOBAL = 1,      // Unversioned global symbol
+  VERSYM_VERSION = 0x7fff, // Version Index mask
+  VERSYM_HIDDEN  = 0x8000  // Hidden bit (non-default version)
+};
+
+// ElfXX_VerNeed structure version (GNU versioning)
+enum {
+  VER_NEED_NONE = 0,
+  VER_NEED_CURRENT = 1
+};
+
+// BEGIN android-changed
+#endif  // ART_RUNTIME_ELF_H_
+// END android-changed
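For reference (not part of the patch), the st_info and r_info accessors defined above pack two values into one field: a symbol's binding and type share st_info (bind << 4 | type), and an ELF32 relocation's symbol index and type share r_info (sym << 8 | type). A minimal standalone C++ sketch of that packing, with illustrative values:

    // Illustrative sketch only; mirrors the accessors added in elf.h above.
    #include <cassert>
    #include <cstdint>

    int main() {
      // Symbol info: STB_GLOBAL (1) binding, STT_FUNC (2) type.
      uint8_t st_info = (1 << 4) + (2 & 0x0f);
      assert((st_info >> 4) == 1);    // getBinding()
      assert((st_info & 0x0f) == 2);  // getType()

      // ELF32 relocation info: symbol table index 7, relocation type 3.
      uint32_t r_info = (7u << 8) + 3u;
      assert((r_info >> 8) == 7u);    // getSymbol()
      assert((r_info & 0xff) == 3u);  // getType()
      return 0;
    }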
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 0df8211..bb33978 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -22,6 +22,8 @@
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "base/stl_util.h"
+#include "dwarf.h"
+#include "leb128.h"
 #include "utils.h"
 #include "instruction_set.h"
 
@@ -108,43 +110,51 @@
   : file_(file),
     writable_(writable),
     program_header_only_(program_header_only),
-    header_(NULL),
-    base_address_(NULL),
-    program_headers_start_(NULL),
-    section_headers_start_(NULL),
-    dynamic_program_header_(NULL),
-    dynamic_section_start_(NULL),
-    symtab_section_start_(NULL),
-    dynsym_section_start_(NULL),
-    strtab_section_start_(NULL),
-    dynstr_section_start_(NULL),
-    hash_section_start_(NULL),
-    symtab_symbol_table_(NULL),
-    dynsym_symbol_table_(NULL),
-    jit_elf_image_(NULL),
-    jit_gdb_entry_(NULL) {
-  CHECK(file != NULL);
+    header_(nullptr),
+    base_address_(nullptr),
+    program_headers_start_(nullptr),
+    section_headers_start_(nullptr),
+    dynamic_program_header_(nullptr),
+    dynamic_section_start_(nullptr),
+    symtab_section_start_(nullptr),
+    dynsym_section_start_(nullptr),
+    strtab_section_start_(nullptr),
+    dynstr_section_start_(nullptr),
+    hash_section_start_(nullptr),
+    symtab_symbol_table_(nullptr),
+    dynsym_symbol_table_(nullptr),
+    jit_elf_image_(nullptr),
+    jit_gdb_entry_(nullptr) {
+  CHECK(file != nullptr);
 }
 
 ElfFile* ElfFile::Open(File* file, bool writable, bool program_header_only,
                        std::string* error_msg) {
   std::unique_ptr<ElfFile> elf_file(new ElfFile(file, writable, program_header_only));
-  if (!elf_file->Setup(error_msg)) {
-    return nullptr;
-  }
-  return elf_file.release();
-}
-
-bool ElfFile::Setup(std::string* error_msg) {
   int prot;
   int flags;
-  if (writable_) {
+  if (writable) {
     prot = PROT_READ | PROT_WRITE;
     flags = MAP_SHARED;
   } else {
     prot = PROT_READ;
     flags = MAP_PRIVATE;
   }
+  if (!elf_file->Setup(prot, flags, error_msg)) {
+    return nullptr;
+  }
+  return elf_file.release();
+}
+
+ElfFile* ElfFile::Open(File* file, int prot, int flags, std::string* error_msg) {
+  std::unique_ptr<ElfFile> elf_file(new ElfFile(file, (prot & PROT_WRITE) == PROT_WRITE, false));
+  if (!elf_file->Setup(prot, flags, error_msg)) {
+    return nullptr;
+  }
+  return elf_file.release();
+}
+
+bool ElfFile::Setup(int prot, int flags, std::string* error_msg) {
   int64_t temp_file_length = file_->GetLength();
   if (temp_file_length < 0) {
     errno = -temp_file_length;
@@ -201,7 +211,7 @@
 
     // Find .dynamic section info from program header
     dynamic_program_header_ = FindProgamHeaderByType(PT_DYNAMIC);
-    if (dynamic_program_header_ == NULL) {
+    if (dynamic_program_header_ == nullptr) {
       *error_msg = StringPrintf("Failed to find PT_DYNAMIC program header in ELF file: '%s'",
                                 file_->GetPath().c_str());
       return false;
@@ -263,14 +273,14 @@
 }
 
 bool ElfFile::SetMap(MemMap* map, std::string* error_msg) {
-  if (map == NULL) {
+  if (map == nullptr) {
     // MemMap::Open should have already set an error.
     DCHECK(!error_msg->empty());
     return false;
   }
   map_.reset(map);
-  CHECK(map_.get() != NULL) << file_->GetPath();
-  CHECK(map_->Begin() != NULL) << file_->GetPath();
+  CHECK(map_.get() != nullptr) << file_->GetPath();
+  CHECK(map_->Begin() != nullptr) << file_->GetPath();
 
   header_ = reinterpret_cast<Elf32_Ehdr*>(map_->Begin());
   if ((ELFMAG0 != header_->e_ident[EI_MAG0])
@@ -397,27 +407,27 @@
 
 
 Elf32_Ehdr& ElfFile::GetHeader() const {
-  CHECK(header_ != NULL);
+  CHECK(header_ != nullptr);
   return *header_;
 }
 
 byte* ElfFile::GetProgramHeadersStart() const {
-  CHECK(program_headers_start_ != NULL);
+  CHECK(program_headers_start_ != nullptr);
   return program_headers_start_;
 }
 
 byte* ElfFile::GetSectionHeadersStart() const {
-  CHECK(section_headers_start_ != NULL);
+  CHECK(section_headers_start_ != nullptr);
   return section_headers_start_;
 }
 
 Elf32_Phdr& ElfFile::GetDynamicProgramHeader() const {
-  CHECK(dynamic_program_header_ != NULL);
+  CHECK(dynamic_program_header_ != nullptr);
   return *dynamic_program_header_;
 }
 
 Elf32_Dyn* ElfFile::GetDynamicSectionStart() const {
-  CHECK(dynamic_section_start_ != NULL);
+  CHECK(dynamic_section_start_ != nullptr);
   return dynamic_section_start_;
 }
 
@@ -435,10 +445,10 @@
     }
     default: {
       LOG(FATAL) << section_type;
-      symbol_section_start = NULL;
+      symbol_section_start = nullptr;
     }
   }
-  CHECK(symbol_section_start != NULL);
+  CHECK(symbol_section_start != nullptr);
   return symbol_section_start;
 }
 
@@ -456,17 +466,17 @@
     }
     default: {
       LOG(FATAL) << section_type;
-      string_section_start = NULL;
+      string_section_start = nullptr;
     }
   }
-  CHECK(string_section_start != NULL);
+  CHECK(string_section_start != nullptr);
   return string_section_start;
 }
 
 const char* ElfFile::GetString(Elf32_Word section_type, Elf32_Word i) const {
   CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
   if (i == 0) {
-    return NULL;
+    return nullptr;
   }
   const char* string_section_start = GetStringSectionStart(section_type);
   const char* string = string_section_start + i;
@@ -474,7 +484,7 @@
 }
 
 Elf32_Word* ElfFile::GetHashSectionStart() const {
-  CHECK(hash_section_start_ != NULL);
+  CHECK(hash_section_start_ != nullptr);
   return hash_section_start_;
 }
 
@@ -516,7 +526,7 @@
       return &program_header;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 Elf32_Word ElfFile::GetSectionHeaderNum() const {
@@ -543,7 +553,7 @@
       return &section_header;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 // from bionic
@@ -565,6 +575,15 @@
 }
 
 const byte* ElfFile::FindDynamicSymbolAddress(const std::string& symbol_name) const {
+  const Elf32_Sym* sym = FindDynamicSymbol(symbol_name);
+  if (sym != nullptr) {
+    return base_address_ + sym->st_value;
+  } else {
+    return nullptr;
+  }
+}
+
+const Elf32_Sym* ElfFile::FindDynamicSymbol(const std::string& symbol_name) const {
   Elf32_Word hash = elfhash(symbol_name.c_str());
   Elf32_Word bucket_index = hash % GetHashBucketNum();
   Elf32_Word symbol_and_chain_index = GetHashBucket(bucket_index);
@@ -572,11 +591,11 @@
     Elf32_Sym& symbol = GetSymbol(SHT_DYNSYM, symbol_and_chain_index);
     const char* name = GetString(SHT_DYNSYM, symbol.st_name);
     if (symbol_name == name) {
-      return base_address_ + symbol.st_value;
+      return &symbol;
     }
     symbol_and_chain_index = GetHashChain(symbol_and_chain_index);
   }
-  return NULL;
+  return nullptr;
 }
 
 bool ElfFile::IsSymbolSectionType(Elf32_Word section_type) {
@@ -606,7 +625,7 @@
     }
     default: {
       LOG(FATAL) << section_type;
-      return NULL;
+      return nullptr;
     }
   }
 }
@@ -618,12 +637,12 @@
   CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
 
   SymbolTable** symbol_table = GetSymbolTable(section_type);
-  if (*symbol_table != NULL || build_map) {
-    if (*symbol_table == NULL) {
+  if (*symbol_table != nullptr || build_map) {
+    if (*symbol_table == nullptr) {
       DCHECK(build_map);
       *symbol_table = new SymbolTable;
       Elf32_Shdr* symbol_section = FindSectionByType(section_type);
-      CHECK(symbol_section != NULL) << file_->GetPath();
+      CHECK(symbol_section != nullptr) << file_->GetPath();
       Elf32_Shdr& string_section = GetSectionHeader(symbol_section->sh_link);
       for (uint32_t i = 0; i < GetSymbolNum(*symbol_section); i++) {
         Elf32_Sym& symbol = GetSymbol(section_type, i);
@@ -632,7 +651,7 @@
           continue;
         }
         const char* name = GetString(string_section, symbol.st_name);
-        if (name == NULL) {
+        if (name == nullptr) {
           continue;
         }
         std::pair<SymbolTable::iterator, bool> result =
@@ -647,36 +666,36 @@
         }
       }
     }
-    CHECK(*symbol_table != NULL);
+    CHECK(*symbol_table != nullptr);
     SymbolTable::const_iterator it = (*symbol_table)->find(symbol_name);
     if (it == (*symbol_table)->end()) {
-      return NULL;
+      return nullptr;
     }
     return it->second;
   }
 
   // Fall back to linear search
   Elf32_Shdr* symbol_section = FindSectionByType(section_type);
-  CHECK(symbol_section != NULL) << file_->GetPath();
+  CHECK(symbol_section != nullptr) << file_->GetPath();
   Elf32_Shdr& string_section = GetSectionHeader(symbol_section->sh_link);
   for (uint32_t i = 0; i < GetSymbolNum(*symbol_section); i++) {
     Elf32_Sym& symbol = GetSymbol(section_type, i);
     const char* name = GetString(string_section, symbol.st_name);
-    if (name == NULL) {
+    if (name == nullptr) {
       continue;
     }
     if (symbol_name == name) {
       return &symbol;
     }
   }
-  return NULL;
+  return nullptr;
 }
 
 Elf32_Addr ElfFile::FindSymbolAddress(Elf32_Word section_type,
                                       const std::string& symbol_name,
                                       bool build_map) {
   Elf32_Sym* symbol = FindSymbolByName(section_type, symbol_name, build_map);
-  if (symbol == NULL) {
+  if (symbol == nullptr) {
     return 0;
   }
   return symbol->st_value;
@@ -688,7 +707,7 @@
   CHECK_EQ(static_cast<Elf32_Word>(SHT_STRTAB), string_section.sh_type) << file_->GetPath();
   CHECK_LT(i, string_section.sh_size) << file_->GetPath();
   if (i == 0) {
-    return NULL;
+    return nullptr;
   }
   byte* strings = Begin() + string_section.sh_offset;
   byte* string = strings + i;
@@ -846,7 +865,7 @@
       std::string reservation_name("ElfFile reservation for ");
       reservation_name += file_->GetPath();
       std::unique_ptr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
-                                                     NULL, GetLoadedSize(), PROT_NONE, false,
+                                                     nullptr, GetLoadedSize(), PROT_NONE, false,
                                                      error_msg));
       if (reserve.get() == nullptr) {
         *error_msg = StringPrintf("Failed to allocate %s: %s",
@@ -970,29 +989,323 @@
   return false;
 }
 
-static bool check_section_name(ElfFile& file, int section_num, const char *name) {
-  Elf32_Shdr& section_header = file.GetSectionHeader(section_num);
-  const char *section_name = file.GetString(SHT_SYMTAB, section_header.sh_name);
-  return strcmp(name, section_name) == 0;
+
+Elf32_Shdr* ElfFile::FindSectionByName(const std::string& name) const {
+  CHECK(!program_header_only_);
+  Elf32_Shdr& shstrtab_sec = GetSectionNameStringSection();
+  for (uint32_t i = 0; i < GetSectionHeaderNum(); i++) {
+    Elf32_Shdr& shdr = GetSectionHeader(i);
+    const char* sec_name = GetString(shstrtab_sec, shdr.sh_name);
+    if (sec_name == nullptr) {
+      continue;
+    }
+    if (name == sec_name) {
+      return &shdr;
+    }
+  }
+  return nullptr;
 }
 
-static void IncrementUint32(byte *p, uint32_t increment) {
-  uint32_t *u = reinterpret_cast<uint32_t *>(p);
-  *u += increment;
+struct PACKED(1) FDE {
+  uint32_t raw_length_;
+  uint32_t GetLength() {
+    return raw_length_ + sizeof(raw_length_);
+  }
+  uint32_t CIE_pointer;
+  uint32_t initial_location;
+  uint32_t address_range;
+  uint8_t instructions[0];
+};
+
+static FDE* NextFDE(FDE* frame) {
+  byte* fde_bytes = reinterpret_cast<byte*>(frame);
+  fde_bytes += frame->GetLength();
+  return reinterpret_cast<FDE*>(fde_bytes);
 }
 
-static void RoundAndClear(byte *image, uint32_t& offset, int pwr2) {
-  uint32_t mask = pwr2 - 1;
-  while (offset & mask) {
-    image[offset++] = 0;
+static bool IsFDE(FDE* frame) {
+  // TODO This seems to be the constant everyone uses (for the .debug_frame
+  // section at least); however, we should investigate this further.
+  const uint32_t kDwarfCIE_id = 0xffffffff;
+  const uint32_t kReservedLengths[] = {0xffffffff, 0xfffffff0};
+  return frame->CIE_pointer != kDwarfCIE_id &&
+      frame->raw_length_ != kReservedLengths[0] && frame->raw_length_ != kReservedLengths[1];
+}
+
+// TODO This only works for 32-bit Elf Files.
+static bool FixupDebugFrame(uintptr_t text_start, byte* dbg_frame, size_t dbg_frame_size) {
+  FDE* last_frame = reinterpret_cast<FDE*>(dbg_frame + dbg_frame_size);
+  FDE* frame = NextFDE(reinterpret_cast<FDE*>(dbg_frame));
+  for (; frame < last_frame; frame = NextFDE(frame)) {
+    if (!IsFDE(frame)) {
+      return false;
+    }
+    frame->initial_location += text_start;
+  }
+  return true;
+}
+
+struct PACKED(1) DebugInfoHeader {
+  uint32_t unit_length;  // TODO 32-bit specific size
+  uint16_t version;
+  uint32_t debug_abbrev_offset;  // TODO 32-bit specific size
+  uint8_t  address_size;
+};
+
+// Returns -1 if it is variable length, which we will just disallow for now.
+static int32_t FormLength(uint32_t att) {
+  switch (att) {
+    case DW_FORM_data1:
+    case DW_FORM_flag:
+    case DW_FORM_flag_present:
+    case DW_FORM_ref1:
+      return 1;
+
+    case DW_FORM_data2:
+    case DW_FORM_ref2:
+      return 2;
+
+    case DW_FORM_addr:        // TODO 32-bit only
+    case DW_FORM_ref_addr:    // TODO 32-bit only
+    case DW_FORM_sec_offset:  // TODO 32-bit only
+    case DW_FORM_strp:        // TODO 32-bit only
+    case DW_FORM_data4:
+    case DW_FORM_ref4:
+      return 4;
+
+    case DW_FORM_data8:
+    case DW_FORM_ref8:
+    case DW_FORM_ref_sig8:
+      return 8;
+
+    case DW_FORM_block:
+    case DW_FORM_block1:
+    case DW_FORM_block2:
+    case DW_FORM_block4:
+    case DW_FORM_exprloc:
+    case DW_FORM_indirect:
+    case DW_FORM_ref_udata:
+    case DW_FORM_sdata:
+    case DW_FORM_string:
+    case DW_FORM_udata:
+    default:
+      return -1;
   }
 }
 
-// Simple macro to bump a point to a section header to the next one.
-#define BUMP_SHENT(sp) \
-  sp = reinterpret_cast<Elf32_Shdr *> (\
-      reinterpret_cast<byte*>(sp) + elf_hdr.e_shentsize);\
-  offset += elf_hdr.e_shentsize
+class DebugTag {
+ public:
+  const uint32_t index_;
+  ~DebugTag() {}
+  // Creates a new tag and moves data pointer up to the start of the next one.
+  // nullptr means error.
+  static DebugTag* Create(const byte** data_pointer) {
+    const byte* data = *data_pointer;
+    uint32_t index = DecodeUnsignedLeb128(&data);
+    std::unique_ptr<DebugTag> tag(new DebugTag(index));
+    tag->size_ = static_cast<uint32_t>(
+        reinterpret_cast<uintptr_t>(data) - reinterpret_cast<uintptr_t>(*data_pointer));
+    // skip the abbrev
+    tag->tag_ = DecodeUnsignedLeb128(&data);
+    tag->has_child_ = (*data == 0);
+    data++;
+    while (true) {
+      uint32_t attr = DecodeUnsignedLeb128(&data);
+      uint32_t form = DecodeUnsignedLeb128(&data);
+      if (attr == 0 && form == 0) {
+        break;
+      } else if (attr == 0 || form == 0) {
+        // Bad abbrev.
+        return nullptr;
+      }
+      int32_t size = FormLength(form);
+      if (size == -1) {
+        return nullptr;
+      }
+      tag->AddAttribute(attr, static_cast<uint32_t>(size));
+    }
+    *data_pointer = data;
+    return tag.release();
+  }
+
+  uint32_t GetSize() const {
+    return size_;
+  }
+
+  bool HasChild() {
+    return has_child_;
+  }
+
+  uint32_t GetTagNumber() {
+    return tag_;
+  }
+
+  // Gets the offset of a particular attribute in this tag structure.
+  // Interpretation of the data is left to the consumer. 0 is returned if the
+  // tag does not contain the attribute.
+  uint32_t GetOffsetOf(uint32_t dwarf_attribute) const {
+    auto it = off_map_.find(dwarf_attribute);
+    if (it == off_map_.end()) {
+      return 0;
+    } else {
+      return it->second;
+    }
+  }
+
+  // Gets the size of the attribute.
+  uint32_t GetAttrSize(uint32_t dwarf_attribute) const {
+    auto it = size_map_.find(dwarf_attribute);
+    if (it == size_map_.end()) {
+      return 0;
+    } else {
+      return it->second;
+    }
+  }
+
+ private:
+  explicit DebugTag(uint32_t index) : index_(index) {}
+  void AddAttribute(uint32_t type, uint32_t attr_size) {
+    off_map_.insert(std::pair<uint32_t, uint32_t>(type, size_));
+    size_map_.insert(std::pair<uint32_t, uint32_t>(type, attr_size));
+    size_ += attr_size;
+  }
+  std::map<uint32_t, uint32_t> off_map_;
+  std::map<uint32_t, uint32_t> size_map_;
+  uint32_t size_;
+  uint32_t tag_;
+  bool has_child_;
+};
+
+class DebugAbbrev {
+ public:
+  ~DebugAbbrev() {}
+  static DebugAbbrev* Create(const byte* dbg_abbrev, size_t dbg_abbrev_size) {
+    std::unique_ptr<DebugAbbrev> abbrev(new DebugAbbrev);
+    const byte* last = dbg_abbrev + dbg_abbrev_size;
+    while (dbg_abbrev < last) {
+      std::unique_ptr<DebugTag> tag(DebugTag::Create(&dbg_abbrev));
+      if (tag.get() == nullptr) {
+        return nullptr;
+      } else {
+        abbrev->tags_.insert(std::pair<uint32_t, uint32_t>(tag->index_, abbrev->tag_list_.size()));
+        abbrev->tag_list_.push_back(std::move(tag));
+      }
+    }
+    return abbrev.release();
+  }
+
+  DebugTag* ReadTag(const byte* entry) {
+    uint32_t tag_num = DecodeUnsignedLeb128(&entry);
+    auto it = tags_.find(tag_num);
+    if (it == tags_.end()) {
+      return nullptr;
+    } else {
+      CHECK_GT(tag_list_.size(), it->second);
+      return tag_list_.at(it->second).get();
+    }
+  }
+
+ private:
+  DebugAbbrev() {}
+  std::map<uint32_t, uint32_t> tags_;
+  std::vector<std::unique_ptr<DebugTag>> tag_list_;
+};
+
+class DebugInfoIterator {
+ public:
+  static DebugInfoIterator* Create(DebugInfoHeader* header, size_t frame_size,
+                                   DebugAbbrev* abbrev) {
+    std::unique_ptr<DebugInfoIterator> iter(new DebugInfoIterator(header, frame_size, abbrev));
+    if (iter->GetCurrentTag() == nullptr) {
+      return nullptr;
+    } else {
+      return iter.release();
+    }
+  }
+  ~DebugInfoIterator() {}
+
+  // Moves to the next DIE. Returns false if at last entry.
+  // TODO Handle variable length attributes.
+  bool next() {
+    if (current_entry_ == nullptr || current_tag_ == nullptr) {
+      return false;
+    }
+    current_entry_ += current_tag_->GetSize();
+    if (current_entry_ >= last_entry_) {
+      current_entry_ = nullptr;
+      return false;
+    }
+    current_tag_ = abbrev_->ReadTag(current_entry_);
+    if (current_tag_ == nullptr) {
+      current_entry_ = nullptr;
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  const DebugTag* GetCurrentTag() {
+    return const_cast<DebugTag*>(current_tag_);
+  }
+  byte* GetPointerToField(uint8_t dwarf_field) {
+    if (current_tag_ == nullptr || current_entry_ == nullptr || current_entry_ >= last_entry_) {
+      return nullptr;
+    }
+    uint32_t off = current_tag_->GetOffsetOf(dwarf_field);
+    if (off == 0) {
+      // tag does not have that field.
+      return nullptr;
+    } else {
+      DCHECK_LT(off, current_tag_->GetSize());
+      return current_entry_ + off;
+    }
+  }
+
+ private:
+  DebugInfoIterator(DebugInfoHeader* header, size_t frame_size, DebugAbbrev* abbrev)
+      : abbrev_(abbrev),
+        last_entry_(reinterpret_cast<byte*>(header) + frame_size),
+        current_entry_(reinterpret_cast<byte*>(header) + sizeof(DebugInfoHeader)),
+        current_tag_(abbrev_->ReadTag(current_entry_)) {}
+  DebugAbbrev* abbrev_;
+  byte* last_entry_;
+  byte* current_entry_;
+  DebugTag* current_tag_;
+};
+
+static bool FixupDebugInfo(uint32_t text_start, DebugInfoIterator* iter) {
+  do {
+    if (iter->GetCurrentTag()->GetAttrSize(DW_AT_low_pc) != sizeof(int32_t) ||
+        iter->GetCurrentTag()->GetAttrSize(DW_AT_high_pc) != sizeof(int32_t)) {
+      return false;
+    }
+    uint32_t* PC_low = reinterpret_cast<uint32_t*>(iter->GetPointerToField(DW_AT_low_pc));
+    uint32_t* PC_high = reinterpret_cast<uint32_t*>(iter->GetPointerToField(DW_AT_high_pc));
+    if (PC_low != nullptr && PC_high != nullptr) {
+      *PC_low  += text_start;
+      *PC_high += text_start;
+    }
+  } while (iter->next());
+  return true;
+}
+
+static bool FixupDebugSections(const byte* dbg_abbrev, size_t dbg_abbrev_size,
+                               uintptr_t text_start,
+                               byte* dbg_info, size_t dbg_info_size,
+                               byte* dbg_frame, size_t dbg_frame_size) {
+  std::unique_ptr<DebugAbbrev> abbrev(DebugAbbrev::Create(dbg_abbrev, dbg_abbrev_size));
+  if (abbrev.get() == nullptr) {
+    return false;
+  }
+  std::unique_ptr<DebugInfoIterator> iter(
+      DebugInfoIterator::Create(reinterpret_cast<DebugInfoHeader*>(dbg_info),
+                                dbg_info_size, abbrev.get()));
+  if (iter.get() == nullptr) {
+    return false;
+  }
+  return FixupDebugInfo(text_start, iter.get())
+      && FixupDebugFrame(text_start, dbg_frame, dbg_frame_size);
+}
 
 void ElfFile::GdbJITSupport() {
   // We only get here if we only are mapping the program header.
@@ -1000,18 +1313,25 @@
 
   // Well, we need the whole file to do this.
   std::string error_msg;
-  std::unique_ptr<ElfFile> ptr(Open(const_cast<File*>(file_), false, false, &error_msg));
-  ElfFile& all = *ptr;
-
-  // Do we have interesting sections?
-  // Is this an OAT file with interesting sections?
-  if (all.GetSectionHeaderNum() != kExpectedSectionsInOATFile) {
+  // Make it MAP_PRIVATE so we can just give it to gdb if all the necessary
+  // sections are there.
+  std::unique_ptr<ElfFile> all_ptr(Open(const_cast<File*>(file_), PROT_READ | PROT_WRITE,
+                                        MAP_PRIVATE, &error_msg));
+  if (all_ptr.get() == nullptr) {
     return;
   }
-  if (!check_section_name(all, 8, ".debug_info") ||
-      !check_section_name(all, 9, ".debug_abbrev") ||
-      !check_section_name(all, 10, ".debug_frame") ||
-      !check_section_name(all, 11, ".debug_str")) {
+  ElfFile& all = *all_ptr;
+
+  // Do we have interesting sections?
+  const Elf32_Shdr* debug_info = all.FindSectionByName(".debug_info");
+  const Elf32_Shdr* debug_abbrev = all.FindSectionByName(".debug_abbrev");
+  const Elf32_Shdr* debug_frame = all.FindSectionByName(".debug_frame");
+  const Elf32_Shdr* debug_str = all.FindSectionByName(".debug_str");
+  const Elf32_Shdr* strtab_sec = all.FindSectionByName(".strtab");
+  const Elf32_Shdr* symtab_sec = all.FindSectionByName(".symtab");
+  Elf32_Shdr* text_sec = all.FindSectionByName(".text");
+  if (debug_info == nullptr || debug_abbrev == nullptr || debug_frame == nullptr ||
+      debug_str == nullptr || text_sec == nullptr || strtab_sec == nullptr || symtab_sec == nullptr) {
     return;
   }
 #ifdef __LP64__
@@ -1019,227 +1339,29 @@
     return;  // No ELF debug support in 64bit.
   }
 #endif
-  // This is not needed if we have no .text segment.
-  uint32_t text_start_addr = 0;
-  for (uint32_t i = 0; i < segments_.size(); i++) {
-    if (segments_[i]->GetProtect() & PROT_EXEC) {
-      // We found the .text section.
-      text_start_addr = PointerToLowMemUInt32(segments_[i]->Begin());
-      break;
-    }
-  }
-  if (text_start_addr == 0U) {
-    return;
-  }
-
-  // Okay, we are good enough.  Fake up an ELF image and tell GDB about it.
-  // We need some extra space for the debug and string sections, the ELF header, and the
-  // section header.
-  uint32_t needed_size = KB;
-
-  for (Elf32_Word i = 1; i < all.GetSectionHeaderNum(); i++) {
-    Elf32_Shdr& section_header = all.GetSectionHeader(i);
-    if (section_header.sh_addr == 0 && section_header.sh_type != SHT_DYNSYM) {
-      // Debug section: we need it.
-      needed_size += section_header.sh_size;
-    } else if (section_header.sh_type == SHT_STRTAB &&
-                strcmp(".shstrtab",
-                       all.GetString(SHT_SYMTAB, section_header.sh_name)) == 0) {
-      // We also need the shared string table.
-      needed_size += section_header.sh_size;
-
-      // We also need the extra strings .symtab\0.strtab\0
-      needed_size += 16;
-    }
-  }
-
-  // Start creating our image.
-  jit_elf_image_ = new byte[needed_size];
-
-  // Create the Elf Header by copying the old one
-  Elf32_Ehdr& elf_hdr =
-    *reinterpret_cast<Elf32_Ehdr*>(jit_elf_image_);
-
-  elf_hdr = all.GetHeader();
+  // The image needs a .strtab and .symtab, and this file already provides both.
+  // Since 'all' is mapped MAP_PRIVATE, it can be written to freely.
+  Elf32_Ehdr& elf_hdr = all.GetHeader();
   elf_hdr.e_entry = 0;
   elf_hdr.e_phoff = 0;
   elf_hdr.e_phnum = 0;
   elf_hdr.e_phentsize = 0;
   elf_hdr.e_type = ET_EXEC;
 
-  uint32_t offset = sizeof(Elf32_Ehdr);
+  text_sec->sh_type = SHT_NOBITS;
+  text_sec->sh_offset = 0;
 
-  // Copy the debug sections and string table.
-  uint32_t debug_offsets[kExpectedSectionsInOATFile];
-  memset(debug_offsets, '\0', sizeof debug_offsets);
-  Elf32_Shdr *text_header = nullptr;
-  int extra_shstrtab_entries = -1;
-  int text_section_index = -1;
-  int section_index = 1;
-  for (Elf32_Word i = 1; i < kExpectedSectionsInOATFile; i++) {
-    Elf32_Shdr& section_header = all.GetSectionHeader(i);
-    // Round up to multiple of 4, ensuring zero fill.
-    RoundAndClear(jit_elf_image_, offset, 4);
-    if (section_header.sh_addr == 0 && section_header.sh_type != SHT_DYNSYM) {
-      // Debug section: we need it.  Unfortunately, it wasn't mapped in.
-      debug_offsets[i] = offset;
-      // Read it from the file.
-      lseek(file_->Fd(), section_header.sh_offset, SEEK_SET);
-      read(file_->Fd(), jit_elf_image_ + offset, section_header.sh_size);
-      offset += section_header.sh_size;
-      section_index++;
-      offset += 16;
-    } else if (section_header.sh_type == SHT_STRTAB &&
-                strcmp(".shstrtab",
-                       all.GetString(SHT_SYMTAB, section_header.sh_name)) == 0) {
-      // We also need the shared string table.
-      debug_offsets[i] = offset;
-      // Read it from the file.
-      lseek(file_->Fd(), section_header.sh_offset, SEEK_SET);
-      read(file_->Fd(), jit_elf_image_ + offset, section_header.sh_size);
-      offset += section_header.sh_size;
-      // We also need the extra strings .symtab\0.strtab\0
-      extra_shstrtab_entries = section_header.sh_size;
-      memcpy(jit_elf_image_+offset, ".symtab\0.strtab\0", 16);
-      offset += 16;
-      section_index++;
-    } else if (section_header.sh_flags & SHF_EXECINSTR) {
-      DCHECK(strcmp(".text", all.GetString(SHT_SYMTAB,
-                                           section_header.sh_name)) == 0);
-      text_header = &section_header;
-      text_section_index = section_index++;
-    }
-  }
-  DCHECK(text_header != nullptr);
-  DCHECK_NE(extra_shstrtab_entries, -1);
-
-  // We now need to update the addresses for debug_info and debug_frame to get to the
-  // correct offset within the .text section.
-  byte *p = jit_elf_image_+debug_offsets[8];
-  byte *end = p + all.GetSectionHeader(8).sh_size;
-
-  // For debug_info; patch compilation using low_pc @ offset 13, high_pc at offset 17.
-  IncrementUint32(p + 13, text_start_addr);
-  IncrementUint32(p + 17, text_start_addr);
-
-  // Now fix the low_pc, high_pc for each method address.
-  // First method starts at offset 0x15, each subsequent method is 1+3*4 bytes further.
-  for (p += 0x15; p < end; p += 1 /* attr# */ + 3 * sizeof(uint32_t) /* addresses */) {
-    IncrementUint32(p + 1 + sizeof(uint32_t), text_start_addr);
-    IncrementUint32(p + 1 + 2 * sizeof(uint32_t), text_start_addr);
+  if (!FixupDebugSections(
+        all.Begin() + debug_abbrev->sh_offset, debug_abbrev->sh_size, text_sec->sh_addr,
+        all.Begin() + debug_info->sh_offset, debug_info->sh_size,
+        all.Begin() + debug_frame->sh_offset, debug_frame->sh_size)) {
+    LOG(ERROR) << "Failed to load GDB data";
+    return;
   }
 
-  // Now we have to handle the debug_frame method start addresses
-  p = jit_elf_image_+debug_offsets[10];
-  end = p + all.GetSectionHeader(10).sh_size;
-
-  // Skip past the CIE.
-  p += *reinterpret_cast<uint32_t *>(p) + 4;
-
-  // And walk the FDEs.
-  for (; p < end; p += *reinterpret_cast<uint32_t *>(p) + sizeof(uint32_t)) {
-    IncrementUint32(p + 2 * sizeof(uint32_t), text_start_addr);
-  }
-
-  // Create the data for the symbol table.
-  const int kSymbtabAlignment = 16;
-  RoundAndClear(jit_elf_image_, offset, kSymbtabAlignment);
-  uint32_t symtab_offset = offset;
-
-  // First entry is empty.
-  memset(jit_elf_image_+offset, 0, sizeof(Elf32_Sym));
-  offset += sizeof(Elf32_Sym);
-
-  // Symbol 1 is the real .text section.
-  Elf32_Sym& sym_ent = *reinterpret_cast<Elf32_Sym*>(jit_elf_image_+offset);
-  sym_ent.st_name = 1; /* .text */
-  sym_ent.st_value = text_start_addr;
-  sym_ent.st_size = text_header->sh_size;
-  SetBindingAndType(&sym_ent, STB_LOCAL, STT_SECTION);
-  sym_ent.st_other = 0;
-  sym_ent.st_shndx = text_section_index;
-  offset += sizeof(Elf32_Sym);
-
-  // Create the data for the string table.
-  RoundAndClear(jit_elf_image_, offset, kSymbtabAlignment);
-  const int kTextStringSize = 7;
-  uint32_t strtab_offset = offset;
-  memcpy(jit_elf_image_+offset, "\0.text", kTextStringSize);
-  offset += kTextStringSize;
-
-  // Create the section header table.
-  // Round up to multiple of kSymbtabAlignment, ensuring zero fill.
-  RoundAndClear(jit_elf_image_, offset, kSymbtabAlignment);
-  elf_hdr.e_shoff = offset;
-  Elf32_Shdr *sp =
-    reinterpret_cast<Elf32_Shdr *>(jit_elf_image_ + offset);
-
-  // Copy the first empty index.
-  *sp = all.GetSectionHeader(0);
-  BUMP_SHENT(sp);
-
-  elf_hdr.e_shnum = 1;
-  for (Elf32_Word i = 1; i < kExpectedSectionsInOATFile; i++) {
-    Elf32_Shdr& section_header = all.GetSectionHeader(i);
-    if (section_header.sh_addr == 0 && section_header.sh_type != SHT_DYNSYM) {
-      // Debug section: we need it.
-      *sp = section_header;
-      sp->sh_offset = debug_offsets[i];
-      sp->sh_addr = 0;
-      elf_hdr.e_shnum++;
-      BUMP_SHENT(sp);
-    } else if (section_header.sh_type == SHT_STRTAB &&
-                strcmp(".shstrtab",
-                       all.GetString(SHT_SYMTAB, section_header.sh_name)) == 0) {
-      // We also need the shared string table.
-      *sp = section_header;
-      sp->sh_offset = debug_offsets[i];
-      sp->sh_size += 16; /* sizeof ".symtab\0.strtab\0" */
-      sp->sh_addr = 0;
-      elf_hdr.e_shstrndx = elf_hdr.e_shnum;
-      elf_hdr.e_shnum++;
-      BUMP_SHENT(sp);
-    }
-  }
-
-  // Add a .text section for the matching code section.
-  *sp = *text_header;
-  sp->sh_type = SHT_NOBITS;
-  sp->sh_offset = 0;
-  sp->sh_addr = text_start_addr;
-  elf_hdr.e_shnum++;
-  BUMP_SHENT(sp);
-
-  // .symtab section:  Need an empty index and the .text entry
-  sp->sh_name = extra_shstrtab_entries;
-  sp->sh_type = SHT_SYMTAB;
-  sp->sh_flags = 0;
-  sp->sh_addr = 0;
-  sp->sh_offset = symtab_offset;
-  sp->sh_size = 2 * sizeof(Elf32_Sym);
-  sp->sh_link = elf_hdr.e_shnum + 1;  // Link to .strtab section.
-  sp->sh_info = 0;
-  sp->sh_addralign = 16;
-  sp->sh_entsize = sizeof(Elf32_Sym);
-  elf_hdr.e_shnum++;
-  BUMP_SHENT(sp);
-
-  // .strtab section:  Enough for .text\0.
-  sp->sh_name = extra_shstrtab_entries + 8;
-  sp->sh_type = SHT_STRTAB;
-  sp->sh_flags = 0;
-  sp->sh_addr = 0;
-  sp->sh_offset = strtab_offset;
-  sp->sh_size = kTextStringSize;
-  sp->sh_link = 0;
-  sp->sh_info = 0;
-  sp->sh_addralign = 16;
-  sp->sh_entsize = 0;
-  elf_hdr.e_shnum++;
-  BUMP_SHENT(sp);
-
-  // We now have enough information to tell GDB about our file.
-  jit_gdb_entry_ = CreateCodeEntry(jit_elf_image_, offset);
+  jit_gdb_entry_ = CreateCodeEntry(all.Begin(), all.Size());
+  gdb_file_mapping_.reset(all_ptr.release());
 }
 
 }  // namespace art
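The DebugAbbrev/DebugTag parsing added above walks .debug_abbrev entries whose abbrev codes, tags, and attribute/form pairs are all unsigned LEB128 values, decoded with ART's DecodeUnsignedLeb128() from leb128.h. A minimal standalone sketch of that decoding (illustrative only, not the patch's helper):

    #include <cstdint>

    // Unsigned LEB128: 7 data bits per byte, least-significant group first;
    // the high bit of each byte signals that another byte follows.
    static uint32_t DecodeULEB128(const uint8_t** data) {
      const uint8_t* p = *data;
      uint32_t result = 0;
      int shift = 0;
      uint8_t byte;
      do {
        byte = *p++;
        result |= static_cast<uint32_t>(byte & 0x7f) << shift;
        shift += 7;
      } while ((byte & 0x80) != 0);
      *data = p;
      return result;
    }

    // Example: the byte sequence {0xE5, 0x8E, 0x26} decodes to 624485.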
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index 6650acd..496690b 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -41,6 +41,9 @@
 class ElfFile {
  public:
   static ElfFile* Open(File* file, bool writable, bool program_header_only, std::string* error_msg);
+  // Open with specific mmap flags. Always maps in the whole file, not just the
+  // program header sections.
+  static ElfFile* Open(File* file, int mmap_prot, int mmap_flags, std::string* error_msg);
   ~ElfFile();
 
   // Load segments into memory based on PT_LOAD program headers
@@ -70,17 +73,19 @@
   Elf32_Word GetSectionHeaderNum() const;
   Elf32_Shdr& GetSectionHeader(Elf32_Word) const;
   Elf32_Shdr* FindSectionByType(Elf32_Word type) const;
+  Elf32_Shdr* FindSectionByName(const std::string& name) const;
 
   Elf32_Shdr& GetSectionNameStringSection() const;
 
   // Find .dynsym using .hash for more efficient lookup than FindSymbolAddress.
   const byte* FindDynamicSymbolAddress(const std::string& symbol_name) const;
+  const Elf32_Sym* FindDynamicSymbol(const std::string& symbol_name) const;
 
   static bool IsSymbolSectionType(Elf32_Word section_type);
   Elf32_Word GetSymbolNum(Elf32_Shdr&) const;
   Elf32_Sym& GetSymbol(Elf32_Word section_type, Elf32_Word i) const;
 
-  // Find symbol in specified table, returning NULL if it is not found.
+  // Find symbol in specified table, returning nullptr if it is not found.
   //
   // If build_map is true, builds a map to speed repeated access. The
   // map does not include untyped symbol values (aka STT_NOTYPE)
@@ -98,11 +103,11 @@
                                const std::string& symbol_name,
                                bool build_map);
 
-  // Lookup a string given string section and offset. Returns NULL for
+  // Lookup a string given string section and offset. Returns nullptr for
   // special 0 offset.
   const char* GetString(Elf32_Shdr&, Elf32_Word) const;
 
-  // Lookup a string by section type. Returns NULL for special 0 offset.
+  // Lookup a string by section type. Returns nullptr for special 0 offset.
   const char* GetString(Elf32_Word section_type, Elf32_Word) const;
 
   Elf32_Word GetDynamicNum() const;
@@ -125,7 +130,7 @@
  private:
   ElfFile(File* file, bool writable, bool program_header_only);
 
-  bool Setup(std::string* error_msg);
+  bool Setup(int prot, int flags, std::string* error_msg);
 
   bool SetMap(MemMap* map, std::string* error_msg);
 
@@ -181,9 +186,8 @@
   // Support for GDB JIT
   byte* jit_elf_image_;
   JITCodeEntry* jit_gdb_entry_;
+  std::unique_ptr<ElfFile> gdb_file_mapping_;
   void GdbJITSupport();
-  // Is this an OAT file with debug information in it?
-  static constexpr uint32_t kExpectedSectionsInOATFile = 12;
 };
 
 }  // namespace art
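A hypothetical caller sketch (not part of the patch) for the new APIs declared above: the Open(File*, int, int, ...) overload maps the whole file with explicit mmap protection and flags, and FindDynamicSymbol() exposes the full Elf32_Sym so a caller can read the symbol's size as well as its value. The symbol name "oatdata" below is purely illustrative.

    std::string error_msg;
    std::unique_ptr<ElfFile> elf(ElfFile::Open(file, PROT_READ, MAP_PRIVATE, &error_msg));
    if (elf.get() != nullptr) {
      const Elf32_Sym* sym = elf->FindDynamicSymbol("oatdata");  // Illustrative symbol name.
      if (sym != nullptr) {
        Elf32_Addr value = sym->st_value;  // Value/offset recorded for the symbol.
        Elf32_Word size = sym->st_size;    // Size of the symbol in bytes.
        // ... use value and size ...
      }
    }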
diff --git a/runtime/elf_utils.h b/runtime/elf_utils.h
index ce8587b..2c50047 100644
--- a/runtime/elf_utils.h
+++ b/runtime/elf_utils.h
@@ -19,8 +19,8 @@
 
 #include <sys/cdefs.h>
 
-// Explicitly include elf.h from elfutils to avoid Linux and other dependencies.
-#include "../../external/elfutils/0.153/libelf/elf.h"
+// Explicitly include our own elf.h to avoid Linux and other dependencies.
+#include "./elf.h"
 
 // Architecture dependent flags for the ELF header.
 #define EF_ARM_EABI_VER5 0x05000000
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 3301254..dde74de 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -154,10 +154,12 @@
 }
 
 // Generate the entrypoint functions.
+#if !defined(__APPLE__) || !defined(__LP64__)
 GENERATE_ENTRYPOINTS(_dlmalloc);
 GENERATE_ENTRYPOINTS(_rosalloc);
 GENERATE_ENTRYPOINTS(_bump_pointer);
 GENERATE_ENTRYPOINTS(_tlab);
+#endif
 
 static bool entry_points_instrumented = false;
 static gc::AllocatorType entry_points_allocator = gc::kAllocatorTypeDlMalloc;
@@ -172,6 +174,7 @@
 
 void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
   switch (entry_points_allocator) {
+#if !defined(__APPLE__) || !defined(__LP64__)
     case gc::kAllocatorTypeDlMalloc: {
       SetQuickAllocEntryPoints_dlmalloc(qpoints, entry_points_instrumented);
       break;
@@ -190,6 +193,7 @@
       SetQuickAllocEntryPoints_tlab(qpoints, entry_points_instrumented);
       break;
     }
+#endif
     default: {
       LOG(FATAL) << "Unimplemented";
     }
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 469d373..032f6be 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -129,7 +129,6 @@
   void (*pInvokeVirtualTrampolineWithAccessCheck)(uint32_t, void*);
 
   // Thread
-  void (*pCheckSuspend)(Thread*);  // Stub that is called when the suspend count is non-zero
   void (*pTestSuspend)();  // Stub that is periodically called to test the suspend count
 
   // Throws
diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
index f61c754..5c48fc7 100644
--- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
@@ -21,13 +21,6 @@
 
 namespace art {
 
-void CheckSuspendFromCode(Thread* thread)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  // Called when thread->suspend_count_ != 0 on JNI return. JNI method acts as callee-save frame.
-  thread->VerifyStack();
-  CheckSuspend(thread);
-}
-
 extern "C" void artTestSuspendFromCode(Thread* thread, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when suspend count check value is 0 and thread->suspend_count_ != 0
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 7a144b6..6fb9624 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -185,8 +185,8 @@
       case 3: return (5 * GetBytesPerGprSpillLocation(kRuntimeISA));
       case 4: return (6 * GetBytesPerGprSpillLocation(kRuntimeISA));
       default:
-        LOG(FATAL) << "Unexpected GPR index: " << gpr_index;
-        return 0;
+      LOG(FATAL) << "Unexpected GPR index: " << gpr_index;
+      return 0;
     }
   }
 #else
@@ -209,16 +209,15 @@
     return *reinterpret_cast<uintptr_t*>(lr);
   }
 
-  QuickArgumentVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static,
-                       const char* shorty, uint32_t shorty_len)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
-      is_static_(is_static), shorty_(shorty), shorty_len_(shorty_len),
-      gpr_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset),
-      fpr_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset),
-      stack_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize
-                  + StackArgumentStartFromShorty(is_static, shorty, shorty_len)),
-      gpr_index_(0), fpr_index_(0), stack_index_(0), cur_type_(Primitive::kPrimVoid),
-      is_split_long_or_double_(false) { }
+  QuickArgumentVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static, const char* shorty,
+                       uint32_t shorty_len) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
+          is_static_(is_static), shorty_(shorty), shorty_len_(shorty_len),
+          gpr_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset),
+          fpr_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset),
+          stack_args_(reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize
+                      + StackArgumentStartFromShorty(is_static, shorty, shorty_len)),
+          gpr_index_(0), fpr_index_(0), stack_index_(0), cur_type_(Primitive::kPrimVoid),
+          is_split_long_or_double_(false) {}
 
   virtual ~QuickArgumentVisitor() {}
 
@@ -388,9 +387,12 @@
     }
   }
 
+ protected:
   const bool is_static_;
   const char* const shorty_;
   const uint32_t shorty_len_;
+
+ private:
   byte* const gpr_args_;  // Address of GPR arguments in callee save frame.
   byte* const fpr_args_;  // Address of FPR arguments in callee save frame.
   byte* const stack_args_;  // Address of stack arguments in caller's frame.
@@ -409,7 +411,7 @@
   BuildQuickShadowFrameVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static,
                                const char* shorty, uint32_t shorty_len, ShadowFrame* sf,
                                size_t first_arg_reg) :
-    QuickArgumentVisitor(sp, is_static, shorty, shorty_len), sf_(sf), cur_reg_(first_arg_reg) {}
+      QuickArgumentVisitor(sp, is_static, shorty, shorty_len), sf_(sf), cur_reg_(first_arg_reg) {}
 
   void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
 
@@ -420,7 +422,7 @@
   DISALLOW_COPY_AND_ASSIGN(BuildQuickShadowFrameVisitor);
 };
 
-void BuildQuickShadowFrameVisitor::Visit()  {
+void BuildQuickShadowFrameVisitor::Visit() {
   Primitive::Type type = GetParamPrimitiveType();
   switch (type) {
     case Primitive::kPrimLong:  // Fall-through.
@@ -465,13 +467,14 @@
     return 0;
   } else {
     DCHECK(!method->IsNative()) << PrettyMethod(method);
-    const char* old_cause = self->StartAssertNoThreadSuspension("Building interpreter shadow frame");
+    const char* old_cause = self->StartAssertNoThreadSuspension(
+        "Building interpreter shadow frame");
     const DexFile::CodeItem* code_item = method->GetCodeItem();
     DCHECK(code_item != nullptr) << PrettyMethod(method);
     uint16_t num_regs = code_item->registers_size_;
     void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
-    ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, NULL,  // No last shadow coming from quick.
-                                                  method, 0, memory));
+    // No last shadow coming from quick.
+    ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, nullptr, method, 0, memory));
     size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
     uint32_t shorty_len = 0;
     const char* shorty = method->GetShorty(&shorty_len);
@@ -512,7 +515,7 @@
   BuildQuickArgumentVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static,
                             const char* shorty, uint32_t shorty_len,
                             ScopedObjectAccessUnchecked* soa, std::vector<jvalue>* args) :
-    QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa), args_(args) {}
+      QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa), args_(args) {}
 
   void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
 
@@ -584,7 +587,8 @@
   const char* old_cause =
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
   // Register the top of the managed stack, making stack crawlable.
-  DCHECK_EQ(sp->AsMirrorPtr(), proxy_method) << PrettyMethod(proxy_method);
+  DCHECK_EQ(sp->AsMirrorPtr(), proxy_method)
+      << PrettyMethod(proxy_method);
   self->SetTopOfStack(sp, 0);
   DCHECK_EQ(proxy_method->GetFrameSizeInBytes(),
             Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes())
@@ -600,7 +604,7 @@
   // Placing arguments into args vector and remove the receiver.
   mirror::ArtMethod* non_proxy_method = proxy_method->GetInterfaceMethodIfProxy();
   CHECK(!non_proxy_method->IsStatic()) << PrettyMethod(proxy_method) << " "
-      << PrettyMethod(non_proxy_method);
+                                       << PrettyMethod(non_proxy_method);
   std::vector<jvalue> args;
   uint32_t shorty_len = 0;
   const char* shorty = proxy_method->GetShorty(&shorty_len);
@@ -632,7 +636,7 @@
   RememberForGcArgumentVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static,
                                const char* shorty, uint32_t shorty_len,
                                ScopedObjectAccessUnchecked* soa) :
-    QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa) {}
+      QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa) {}
 
   void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
 
@@ -641,7 +645,8 @@
  private:
   ScopedObjectAccessUnchecked* const soa_;
   // References which we must update when exiting in case the GC moved the objects.
-  std::vector<std::pair<jobject, StackReference<mirror::Object>*>> references_;
+  std::vector<std::pair<jobject, StackReference<mirror::Object>*> > references_;
+
   DISALLOW_COPY_AND_ASSIGN(RememberForGcArgumentVisitor);
 };
 
@@ -663,7 +668,6 @@
   }
 }
 
-
 // Lazily resolve a method for quick. Called by stub code.
 extern "C" const void* artQuickResolutionTrampoline(mirror::ArtMethod* called,
                                                     mirror::Object* receiver,
@@ -740,7 +744,6 @@
         is_range = false;
     }
     dex_method_idx = (is_range) ? instr->VRegB_3rc() : instr->VRegB_35c();
-
   } else {
     invoke_type = kStatic;
     dex_file = called->GetDexFile();
@@ -825,8 +828,6 @@
   return code;
 }
 
-
-
 /*
  * This class uses a couple of observations to unite the different calling conventions through
  * a few constants.
@@ -867,7 +868,7 @@
  *                                          entry in the HandleScope (nullptr if necessary).
  *
  */
-template <class T> class BuildGenericJniFrameStateMachine {
+template<class T> class BuildNativeCallFrameStateMachine {
  public:
 #if defined(__arm__)
   // TODO: These are all dummy values!
@@ -912,7 +913,7 @@
 
   static constexpr size_t kRegistersNeededForLong = 2;
   static constexpr size_t kRegistersNeededForDouble = 2;
-  static constexpr bool kMultiRegistersAligned = false;       // x86 not using regs, anyways
+  static constexpr bool kMultiRegistersAligned = false;  // x86 not using regs, anyways
   static constexpr bool kMultiRegistersWidened = false;
   static constexpr bool kAlignLongOnStack = false;
   static constexpr bool kAlignDoubleOnStack = false;
@@ -932,34 +933,34 @@
 #endif
 
  public:
-  explicit BuildGenericJniFrameStateMachine(T* delegate) : gpr_index_(kNumNativeGprArgs),
-                                                           fpr_index_(kNumNativeFprArgs),
-                                                           stack_entries_(0),
-                                                           delegate_(delegate) {
+  explicit BuildNativeCallFrameStateMachine(T* delegate)
+      : gpr_index_(kNumNativeGprArgs),
+        fpr_index_(kNumNativeFprArgs),
+        stack_entries_(0),
+        delegate_(delegate) {
     // For register alignment, we want to assume that counters (gpr_index_, fpr_index_) are even iff
     // the next register is even; counting down is just to make the compiler happy...
     CHECK_EQ(kNumNativeGprArgs % 2, 0U);
     CHECK_EQ(kNumNativeFprArgs % 2, 0U);
   }
 
-  virtual ~BuildGenericJniFrameStateMachine() {}
+  virtual ~BuildNativeCallFrameStateMachine() {}
 
   bool HavePointerGpr() {
     return gpr_index_ > 0;
   }
 
-  void AdvancePointer(void* val) {
+  void AdvancePointer(const void* val) {
     if (HavePointerGpr()) {
       gpr_index_--;
       PushGpr(reinterpret_cast<uintptr_t>(val));
     } else {
-      stack_entries_++;         // TODO: have a field for pointer length as multiple of 32b
+      stack_entries_++;  // TODO: have a field for pointer length as multiple of 32b
       PushStack(reinterpret_cast<uintptr_t>(val));
       gpr_index_ = 0;
     }
   }
 
-
   bool HaveHandleScopeGpr() {
     return gpr_index_ > 0;
   }
@@ -976,7 +977,6 @@
     }
   }
 
-
   bool HaveIntGpr() {
     return gpr_index_ > 0;
   }
@@ -992,7 +992,6 @@
     }
   }
 
-
   bool HaveLongGpr() {
     return gpr_index_ >= kRegistersNeededForLong + (LongGprNeedsPadding() ? 1 : 0);
   }
@@ -1039,30 +1038,22 @@
     }
   }
 
-
   bool HaveFloatFpr() {
     return fpr_index_ > 0;
   }
 
-  template <typename U, typename V> V convert(U in) {
-    CHECK_LE(sizeof(U), sizeof(V));
-    union { U u; V v; } tmp;
-    tmp.u = in;
-    return tmp.v;
-  }
-
   void AdvanceFloat(float val) {
     if (kNativeSoftFloatAbi) {
-      AdvanceInt(convert<float, uint32_t>(val));
+      AdvanceInt(bit_cast<float, uint32_t>(val));
     } else {
       if (HaveFloatFpr()) {
         fpr_index_--;
         if (kRegistersNeededForDouble == 1) {
           if (kMultiRegistersWidened) {
-            PushFpr8(convert<double, uint64_t>(val));
+            PushFpr8(bit_cast<double, uint64_t>(val));
           } else {
             // No widening, just use the bits.
-            PushFpr8(convert<float, uint64_t>(val));
+            PushFpr8(bit_cast<float, uint64_t>(val));
           }
         } else {
           PushFpr4(val);
@@ -1071,16 +1062,17 @@
         stack_entries_++;
         if (kRegistersNeededForDouble == 1 && kMultiRegistersWidened) {
           // Need to widen before storing: Note the "double" in the template instantiation.
-          PushStack(convert<double, uintptr_t>(val));
+          // Note: We need to jump through those hoops to make the compiler happy.
+          DCHECK_EQ(sizeof(uintptr_t), sizeof(uint64_t));
+          PushStack(static_cast<uintptr_t>(bit_cast<double, uint64_t>(val)));
         } else {
-          PushStack(convert<float, uintptr_t>(val));
+          PushStack(bit_cast<float, uintptr_t>(val));
         }
         fpr_index_ = 0;
       }
     }
   }
 
-
   bool HaveDoubleFpr() {
     return fpr_index_ >= kRegistersNeededForDouble + (DoubleFprNeedsPadding() ? 1 : 0);
   }
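
The replacement of the local convert<U, V> union with bit_cast reinterprets the object representation without union-based type punning. A memcpy-based sketch with the same <From, To> argument order used above (note this is the reverse of C++20 std::bit_cast<To>(from)); an assumed stand-in, not necessarily identical to ART's utils.h version:

    #include <cstring>

    template <typename From, typename To>
    To bit_cast_sketch(From in) {
      static_assert(sizeof(From) <= sizeof(To), "destination must be at least as wide");
      To out{};                        // zero-fill so widening conversions are well-defined
      std::memcpy(&out, &in, sizeof(From));
      return out;
    }
    // bit_cast_sketch<float, uint32_t>(1.0f) == 0x3f800000u
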
@@ -1162,101 +1154,66 @@
   T* delegate_;             // What Push implementation gets called
 };
 
-class ComputeGenericJniFrameSize FINAL {
+// Computes the sizes of register stacks and call stack area. Handling of references can be extended
+// in subclasses.
+//
+// To handle native pointers, use "L" in the shorty for an object reference, which simulates
+// them with handles.
+class ComputeNativeCallFrameSize {
  public:
-  ComputeGenericJniFrameSize() : num_handle_scope_references_(0), num_stack_entries_(0) {}
+  ComputeNativeCallFrameSize() : num_stack_entries_(0) {}
+
+  virtual ~ComputeNativeCallFrameSize() {}
 
   uint32_t GetStackSize() {
     return num_stack_entries_ * sizeof(uintptr_t);
   }
 
-  // WARNING: After this, *sp won't be pointing to the method anymore!
-  void ComputeLayout(StackReference<mirror::ArtMethod>** m, bool is_static, const char* shorty,
-                     uint32_t shorty_len, void* sp, HandleScope** table,
-                     uint32_t* handle_scope_entries, uintptr_t** start_stack, uintptr_t** start_gpr,
-                     uint32_t** start_fpr, void** code_return, size_t* overall_size)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ComputeAll(is_static, shorty, shorty_len);
-
-    mirror::ArtMethod* method = (*m)->AsMirrorPtr();
-
-    uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp);
-
-    // First, fix up the layout of the callee-save frame.
-    // We have to squeeze in the HandleScope, and relocate the method pointer.
-
-    // "Free" the slot for the method.
-    sp8 += kPointerSize;  // In the callee-save frame we use a full pointer.
-
-    // Under the callee saves put handle scope and new method stack reference.
-    *handle_scope_entries = num_handle_scope_references_;
-
-    size_t handle_scope_size = HandleScope::SizeOf(num_handle_scope_references_);
-    size_t scope_and_method = handle_scope_size + sizeof(StackReference<mirror::ArtMethod>);
-
-    sp8 -= scope_and_method;
-    // Align by kStackAlignment.
-    sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
-
-    uint8_t* sp8_table = sp8 + sizeof(StackReference<mirror::ArtMethod>);
-    *table = reinterpret_cast<HandleScope*>(sp8_table);
-    (*table)->SetNumberOfReferences(num_handle_scope_references_);
-
-    // Add a slot for the method pointer, and fill it. Fix the pointer-pointer given to us.
-    uint8_t* method_pointer = sp8;
-    StackReference<mirror::ArtMethod>* new_method_ref =
-        reinterpret_cast<StackReference<mirror::ArtMethod>*>(method_pointer);
-    new_method_ref->Assign(method);
-    *m = new_method_ref;
-
-    // Reference cookie and padding
-    sp8 -= 8;
-    // Store HandleScope size
-    *reinterpret_cast<uint32_t*>(sp8) = static_cast<uint32_t>(handle_scope_size & 0xFFFFFFFF);
-
-    // Next comes the native call stack.
+  uint8_t* LayoutCallStack(uint8_t* sp8) {
     sp8 -= GetStackSize();
     // Align by kStackAlignment.
     sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
-    *start_stack = reinterpret_cast<uintptr_t*>(sp8);
-
-    // put fprs and gprs below
-    // Assumption is OK right now, as we have soft-float arm
-    size_t fregs = BuildGenericJniFrameStateMachine<ComputeGenericJniFrameSize>::kNumNativeFprArgs;
-    sp8 -= fregs * sizeof(uintptr_t);
-    *start_fpr = reinterpret_cast<uint32_t*>(sp8);
-    size_t iregs = BuildGenericJniFrameStateMachine<ComputeGenericJniFrameSize>::kNumNativeGprArgs;
-    sp8 -= iregs * sizeof(uintptr_t);
-    *start_gpr = reinterpret_cast<uintptr_t*>(sp8);
-
-    // reserve space for the code pointer
-    sp8 -= kPointerSize;
-    *code_return = reinterpret_cast<void*>(sp8);
-
-    *overall_size = reinterpret_cast<uint8_t*>(sp) - sp8;
-
-    // The new SP is stored at the end of the alloca, so it can be immediately popped
-    sp8 = reinterpret_cast<uint8_t*>(sp) - 5 * KB;
-    *(reinterpret_cast<uint8_t**>(sp8)) = method_pointer;
+    return sp8;
   }
 
-  void ComputeHandleScopeOffset() { }  // nothing to do, static right now
+  uint8_t* LayoutCallRegisterStacks(uint8_t* sp8, uintptr_t** start_gpr, uint32_t** start_fpr) {
+    // Assumption is OK right now, as we have soft-float arm
+    size_t fregs = BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>::kNumNativeFprArgs;
+    sp8 -= fregs * sizeof(uintptr_t);
+    *start_fpr = reinterpret_cast<uint32_t*>(sp8);
+    size_t iregs = BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>::kNumNativeGprArgs;
+    sp8 -= iregs * sizeof(uintptr_t);
+    *start_gpr = reinterpret_cast<uintptr_t*>(sp8);
+    return sp8;
+  }
 
-  void ComputeAll(bool is_static, const char* shorty, uint32_t shorty_len)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    BuildGenericJniFrameStateMachine<ComputeGenericJniFrameSize> sm(this);
+  uint8_t* LayoutNativeCall(uint8_t* sp8, uintptr_t** start_stack, uintptr_t** start_gpr,
+                            uint32_t** start_fpr) {
+    // Native call stack.
+    sp8 = LayoutCallStack(sp8);
+    *start_stack = reinterpret_cast<uintptr_t*>(sp8);
 
-    // JNIEnv
-    sm.AdvancePointer(nullptr);
+    // Put fprs and gprs below.
+    sp8 = LayoutCallRegisterStacks(sp8, start_gpr, start_fpr);
 
-    // Class object or this as first argument
-    sm.AdvanceHandleScope(reinterpret_cast<mirror::Object*>(0x12345678));
+    // Return the new bottom.
+    return sp8;
+  }
+
+  virtual void WalkHeader(BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>* sm)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {}
+
+  void Walk(const char* shorty, uint32_t shorty_len) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize> sm(this);
+
+    WalkHeader(&sm);
 
     for (uint32_t i = 1; i < shorty_len; ++i) {
       Primitive::Type cur_type_ = Primitive::GetType(shorty[i]);
       switch (cur_type_) {
         case Primitive::kPrimNot:
-          sm.AdvanceHandleScope(reinterpret_cast<mirror::Object*>(0x12345678));
+          sm.AdvanceHandleScope(
+              reinterpret_cast<mirror::Object*>(0x12345678));
           break;
 
         case Primitive::kPrimBoolean:
@@ -1299,50 +1256,135 @@
     // counting is already done in the superclass
   }
 
-  uintptr_t PushHandle(mirror::Object* /* ptr */) {
-    num_handle_scope_references_++;
+  virtual uintptr_t PushHandle(mirror::Object* /* ptr */) {
     return reinterpret_cast<uintptr_t>(nullptr);
   }
 
- private:
-  uint32_t num_handle_scope_references_;
+ protected:
   uint32_t num_stack_entries_;
 };
 
-// Visits arguments on the stack placing them into a region lower down the stack for the benefit
-// of transitioning into native code.
-class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor {
+class ComputeGenericJniFrameSize FINAL : public ComputeNativeCallFrameSize {
  public:
-  BuildGenericJniFrameVisitor(StackReference<mirror::ArtMethod>** sp, bool is_static,
-                              const char* shorty, uint32_t shorty_len, Thread* self) :
-      QuickArgumentVisitor(*sp, is_static, shorty, shorty_len), sm_(this) {
-    ComputeGenericJniFrameSize fsc;
-    fsc.ComputeLayout(sp, is_static, shorty, shorty_len, *sp, &handle_scope_, &handle_scope_expected_refs_,
-                      &cur_stack_arg_, &cur_gpr_reg_, &cur_fpr_reg_, &code_return_,
-                      &alloca_used_size_);
-    handle_scope_number_of_references_ = 0;
-    cur_hs_entry_ = GetFirstHandleScopeEntry();
+  ComputeGenericJniFrameSize() : num_handle_scope_references_(0) {}
 
-    // jni environment is always first argument
-    sm_.AdvancePointer(self->GetJniEnv());
+  // Lays out the callee-save frame. Assumes that the incorrect frame corresponding to RefsAndArgs
+  // is at *m = sp. Will update to point to the bottom of the save frame.
+  //
+  // Note: assumes Walk() has been run before.
+  void LayoutCalleeSaveFrame(StackReference<mirror::ArtMethod>** m, void* sp, HandleScope** table,
+                             uint32_t* handle_scope_entries)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* method = (*m)->AsMirrorPtr();
 
-    if (is_static) {
-      sm_.AdvanceHandleScope((*sp)->AsMirrorPtr()->GetDeclaringClass());
-    }
+    uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp);
+
+    // First, fix up the layout of the callee-save frame.
+    // We have to squeeze in the HandleScope, and relocate the method pointer.
+
+    // "Free" the slot for the method.
+    sp8 += kPointerSize;  // In the callee-save frame we use a full pointer.
+
+    // Under the callee saves put handle scope and new method stack reference.
+    *handle_scope_entries = num_handle_scope_references_;
+
+    size_t handle_scope_size = HandleScope::SizeOf(num_handle_scope_references_);
+    size_t scope_and_method = handle_scope_size + sizeof(StackReference<mirror::ArtMethod>);
+
+    sp8 -= scope_and_method;
+    // Align by kStackAlignment.
+    sp8 = reinterpret_cast<uint8_t*>(RoundDown(
+        reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
+
+    uint8_t* sp8_table = sp8 + sizeof(StackReference<mirror::ArtMethod>);
+    *table = reinterpret_cast<HandleScope*>(sp8_table);
+    (*table)->SetNumberOfReferences(num_handle_scope_references_);
+
+    // Add a slot for the method pointer, and fill it. Fix the pointer-pointer given to us.
+    uint8_t* method_pointer = sp8;
+    StackReference<mirror::ArtMethod>* new_method_ref =
+        reinterpret_cast<StackReference<mirror::ArtMethod>*>(method_pointer);
+    new_method_ref->Assign(method);
+    *m = new_method_ref;
   }
 
-  void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
-
-  void FinalizeHandleScope(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  StackReference<mirror::Object>* GetFirstHandleScopeEntry()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return handle_scope_->GetHandle(0).GetReference();
+  // Adds space for the cookie. Note: may leave stack unaligned.
+  void LayoutCookie(uint8_t** sp) {
+    // Reference cookie and padding
+    *sp -= 8;
   }
 
-  jobject GetFirstHandleScopeJObject()
+  // Re-layout the callee-save frame (insert a handle-scope). Then add space for the cookie.
+  // Returns the new bottom. Note: this may be unaligned.
+  uint8_t* LayoutJNISaveFrame(StackReference<mirror::ArtMethod>** m, void* sp, HandleScope** table,
+                              uint32_t* handle_scope_entries)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return handle_scope_->GetHandle(0).ToJObject();
+    // First, fix up the layout of the callee-save frame.
+    // We have to squeeze in the HandleScope, and relocate the method pointer.
+    LayoutCalleeSaveFrame(m, sp, table, handle_scope_entries);
+
+    // The bottom of the callee-save frame is now where the method is, *m.
+    uint8_t* sp8 = reinterpret_cast<uint8_t*>(*m);
+
+    // Add space for cookie.
+    LayoutCookie(&sp8);
+
+    return sp8;
+  }
+
+  // WARNING: After this, *sp won't be pointing to the method anymore!
+  uint8_t* ComputeLayout(StackReference<mirror::ArtMethod>** m, bool is_static, const char* shorty,
+                         uint32_t shorty_len, HandleScope** table, uint32_t* handle_scope_entries,
+                         uintptr_t** start_stack, uintptr_t** start_gpr, uint32_t** start_fpr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Walk(shorty, shorty_len);
+
+    // JNI part.
+    uint8_t* sp8 = LayoutJNISaveFrame(m, reinterpret_cast<void*>(*m), table, handle_scope_entries);
+
+    sp8 = LayoutNativeCall(sp8, start_stack, start_gpr, start_fpr);
+
+    // Return the new bottom.
+    return sp8;
+  }
+
+  uintptr_t PushHandle(mirror::Object* /* ptr */) OVERRIDE;
+
+  // Add JNIEnv* and jobj/jclass before the shorty-derived elements.
+  void WalkHeader(BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>* sm) OVERRIDE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ private:
+  uint32_t num_handle_scope_references_;
+};
+
+uintptr_t ComputeGenericJniFrameSize::PushHandle(mirror::Object* /* ptr */) {
+  num_handle_scope_references_++;
+  return reinterpret_cast<uintptr_t>(nullptr);
+}
+
+void ComputeGenericJniFrameSize::WalkHeader(
+    BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>* sm) {
+  // JNIEnv
+  sm->AdvancePointer(nullptr);
+
+  // Class object or this as first argument
+  sm->AdvanceHandleScope(reinterpret_cast<mirror::Object*>(0x12345678));
+}
+
+// Class to push values to three separate regions. Used to fill the native call part. Adheres to
+// the template requirements of BuildNativeCallFrameStateMachine.
+class FillNativeCall {
+ public:
+  FillNativeCall(uintptr_t* gpr_regs, uint32_t* fpr_regs, uintptr_t* stack_args) :
+      cur_gpr_reg_(gpr_regs), cur_fpr_reg_(fpr_regs), cur_stack_arg_(stack_args) {}
+
+  virtual ~FillNativeCall() {}
+
+  void Reset(uintptr_t* gpr_regs, uint32_t* fpr_regs, uintptr_t* stack_args) {
+    cur_gpr_reg_ = gpr_regs;
+    cur_fpr_reg_ = fpr_regs;
+    cur_stack_arg_ = stack_args;
   }
 
   void PushGpr(uintptr_t val) {
@@ -1366,46 +1408,110 @@
     cur_stack_arg_++;
   }
 
-  uintptr_t PushHandle(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uintptr_t tmp;
-    if (ref == nullptr) {
-      *cur_hs_entry_ = StackReference<mirror::Object>();
-      tmp = reinterpret_cast<uintptr_t>(nullptr);
-    } else {
-      *cur_hs_entry_ = StackReference<mirror::Object>::FromMirrorPtr(ref);
-      tmp = reinterpret_cast<uintptr_t>(cur_hs_entry_);
-    }
-    cur_hs_entry_++;
-    handle_scope_number_of_references_++;
-    return tmp;
-  }
-
-  // Size of the part of the alloca that we actually need.
-  size_t GetAllocaUsedSize() {
-    return alloca_used_size_;
-  }
-
-  void* GetCodeReturn() {
-    return code_return_;
+  virtual uintptr_t PushHandle(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    LOG(FATAL) << "(Non-JNI) Native call does not use handles.";
+    return 0U;
   }
 
  private:
-  uint32_t handle_scope_number_of_references_;
-  StackReference<mirror::Object>* cur_hs_entry_;
-  HandleScope* handle_scope_;
-  uint32_t handle_scope_expected_refs_;
   uintptr_t* cur_gpr_reg_;
   uint32_t* cur_fpr_reg_;
   uintptr_t* cur_stack_arg_;
-  // StackReference<mirror::Object>* top_of_handle_scope_;
-  void* code_return_;
-  size_t alloca_used_size_;
+};
 
-  BuildGenericJniFrameStateMachine<BuildGenericJniFrameVisitor> sm_;
+// Visits arguments on the stack placing them into a region lower down the stack for the benefit
+// of transitioning into native code.
+class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor {
+ public:
+  BuildGenericJniFrameVisitor(StackReference<mirror::ArtMethod>** sp, bool is_static,
+                              const char* shorty, uint32_t shorty_len, Thread* self)
+     : QuickArgumentVisitor(*sp, is_static, shorty, shorty_len),
+       jni_call_(nullptr, nullptr, nullptr, nullptr), sm_(&jni_call_) {
+    ComputeGenericJniFrameSize fsc;
+    uintptr_t* start_gpr_reg;
+    uint32_t* start_fpr_reg;
+    uintptr_t* start_stack_arg;
+    uint32_t handle_scope_entries;
+    bottom_of_used_area_ = fsc.ComputeLayout(sp, is_static, shorty, shorty_len, &handle_scope_,
+                                             &handle_scope_entries, &start_stack_arg,
+                                             &start_gpr_reg, &start_fpr_reg);
+
+    handle_scope_->SetNumberOfReferences(handle_scope_entries);
+    jni_call_.Reset(start_gpr_reg, start_fpr_reg, start_stack_arg, handle_scope_);
+
+    // jni environment is always first argument
+    sm_.AdvancePointer(self->GetJniEnv());
+
+    if (is_static) {
+      sm_.AdvanceHandleScope((*sp)->AsMirrorPtr()->GetDeclaringClass());
+    }
+  }
+
+  void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
+
+  void FinalizeHandleScope(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  StackReference<mirror::Object>* GetFirstHandleScopeEntry()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return handle_scope_->GetHandle(0).GetReference();
+  }
+
+  jobject GetFirstHandleScopeJObject() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return handle_scope_->GetHandle(0).ToJObject();
+  }
+
+  void* GetBottomOfUsedArea() {
+    return bottom_of_used_area_;
+  }
+
+ private:
+  // A class to fill a JNI call. Adds reference/handle-scope management to FillNativeCall.
+  class FillJniCall FINAL : public FillNativeCall {
+   public:
+    FillJniCall(uintptr_t* gpr_regs, uint32_t* fpr_regs, uintptr_t* stack_args,
+                HandleScope* handle_scope) : FillNativeCall(gpr_regs, fpr_regs, stack_args),
+                                             handle_scope_(handle_scope), cur_entry_(0) {}
+
+    uintptr_t PushHandle(mirror::Object* ref) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+    void Reset(uintptr_t* gpr_regs, uint32_t* fpr_regs, uintptr_t* stack_args, HandleScope* scope) {
+      FillNativeCall::Reset(gpr_regs, fpr_regs, stack_args);
+      handle_scope_ = scope;
+      cur_entry_ = 0U;
+    }
+
+    void ResetRemainingScopeSlots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+      // Initialize padding entries.
+      size_t expected_slots = handle_scope_->NumberOfReferences();
+      while (cur_entry_ < expected_slots) {
+        handle_scope_->GetHandle(cur_entry_++).Assign(nullptr);
+      }
+      DCHECK_NE(cur_entry_, 0U);
+    }
+
+   private:
+    HandleScope* handle_scope_;
+    size_t cur_entry_;
+  };
+
+  HandleScope* handle_scope_;
+  FillJniCall jni_call_;
+  void* bottom_of_used_area_;
+
+  BuildNativeCallFrameStateMachine<FillJniCall> sm_;
 
   DISALLOW_COPY_AND_ASSIGN(BuildGenericJniFrameVisitor);
 };
 
+uintptr_t BuildGenericJniFrameVisitor::FillJniCall::PushHandle(mirror::Object* ref) {
+  uintptr_t tmp;
+  Handle<mirror::Object> h = handle_scope_->GetHandle(cur_entry_);
+  h.Assign(ref);
+  tmp = reinterpret_cast<uintptr_t>(h.ToJObject());
+  cur_entry_++;
+  return tmp;
+}
+
 void BuildGenericJniFrameVisitor::Visit() {
   Primitive::Type type = GetParamPrimitiveType();
   switch (type) {
@@ -1453,14 +1559,8 @@
 }
 
 void BuildGenericJniFrameVisitor::FinalizeHandleScope(Thread* self) {
-  // Initialize padding entries.
-  while (handle_scope_number_of_references_ < handle_scope_expected_refs_) {
-    *cur_hs_entry_ = StackReference<mirror::Object>();
-    cur_hs_entry_++;
-    handle_scope_number_of_references_++;
-  }
-  handle_scope_->SetNumberOfReferences(handle_scope_expected_refs_);
-  DCHECK_NE(handle_scope_expected_refs_, 0U);
+  // Clear out the rest of the scope.
+  jni_call_.ResetRemainingScopeSlots();
   // Install HandleScope.
   self->PushHandleScope(handle_scope_);
 }
@@ -1495,19 +1595,20 @@
  * 1) How many bytes of the alloca can be released, if the value is non-negative.
  * 2) An error, if the value is negative.
  */
-extern "C" ssize_t artQuickGenericJniTrampoline(Thread* self, StackReference<mirror::ArtMethod>* sp)
+extern "C" TwoWordReturn artQuickGenericJniTrampoline(Thread* self,
+                                                      StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* called = sp->AsMirrorPtr();
   DCHECK(called->IsNative()) << PrettyMethod(called, true);
-
-  // run the visitor
   uint32_t shorty_len = 0;
   const char* shorty = called->GetShorty(&shorty_len);
+
+  // Run the visitor.
   BuildGenericJniFrameVisitor visitor(&sp, called->IsStatic(), shorty, shorty_len, self);
   visitor.VisitArguments();
   visitor.FinalizeHandleScope(self);
 
-  // fix up managed-stack things in Thread
+  // Fix up managed-stack things in Thread.
   self->SetTopOfStack(sp, 0);
 
   self->VerifyStack();
@@ -1519,7 +1620,7 @@
     if (self->IsExceptionPending()) {
       self->PopHandleScope();
       // A negative value denotes an error.
-      return -1;
+      return GetTwoWordFailureValue();
     }
   } else {
     cookie = JniMethodStart(self);
@@ -1550,36 +1651,31 @@
         artQuickGenericJniEndJNINonRef(self, cookie, lock);
       }
 
-      return -1;
+      return GetTwoWordFailureValue();
     }
     // Note that the native code pointer will be automatically set by artFindNativeMethod().
   }
 
-  // Store the native code pointer in the stack at the right location.
-  uintptr_t* code_pointer = reinterpret_cast<uintptr_t*>(visitor.GetCodeReturn());
-  *code_pointer = reinterpret_cast<uintptr_t>(nativeCode);
-
-  // 5K reserved, window_size + frame pointer used.
-  size_t window_size = visitor.GetAllocaUsedSize();
-  return (5 * KB) - window_size - kPointerSize;
+  // Return native code addr(lo) and bottom of alloca address(hi).
+  return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(visitor.GetBottomOfUsedArea()),
+                                reinterpret_cast<uintptr_t>(nativeCode));
 }
 
 /*
  * Is called after the native JNI code. Responsible for cleanup (handle scope, saved state) and
  * unlocking.
  */
-extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self,
-                                                    StackReference<mirror::ArtMethod>* sp,
-                                                    jvalue result, uint64_t result_f)
+extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, jvalue result, uint64_t result_f)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  StackReference<mirror::ArtMethod>* sp = self->GetManagedStack()->GetTopQuickFrame();
   uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp);
   mirror::ArtMethod* called = sp->AsMirrorPtr();
   uint32_t cookie = *(sp32 - 1);
 
   jobject lock = nullptr;
   if (called->IsSynchronized()) {
-    HandleScope* table = reinterpret_cast<HandleScope*>(
-        reinterpret_cast<uint8_t*>(sp) + sizeof(StackReference<mirror::ArtMethod>));
+    HandleScope* table = reinterpret_cast<HandleScope*>(reinterpret_cast<uint8_t*>(sp)
+        + sizeof(StackReference<mirror::ArtMethod>));
     lock = table->GetHandle(0).ToJObject();
   }
 
@@ -1636,8 +1732,7 @@
     FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
     const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()->GetDexFile();
     uint32_t shorty_len;
-    const char* shorty =
-        dex_file->GetMethodShorty(dex_file->GetMethodId(method_idx), &shorty_len);
+    const char* shorty = dex_file->GetMethodShorty(dex_file->GetMethodId(method_idx), &shorty_len);
     {
       // Remember the args in case a GC happens in FindMethodFromCode.
       ScopedObjectAccessUnchecked soa(self->GetJniEnv());
@@ -1657,8 +1752,9 @@
   const void* code = method->GetEntryPointFromQuickCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
-  DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
-      << method->GetDexFile()->GetLocation();
+  DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method)
+                          << " location: "
+                          << method->GetDexFile()->GetLocation();
 
   return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(code),
                                 reinterpret_cast<uintptr_t>(method));
@@ -1685,47 +1781,50 @@
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kSuper, true);
 #undef EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL
 
-
 // See comments in runtime_support_asm.S
-extern "C" TwoWordReturn artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
-    mirror::Object* this_object,
-    mirror::ArtMethod* caller_method,
-    Thread* self,
-    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return artInvokeCommon<kInterface, true>(method_idx, this_object, caller_method, self, sp);
+extern "C" TwoWordReturn artInvokeInterfaceTrampolineWithAccessCheck(
+    uint32_t method_idx, mirror::Object* this_object,
+    mirror::ArtMethod* caller_method, Thread* self,
+    StackReference<mirror::ArtMethod>* sp)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return artInvokeCommon<kInterface, true>(method_idx, this_object,
+                                           caller_method, self, sp);
 }
 
-
-extern "C" TwoWordReturn artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
-    mirror::Object* this_object,
-    mirror::ArtMethod* caller_method,
-    Thread* self,
-    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return artInvokeCommon<kDirect, true>(method_idx, this_object, caller_method, self, sp);
+extern "C" TwoWordReturn artInvokeDirectTrampolineWithAccessCheck(
+    uint32_t method_idx, mirror::Object* this_object,
+    mirror::ArtMethod* caller_method, Thread* self,
+    StackReference<mirror::ArtMethod>* sp)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return artInvokeCommon<kDirect, true>(method_idx, this_object, caller_method,
+                                        self, sp);
 }
 
-extern "C" TwoWordReturn artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
-    mirror::Object* this_object,
-    mirror::ArtMethod* caller_method,
-    Thread* self,
-    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return artInvokeCommon<kStatic, true>(method_idx, this_object, caller_method, self, sp);
+extern "C" TwoWordReturn artInvokeStaticTrampolineWithAccessCheck(
+    uint32_t method_idx, mirror::Object* this_object,
+    mirror::ArtMethod* caller_method, Thread* self,
+    StackReference<mirror::ArtMethod>* sp)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return artInvokeCommon<kStatic, true>(method_idx, this_object, caller_method,
+                                        self, sp);
 }
 
-extern "C" TwoWordReturn artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
-    mirror::Object* this_object,
-    mirror::ArtMethod* caller_method,
-    Thread* self,
-    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return artInvokeCommon<kSuper, true>(method_idx, this_object, caller_method, self, sp);
+extern "C" TwoWordReturn artInvokeSuperTrampolineWithAccessCheck(
+    uint32_t method_idx, mirror::Object* this_object,
+    mirror::ArtMethod* caller_method, Thread* self,
+    StackReference<mirror::ArtMethod>* sp)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return artInvokeCommon<kSuper, true>(method_idx, this_object, caller_method,
+                                       self, sp);
 }
 
-extern "C" TwoWordReturn artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
-    mirror::Object* this_object,
-    mirror::ArtMethod* caller_method,
-    Thread* self,
-    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return artInvokeCommon<kVirtual, true>(method_idx, this_object, caller_method, self, sp);
+extern "C" TwoWordReturn artInvokeVirtualTrampolineWithAccessCheck(
+    uint32_t method_idx, mirror::Object* this_object,
+    mirror::ArtMethod* caller_method, Thread* self,
+    StackReference<mirror::ArtMethod>* sp)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return artInvokeCommon<kVirtual, true>(method_idx, this_object, caller_method,
+                                         self, sp);
 }
 
 // Determine target of interface dispatch. This object is known non-null.
@@ -1769,10 +1868,11 @@
       dex_method_idx = instr->VRegB_3rc();
     }
 
-    const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+    const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()
+        ->GetDexFile();
     uint32_t shorty_len;
-    const char* shorty =
-        dex_file->GetMethodShorty(dex_file->GetMethodId(dex_method_idx), &shorty_len);
+    const char* shorty = dex_file->GetMethodShorty(dex_file->GetMethodId(dex_method_idx),
+                                                   &shorty_len);
     {
       // Remember the args in case a GC happens in FindMethodFromCode.
       ScopedObjectAccessUnchecked soa(self->GetJniEnv());
@@ -1791,8 +1891,8 @@
   const void* code = method->GetEntryPointFromQuickCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
-  DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method) << " location: "
-      << method->GetDexFile()->GetLocation();
+  DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method)
+                          << " location: " << method->GetDexFile()->GetLocation();
 
   return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(code),
                                 reinterpret_cast<uintptr_t>(method));
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0dd33cf..c572baf 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -251,8 +251,7 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInvokeSuperTrampolineWithAccessCheck,
                          pInvokeVirtualTrampolineWithAccessCheck, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInvokeVirtualTrampolineWithAccessCheck,
-                         pCheckSuspend, kPointerSize);
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckSuspend, pTestSuspend, kPointerSize);
+                         pTestSuspend, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTestSuspend, pDeliverException, kPointerSize);
 
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pDeliverException, pThrowArrayBounds, kPointerSize);
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index a34cd38..6d70a38 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -417,7 +417,10 @@
 
   // Implements java.lang.Runtime.freeMemory.
   size_t GetFreeMemory() const {
-    return max_allowed_footprint_ - num_bytes_allocated_.LoadSequentiallyConsistent();
+    size_t byte_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
+    // Make sure we don't get a negative number since the max allowed footprint is only updated
+    // after the GC. But we can still allocate even if bytes_allocated > max_allowed_footprint_.
+    return std::max(max_allowed_footprint_, byte_allocated) - byte_allocated;
   }
 
   // get the space that corresponds to an object's address. Current implementation searches all
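
Why the clamp matters, as a small sketch: with size_t arithmetic, footprint minus allocated wraps to a huge value whenever allocated temporarily exceeds the footprint, which can happen between an allocation and the next GC updating max_allowed_footprint_.

    #include <algorithm>
    #include <cstddef>

    size_t FreeMemorySketch(size_t footprint, size_t allocated) {
      return std::max(footprint, allocated) - allocated;   // never underflows
    }
    // FreeMemorySketch(100, 120) == 0, whereas 100 - 120 would wrap to SIZE_MAX - 19.
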
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 96eeb8d..6e10a4c 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -33,7 +33,8 @@
   kThumb2,
   kX86,
   kX86_64,
-  kMips
+  kMips,
+  kMips64
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index d637a94..29d3c8a 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -115,12 +115,13 @@
   // Delete all the JNI references.
   JNIEnv* env = self->GetJniEnv();
   for (const auto& pair : object_to_entry_) {
-    const ObjectRegistryEntry& entry = *pair.second;
-    if (entry.jni_reference_type == JNIWeakGlobalRefType) {
-      env->DeleteWeakGlobalRef(entry.jni_reference);
+    const ObjectRegistryEntry* entry = pair.second;
+    if (entry->jni_reference_type == JNIWeakGlobalRefType) {
+      env->DeleteWeakGlobalRef(entry->jni_reference);
     } else {
-      env->DeleteGlobalRef(entry.jni_reference);
+      env->DeleteGlobalRef(entry->jni_reference);
     }
+    delete entry;
   }
   // Clear the maps.
   object_to_entry_.clear();
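
The added delete fixes a leak: the loop previously dropped the JNI reference but never freed the heap-allocated ObjectRegistryEntry. A generic sketch of the own-and-clear pattern involved:

    #include <map>

    template <typename K, typename V>
    void DeleteValuesAndClear(std::multimap<K, V*>* m) {
      for (auto& pair : *m) {
        delete pair.second;   // free the owned entry before dropping the pointer
      }
      m->clear();
    }
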
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 8842f59..845691d 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -112,6 +112,17 @@
                                  kind, c->GetDescriptor().c_str(), name, sig);
 }
 
+static void ReportInvalidJNINativeMethod(const ScopedObjectAccess& soa, mirror::Class* c,
+                                         const char* kind, jint idx, bool return_errors)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  LOG(return_errors ? ERROR : FATAL) << "Failed to register native method in "
+      << PrettyDescriptor(c) << " in " << c->GetDexCache()->GetLocation()->ToModifiedUtf8()
+      << ": " << kind << " is null at index " << idx;
+  ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
+  soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchMethodError;",
+                                 "%s is null at index %d", kind, idx);
+}
+
 static mirror::Class* EnsureInitialized(Thread* self, mirror::Class* klass)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (LIKELY(klass->IsInitialized())) {
@@ -592,7 +603,12 @@
     mirror::ArtMethod* m = soa.DecodeMethod(mid);
     CHECK(!kMovingMethods);
     jobject art_method = soa.AddLocalReference<jobject>(m);
-    jobject reflect_method = env->AllocObject(WellKnownClasses::java_lang_reflect_Method);
+    jobject reflect_method;
+    if (m->IsConstructor()) {
+      reflect_method = env->AllocObject(WellKnownClasses::java_lang_reflect_Constructor);
+    } else {
+      reflect_method = env->AllocObject(WellKnownClasses::java_lang_reflect_Method);
+    }
     if (env->ExceptionCheck()) {
       return nullptr;
     }
@@ -679,6 +695,11 @@
   static void ExceptionDescribe(JNIEnv* env) {
     ScopedObjectAccess soa(env);
 
+    // If we have no exception to describe, pass through.
+    if (!soa.Self()->GetException(nullptr)) {
+      return;
+    }
+
     StackHandleScope<3> hs(soa.Self());
     // TODO: Use nullptr instead of null handles?
     auto old_throw_this_object(hs.NewHandle<mirror::Object>(nullptr));
@@ -2347,6 +2368,17 @@
     for (jint i = 0; i < method_count; ++i) {
       const char* name = methods[i].name;
       const char* sig = methods[i].signature;
+      const void* fnPtr = methods[i].fnPtr;
+      if (UNLIKELY(name == nullptr)) {
+        ReportInvalidJNINativeMethod(soa, c, "method name", i, return_errors);
+        return JNI_ERR;
+      } else if (UNLIKELY(sig == nullptr)) {
+        ReportInvalidJNINativeMethod(soa, c, "method signature", i, return_errors);
+        return JNI_ERR;
+      } else if (UNLIKELY(fnPtr == nullptr)) {
+        ReportInvalidJNINativeMethod(soa, c, "native function", i, return_errors);
+        return JNI_ERR;
+      }
       bool is_fast = false;
       if (*sig == '!') {
         is_fast = true;
@@ -2374,7 +2406,7 @@
 
       VLOG(jni) << "[Registering JNI native method " << PrettyMethod(m) << "]";
 
-      m->RegisterNative(soa.Self(), methods[i].fnPtr, is_fast);
+      m->RegisterNative(soa.Self(), fnPtr, is_fast);
     }
     return JNI_OK;
   }
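
Seen from the caller's side, the new checks mean every JNINativeMethod field must be non-null, or RegisterNatives fails with JNI_ERR and a pending NoSuchMethodError instead of crashing later in RegisterNative. A minimal sketch; Java_Foo_bar and RegisterFooNatives are hypothetical names:

    #include <jni.h>

    static void Java_Foo_bar(JNIEnv*, jobject) {}   // hypothetical native implementation

    jint RegisterFooNatives(JNIEnv* env, jclass foo_class) {
      static const JNINativeMethod kMethods[] = {
          {"bar", "()V", reinterpret_cast<void*>(Java_Foo_bar)},
      };
      // Any nullptr name/signature/fnPtr now yields JNI_ERR with an exception pending.
      return env->RegisterNatives(foo_class, kMethods, 1);
    }
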
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index a933f86..8ef1cb6 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -380,19 +380,39 @@
 
 TEST_F(JniInternalTest, FromReflectedMethod_ToReflectedMethod) {
   jclass jlrMethod = env_->FindClass("java/lang/reflect/Method");
+  ASSERT_NE(jlrMethod, nullptr);
+  jclass jlrConstructor = env_->FindClass("java/lang/reflect/Constructor");
+  ASSERT_NE(jlrConstructor, nullptr);
   jclass c = env_->FindClass("java/lang/String");
   ASSERT_NE(c, nullptr);
-  jmethodID mid = env_->GetMethodID(c, "length", "()I");
+
+  jmethodID mid = env_->GetMethodID(c, "<init>", "()V");
   ASSERT_NE(mid, nullptr);
-  // Turn the mid into a java.lang.reflect.Method...
+  // Turn the mid into a java.lang.reflect.Constructor...
   jobject method = env_->ToReflectedMethod(c, mid, JNI_FALSE);
-  ASSERT_NE(c, nullptr);
-  ASSERT_TRUE(env_->IsInstanceOf(method, jlrMethod));
+  ASSERT_NE(method, nullptr);
+  ASSERT_TRUE(env_->IsInstanceOf(method, jlrConstructor));
   // ...and back again.
   jmethodID mid2 = env_->FromReflectedMethod(method);
   ASSERT_NE(mid2, nullptr);
   // Make sure we can actually use it.
-  jstring s = env_->NewStringUTF("poop");
+  jstring s = reinterpret_cast<jstring>(env_->AllocObject(c));
+  ASSERT_NE(s, nullptr);
+  env_->CallVoidMethod(s, mid2);
+  ASSERT_EQ(JNI_FALSE, env_->ExceptionCheck());
+
+  mid = env_->GetMethodID(c, "length", "()I");
+  ASSERT_NE(mid, nullptr);
+  // Turn the mid into a java.lang.reflect.Method...
+  method = env_->ToReflectedMethod(c, mid, JNI_FALSE);
+  ASSERT_NE(method, nullptr);
+  ASSERT_TRUE(env_->IsInstanceOf(method, jlrMethod));
+  // ...and back again.
+  mid2 = env_->FromReflectedMethod(method);
+  ASSERT_NE(mid2, nullptr);
+  // Make sure we can actually use it.
+  s = env_->NewStringUTF("poop");
+  ASSERT_NE(s, nullptr);
   ASSERT_EQ(4, env_->CallIntMethod(s, mid2));
 
   // Bad arguments.
@@ -412,27 +432,49 @@
 TEST_F(JniInternalTest, RegisterAndUnregisterNatives) {
   jclass jlobject = env_->FindClass("java/lang/Object");
   jclass jlnsme = env_->FindClass("java/lang/NoSuchMethodError");
+  void* native_function = reinterpret_cast<void*>(BogusMethod);
 
   // Sanity check that no exceptions are pending.
   ASSERT_FALSE(env_->ExceptionCheck());
 
+  // Check that registering method without name causes a NoSuchMethodError.
+  {
+    JNINativeMethod methods[] = { { nullptr, "()V", native_function } };
+    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+  }
+  ExpectException(jlnsme);
+
+  // Check that registering method without signature causes a NoSuchMethodError.
+  {
+    JNINativeMethod methods[] = { { "notify", nullptr, native_function } };
+    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+  }
+  ExpectException(jlnsme);
+
+  // Check that registering method without function causes a NoSuchMethodError.
+  {
+    JNINativeMethod methods[] = { { "notify", "()V", nullptr } };
+    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+  }
+  ExpectException(jlnsme);
+
   // Check that registering to a non-existent java.lang.Object.foo() causes a NoSuchMethodError.
   {
-    JNINativeMethod methods[] = { { "foo", "()V", nullptr } };
+    JNINativeMethod methods[] = { { "foo", "()V", native_function } };
     EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
   }
   ExpectException(jlnsme);
 
   // Check that registering non-native methods causes a NoSuchMethodError.
   {
-    JNINativeMethod methods[] = { { "equals", "(Ljava/lang/Object;)Z", nullptr } };
+    JNINativeMethod methods[] = { { "equals", "(Ljava/lang/Object;)Z", native_function } };
     EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
   }
   ExpectException(jlnsme);
 
   // Check that registering native methods is successful.
   {
-    JNINativeMethod methods[] = { { "notify", "()V", reinterpret_cast<void*>(BogusMethod) } };
+    JNINativeMethod methods[] = { { "notify", "()V", native_function } };
     EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_OK);
   }
   EXPECT_FALSE(env_->ExceptionCheck());
@@ -1452,6 +1494,12 @@
   env_->DeleteWeakGlobalRef(o2);
 }
 
+TEST_F(JniInternalTest, ExceptionDescribe) {
+  // This checks how ExceptionDescribe handles a call with no pending exception.
+  env_->ExceptionClear();
+  env_->ExceptionDescribe();
+}
+
 TEST_F(JniInternalTest, Throw) {
   EXPECT_EQ(JNI_ERR, env_->Throw(nullptr));
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 8d987df..1074253 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -72,7 +72,7 @@
 
 std::multimap<void*, MemMap*> MemMap::maps_;
 
-#if defined(__LP64__) && !defined(__x86_64__)
+#if USE_ART_LOW_4G_ALLOCATOR
 // Handling mem_map in 32b address range for 64b architectures that do not support MAP_32BIT.
 
 // The regular start of memory allocations. The first 64KB is protected by SELinux.
@@ -235,7 +235,7 @@
   // A page allocator would be a useful abstraction here, as
   // 1) It is doubtful that MAP_32BIT on x86_64 is doing the right job for us
   // 2) The linear scheme, even with simple saving of the last known position, is very crude
-#if defined(__LP64__) && !defined(__x86_64__)
+#if USE_ART_LOW_4G_ALLOCATOR
   // MAP_32BIT only available on x86_64.
   void* actual = MAP_FAILED;
   if (low_4gb && expected == nullptr) {
@@ -299,7 +299,7 @@
   }
 
 #else
-#ifdef __x86_64__
+#if defined(__LP64__)
   if (low_4gb && expected == nullptr) {
     flags |= MAP_32BIT;
   }
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index e42251c..defa6a5 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -30,6 +30,12 @@
 
 namespace art {
 
+#if defined(__LP64__) && (!defined(__x86_64__) || defined(__APPLE__))
+#define USE_ART_LOW_4G_ALLOCATOR 1
+#else
+#define USE_ART_LOW_4G_ALLOCATOR 0
+#endif
+
 #ifdef __linux__
 static constexpr bool kMadviseZeroes = true;
 #else
@@ -147,8 +153,8 @@
   size_t base_size_;  // Length of mapping. May be changed by RemapAtEnd (ie Zygote).
   int prot_;  // Protection of the map.
 
-#if defined(__LP64__) && !defined(__x86_64__)
-  static uintptr_t next_mem_pos_;   // next memory location to check for low_4g extent
+#if USE_ART_LOW_4G_ALLOCATOR
+  static uintptr_t next_mem_pos_;   // Next memory location to check for low_4g extent.
 #endif
 
   // All the non-empty MemMaps. Use a multimap as we do a reserve-and-divide (eg ElfMap::Load()).
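
USE_ART_LOW_4G_ALLOCATOR selects between two strategies for keeping mappings below 4 GiB on 64-bit targets: Linux/x86_64 can lean on MAP_32BIT, while arm64, mips64, and x86_64 Mac hosts must find a low address themselves. A simplified sketch of the distinction, with no retry or alignment logic:

    #include <cstddef>
    #include <cstdint>
    #include <sys/mman.h>

    void* MapLow4GSketch(size_t length, int prot) {
    #if defined(__linux__) && defined(__x86_64__)
      // The kernel keeps the mapping below 4 GiB for us.
      return mmap(nullptr, length, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
    #else
      // No MAP_32BIT: pass an explicit low hint and verify the result really is low.
      void* actual = mmap(reinterpret_cast<void*>(0x100000), length, prot,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (actual == MAP_FAILED) {
        return MAP_FAILED;
      }
      if (reinterpret_cast<uintptr_t>(actual) + length >= (UINT64_C(1) << 32)) {
        munmap(actual, length);   // landed too high; the real allocator scans forward instead
        return MAP_FAILED;
      }
      return actual;
    #endif
    }
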
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 65799cd..43bdf49 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -168,8 +168,7 @@
 
 template<typename T>
 inline PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) {
-  DCHECK(array_class_ != NULL);
-  Array* raw_array = Array::Alloc<true>(self, array_class_, length, sizeof(T),
+  Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length, sizeof(T),
                                         Runtime::Current()->GetHeap()->GetCurrentAllocator());
   return down_cast<PrimitiveArray<T>*>(raw_array);
 }
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 64e2317..25a4535 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -20,6 +20,7 @@
 #include "gc/allocator_type.h"
 #include "object.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 
 namespace art {
 
@@ -160,9 +161,10 @@
     array_class_ = array_class;
   }
 
-  static Class* GetArrayClass() {
+  static Class* GetArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(array_class_ != nullptr);
-    return array_class_;
+    return ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(
+        &array_class_);
   }
 
   static void ResetArrayClass() {
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 4858613..502cec7 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -23,6 +23,7 @@
 #include "modifiers.h"
 #include "object.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 
 namespace art {
 
@@ -121,9 +122,11 @@
   template<bool kTransactionActive>
   void SetObj(Object* object, Object* new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Class* GetJavaLangReflectArtField() {
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  static Class* GetJavaLangReflectArtField()  SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(java_lang_reflect_ArtField_ != nullptr);
-    return java_lang_reflect_ArtField_;
+    return ReadBarrier::BarrierForRoot<mirror::Class, kReadBarrierOption>(
+        &java_lang_reflect_ArtField_);
   }
 
   static void SetClass(Class* java_lang_reflect_ArtField);
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 1c21b81..a55c48b 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -24,6 +24,7 @@
 #include "object.h"
 #include "object_callbacks.h"
 #include "quick/quick_method_frame_info.h"
+#include "read_barrier.h"
 
 namespace art {
 
@@ -409,9 +410,11 @@
 
   static void SetClass(Class* java_lang_reflect_ArtMethod);
 
-  static Class* GetJavaLangReflectArtMethod() {
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  static Class* GetJavaLangReflectArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(java_lang_reflect_ArtMethod_ != nullptr);
-    return java_lang_reflect_ArtMethod_;
+    return ReadBarrier::BarrierForRoot<mirror::Class, kReadBarrierOption>(
+        &java_lang_reflect_ArtMethod_);
   }
 
   static void ResetClass();
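
The accessor changes in array.h, art_field.h, and art_method.h all follow one pattern: every read of a static class root goes through a root read barrier so a moving collector can hand back the relocated object. A sketch of that pattern; BarrierForRoot and the lock annotation are the real ART helpers (requires ART's read_barrier.h and locks.h), while the surrounding class is illustrative:

    class SomeMirrorType {
     public:
      template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
      static mirror::Class* GetClassRoot() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
        DCHECK(class_root_ != nullptr);
        return ReadBarrier::BarrierForRoot<mirror::Class, kReadBarrierOption>(&class_root_);
      }

     private:
      static mirror::Class* class_root_;   // GC root; never read directly
    };
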
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 6205f70..451235c 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -511,12 +511,14 @@
   VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
 }
 
+template<ReadBarrierOption kReadBarrierOption>
 inline bool Class::IsArtFieldClass() const {
-  return this == ArtField::GetJavaLangReflectArtField();
+  return this == ArtField::GetJavaLangReflectArtField<kReadBarrierOption>();
 }
 
+template<ReadBarrierOption kReadBarrierOption>
 inline bool Class::IsArtMethodClass() const {
-  return this == ArtMethod::GetJavaLangReflectArtMethod();
+  return this == ArtMethod::GetJavaLangReflectArtMethod<kReadBarrierOption>();
 }
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index c798180..c6472c6 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -40,7 +40,9 @@
 Class* Class::java_lang_Class_ = nullptr;
 
 void Class::SetClassClass(Class* java_lang_Class) {
-  CHECK(java_lang_Class_ == nullptr) << java_lang_Class_ << " " << java_lang_Class;
+  CHECK(java_lang_Class_ == nullptr)
+      << ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(&java_lang_Class_)
+      << " " << java_lang_Class;
   CHECK(java_lang_Class != nullptr);
   java_lang_Class_ = java_lang_Class;
 }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index c83f411..e735c45 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -24,6 +24,7 @@
 #include "object.h"
 #include "object_callbacks.h"
 #include "primitive.h"
+#include "read_barrier.h"
 
 /*
  * A magic value for refOffsets. Ignore the bits and walk the super
@@ -376,8 +377,10 @@
 
   bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtFieldClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtMethodClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static MemberOffset ComponentTypeOffset() {
@@ -845,13 +848,14 @@
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx);
   }
 
-  static Class* GetJavaLangClass() {
+  static Class* GetJavaLangClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(java_lang_Class_ != NULL);
-    return java_lang_Class_;
+    return ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(
+        &java_lang_Class_);
   }
 
   // Can't call this SetClass or else gets called instead of Object::SetClass in places.
-  static void SetClassClass(Class* java_lang_Class);
+  static void SetClassClass(Class* java_lang_Class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void ResetClass();
   static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 15ecd3c..62c1162 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -218,7 +218,8 @@
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtField() {
-  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsArtFieldClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template IsArtFieldClass<kReadBarrierOption>();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -229,7 +230,8 @@
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtMethod() {
-  return GetClass<kVerifyFlags, kReadBarrierOption>()->IsArtMethodClass();
+  return GetClass<kVerifyFlags, kReadBarrierOption>()->
+      template IsArtMethodClass<kReadBarrierOption>();
 }
 
 template<VerifyObjectFlags kVerifyFlags>
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index e094e8b..abecbc5 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -19,6 +19,7 @@
 
 #include "object.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 
 namespace art {
 
@@ -55,9 +56,10 @@
   static void ResetClass();
   static void VisitRoots(RootCallback* callback, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static Class* GetStackTraceElement() {
+  static Class* GetStackTraceElement() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(java_lang_StackTraceElement_ != NULL);
-    return java_lang_StackTraceElement_;
+    return ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(
+        &java_lang_StackTraceElement_);
   }
 
  private:
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 6c3015f..b8acede 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -21,6 +21,7 @@
 
 #include "class.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 
 namespace art {
 
@@ -102,9 +103,10 @@
 
   int32_t CompareTo(String* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Class* GetJavaLangString() {
+  static Class* GetJavaLangString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(java_lang_String_ != NULL);
-    return java_lang_String_;
+    return ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(
+        &java_lang_String_);
   }
 
   static void SetClass(Class* java_lang_String);
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index c4127e0..cf54ad6 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -19,6 +19,7 @@
 
 #include "object.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 #include "string.h"
 
 namespace art {
@@ -45,9 +46,10 @@
   void SetStackState(Object* state) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsCheckedException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Class* GetJavaLangThrowable() {
+  static Class* GetJavaLangThrowable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(java_lang_Throwable_ != NULL);
-    return java_lang_Throwable_;
+    return ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(
+        &java_lang_Throwable_);
   }
 
   static void SetClass(Class* java_lang_Throwable);
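
The change repeated across these mirror headers is uniform: every static class root (array_class_, java_lang_Class_, java_lang_String_, java_lang_Throwable_, and so on) is now returned through ReadBarrier::BarrierForRoot and the accessor gains a mutator-lock annotation, so a collector that relies on read barriers can intercept root loads. A minimal sketch of that accessor shape, assuming a simple atomic-load barrier and using illustrative names (RootBarrierForRoot, MirrorLike, Klass are not ART symbols):

    #include <atomic>

    class Klass;  // stand-in for mirror::Class

    // Illustrative barrier: a moving collector could forward the root here; this
    // sketch just performs an acquire load so every caller sees the current value.
    template <typename T>
    static T* RootBarrierForRoot(std::atomic<T*>* root) {
      return root->load(std::memory_order_acquire);
    }

    class MirrorLike {
     public:
      // Shape of the new accessors: never hand out the raw static field directly.
      static Klass* GetClassRoot() {
        return RootBarrierForRoot(&class_root_);
      }
     private:
      static std::atomic<Klass*> class_root_;
    };

    std::atomic<Klass*> MirrorLike::class_root_{nullptr};
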
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 9512a5a..440d3d0 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -26,6 +26,7 @@
 #include <unistd.h>
 
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
@@ -106,34 +107,19 @@
     return 0;
   }
 
-  uint32_t dex_location_checksum;
-  uint32_t* dex_location_checksum_pointer = &dex_location_checksum;
-  std::vector<std::string> error_msgs;
-  std::string error_msg;
-  if (!DexFile::GetChecksum(sourceName.c_str(), dex_location_checksum_pointer, &error_msg)) {
-    dex_location_checksum_pointer = NULL;
-  }
-
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  const DexFile* dex_file;
-  if (outputName.c_str() == nullptr) {
-    // FindOrCreateOatFileForDexLocation can tolerate a missing dex_location_checksum
-    dex_file = linker->FindDexFileInOatFileFromDexLocation(sourceName.c_str(),
-                                                           dex_location_checksum_pointer,
-                                                           kRuntimeISA,
-                                                           &error_msgs);
+  std::unique_ptr<std::vector<const DexFile*>> dex_files(new std::vector<const DexFile*>());
+  std::vector<std::string> error_msgs;
+
+  bool success = linker->OpenDexFilesFromOat(sourceName.c_str(), outputName.c_str(), &error_msgs,
+                                             dex_files.get());
+
+  if (success) {
+    return static_cast<jlong>(reinterpret_cast<uintptr_t>(dex_files.release()));
   } else {
-    // FindOrCreateOatFileForDexLocation requires the dex_location_checksum
-    if (dex_location_checksum_pointer == NULL) {
-      ScopedObjectAccess soa(env);
-      DCHECK(!error_msg.empty());
-      ThrowIOException("%s", error_msg.c_str());
-      return 0;
-    }
-    dex_file = linker->FindOrCreateOatFileForDexLocation(sourceName.c_str(), dex_location_checksum,
-                                                         outputName.c_str(), &error_msgs);
-  }
-  if (dex_file == nullptr) {
+    // The vector should be empty after a failed loading attempt.
+    DCHECK_EQ(0U, dex_files->size());
+
     ScopedObjectAccess soa(env);
     CHECK(!error_msgs.empty());
     // The most important message is at the end. So set up nesting by going forward, which will
@@ -146,35 +132,41 @@
 
     return 0;
   }
-  return static_cast<jlong>(reinterpret_cast<uintptr_t>(dex_file));
 }
 
-static const DexFile* toDexFile(jlong dex_file_address, JNIEnv* env) {
-  const DexFile* dex_file = reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(dex_file_address));
-  if (UNLIKELY(dex_file == nullptr)) {
+static std::vector<const DexFile*>* toDexFiles(jlong dex_file_address, JNIEnv* env) {
+  std::vector<const DexFile*>* dex_files = reinterpret_cast<std::vector<const DexFile*>*>(
+      static_cast<uintptr_t>(dex_file_address));
+  if (UNLIKELY(dex_files == nullptr)) {
     ScopedObjectAccess soa(env);
     ThrowNullPointerException(NULL, "dex_file == null");
   }
-  return dex_file;
+  return dex_files;
 }
 
 static void DexFile_closeDexFile(JNIEnv* env, jclass, jlong cookie) {
-  const DexFile* dex_file;
-  dex_file = toDexFile(cookie, env);
-  if (dex_file == nullptr) {
+  std::unique_ptr<std::vector<const DexFile*>> dex_files(toDexFiles(cookie, env));
+  if (dex_files.get() == nullptr) {
     return;
   }
   ScopedObjectAccess soa(env);
-  if (Runtime::Current()->GetClassLinker()->IsDexFileRegistered(*dex_file)) {
-    return;
+
+  size_t index = 0;
+  for (const DexFile* dex_file : *dex_files) {
+    if (Runtime::Current()->GetClassLinker()->IsDexFileRegistered(*dex_file)) {
+      (*dex_files)[index] = nullptr;
+    }
+    index++;
   }
-  delete dex_file;
+
+  STLDeleteElements(dex_files.get());
+  // The unique_ptr will delete the vector itself.
 }
 
 static jclass DexFile_defineClassNative(JNIEnv* env, jclass, jstring javaName, jobject javaLoader,
                                         jlong cookie) {
-  const DexFile* dex_file = toDexFile(cookie, env);
-  if (dex_file == NULL) {
+  std::vector<const DexFile*>* dex_files = toDexFiles(cookie, env);
+  if (dex_files == NULL) {
     VLOG(class_linker) << "Failed to find dex_file";
     return NULL;
   }
@@ -184,33 +176,60 @@
     return NULL;
   }
   const std::string descriptor(DotToDescriptor(class_name.c_str()));
-  const DexFile::ClassDef* dex_class_def = dex_file->FindClassDef(descriptor.c_str());
-  if (dex_class_def == NULL) {
-    VLOG(class_linker) << "Failed to find dex_class_def";
-    return NULL;
+
+  for (const DexFile* dex_file : *dex_files) {
+    const DexFile::ClassDef* dex_class_def = dex_file->FindClassDef(descriptor.c_str());
+    if (dex_class_def != nullptr) {
+      ScopedObjectAccess soa(env);
+      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+      class_linker->RegisterDexFile(*dex_file);
+      StackHandleScope<1> hs(soa.Self());
+      Handle<mirror::ClassLoader> class_loader(
+          hs.NewHandle(soa.Decode<mirror::ClassLoader*>(javaLoader)));
+      mirror::Class* result = class_linker->DefineClass(descriptor.c_str(), class_loader, *dex_file,
+                                                        *dex_class_def);
+      if (result != nullptr) {
+        VLOG(class_linker) << "DexFile_defineClassNative returning " << result;
+        return soa.AddLocalReference<jclass>(result);
+      }
+    }
   }
-  ScopedObjectAccess soa(env);
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  class_linker->RegisterDexFile(*dex_file);
-  StackHandleScope<1> hs(soa.Self());
-  Handle<mirror::ClassLoader> class_loader(
-      hs.NewHandle(soa.Decode<mirror::ClassLoader*>(javaLoader)));
-  mirror::Class* result = class_linker->DefineClass(descriptor.c_str(), class_loader, *dex_file,
-                                                    *dex_class_def);
-  VLOG(class_linker) << "DexFile_defineClassNative returning " << result;
-  return soa.AddLocalReference<jclass>(result);
+  VLOG(class_linker) << "Failed to find dex_class_def";
+  return nullptr;
 }
 
+// Needed as a comparison functor for sets of const char*.
+struct CharPointerComparator {
+  bool operator()(const char *str1, const char *str2) const {
+    return strcmp(str1, str2) < 0;
+  }
+};
+
+// Note: this can be an expensive call, as we sort out duplicates in MultiDex files.
 static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jlong cookie) {
   jobjectArray result = nullptr;
-  const DexFile* dex_file = toDexFile(cookie, env);
-  if (dex_file != nullptr) {
-    result = env->NewObjectArray(dex_file->NumClassDefs(), WellKnownClasses::java_lang_String,
-                                 nullptr);
-    if (result != nullptr) {
+  std::vector<const DexFile*>* dex_files = toDexFiles(cookie, env);
+
+  if (dex_files != nullptr) {
+    // Push all class descriptors into a set. Use set instead of unordered_set as we want to
+    // retrieve them in sorted order at the end.
+    std::set<const char*, CharPointerComparator> descriptors;
+    for (const DexFile* dex_file : *dex_files) {
       for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) {
         const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-        std::string descriptor(DescriptorToDot(dex_file->GetClassDescriptor(class_def)));
+        const char* descriptor = dex_file->GetClassDescriptor(class_def);
+        descriptors.insert(descriptor);
+      }
+    }
+
+    // Now create output array and copy the set into it.
+    result = env->NewObjectArray(descriptors.size(), WellKnownClasses::java_lang_String, nullptr);
+    if (result != nullptr) {
+      auto it = descriptors.begin();
+      auto it_end = descriptors.end();
+      jsize i = 0;
+      for (; it != it_end; it++, ++i) {
+        std::string descriptor(DescriptorToDot(*it));
         ScopedLocalRef<jstring> jdescriptor(env, env->NewStringUTF(descriptor.c_str()));
         if (jdescriptor.get() == nullptr) {
           return nullptr;
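
The key shift in this file is what the jlong cookie means: it no longer encodes a single DexFile* but a heap-allocated std::vector<const DexFile*>*, so one native open can cover every dex file inside a multidex container and DexFile_closeDexFile frees the whole set. A reduced sketch of that cookie round trip, under the assumption that Java holds the cookie opaquely until close (DexFileStub, PackCookie, UnpackCookie are illustrative names):

    #include <cstdint>
    #include <memory>
    #include <vector>

    struct DexFileStub {};  // stand-in for art::DexFile

    // Pack the vector's address into the 64-bit cookie handed back to Java.
    static int64_t PackCookie(std::unique_ptr<std::vector<const DexFileStub*>> files) {
      return static_cast<int64_t>(reinterpret_cast<uintptr_t>(files.release()));
    }

    // Recover the vector on the way back in; a null result means a bad cookie.
    static std::vector<const DexFileStub*>* UnpackCookie(int64_t cookie) {
      return reinterpret_cast<std::vector<const DexFileStub*>*>(
          static_cast<uintptr_t>(cookie));
    }
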
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index 6bbe642..ee99e78 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -147,23 +147,73 @@
   dstObjArray->AssignableCheckingMemcpy(dstPos, srcObjArray, srcPos, count, true);
 }
 
-static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
-                                          jobject javaDst, jint dstPos, jint count) {
+// Helper template to down_cast a generic Array to its specific primitive array type.
+template <typename T>
+inline T* AsPrimitiveArray(mirror::Array* array) {
+  return down_cast<T*>(array);
+}
+
+template <typename T, Primitive::Type kPrimType>
+inline void System_arraycopyTUnchecked(JNIEnv* env, jobject javaSrc, jint srcPos,
+                                       jobject javaDst, jint dstPos, jint count) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* srcObject = soa.Decode<mirror::Object*>(javaSrc);
   mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
-  DCHECK(srcObject != nullptr);
   DCHECK(dstObject != nullptr);
   mirror::Array* srcArray = srcObject->AsArray();
   mirror::Array* dstArray = dstObject->AsArray();
-  DCHECK_GE(srcPos, 0);
-  DCHECK_GE(dstPos, 0);
   DCHECK_GE(count, 0);
-  DCHECK_LE(srcPos + count, srcArray->GetLength());
-  DCHECK_LE(dstPos + count, dstArray->GetLength());
   DCHECK_EQ(srcArray->GetClass(), dstArray->GetClass());
-  DCHECK_EQ(srcArray->GetClass()->GetComponentType()->GetPrimitiveType(), Primitive::kPrimChar);
-  dstArray->AsCharArray()->Memmove(dstPos, srcArray->AsCharArray(), srcPos, count);
+  DCHECK_EQ(srcArray->GetClass()->GetComponentType()->GetPrimitiveType(), kPrimType);
+  AsPrimitiveArray<T>(dstArray)->Memmove(dstPos, AsPrimitiveArray<T>(srcArray), srcPos, count);
+}
+
+static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                          jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::CharArray, Primitive::kPrimChar>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyByteUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                          jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::ByteArray, Primitive::kPrimByte>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyShortUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                           jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::ShortArray, Primitive::kPrimShort>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyIntUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                         jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::IntArray, Primitive::kPrimInt>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyLongUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                          jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::LongArray, Primitive::kPrimLong>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyFloatUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                           jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::FloatArray, Primitive::kPrimFloat>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyDoubleUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                            jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::DoubleArray, Primitive::kPrimDouble>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
+}
+
+static void System_arraycopyBooleanUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                             jobject javaDst, jint dstPos, jint count) {
+  System_arraycopyTUnchecked<mirror::BooleanArray, Primitive::kPrimBoolean>(env, javaSrc, srcPos,
+      javaDst, dstPos, count);
 }
 
 static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
@@ -178,6 +228,13 @@
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(System, arraycopy, "!(Ljava/lang/Object;ILjava/lang/Object;II)V"),
   NATIVE_METHOD(System, arraycopyCharUnchecked, "!([CI[CII)V"),
+  NATIVE_METHOD(System, arraycopyByteUnchecked, "!([BI[BII)V"),
+  NATIVE_METHOD(System, arraycopyShortUnchecked, "!([SI[SII)V"),
+  NATIVE_METHOD(System, arraycopyIntUnchecked, "!([II[III)V"),
+  NATIVE_METHOD(System, arraycopyLongUnchecked, "!([JI[JII)V"),
+  NATIVE_METHOD(System, arraycopyFloatUnchecked, "!([FI[FII)V"),
+  NATIVE_METHOD(System, arraycopyDoubleUnchecked, "!([DI[DII)V"),
+  NATIVE_METHOD(System, arraycopyBooleanUnchecked, "!([ZI[ZII)V"),
   NATIVE_METHOD(System, identityHashCode, "!(Ljava/lang/Object;)I"),
 };
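
The seven new arraycopy entry points above are all one-line instantiations of System_arraycopyTUnchecked, which keeps the copy logic in a single template. The same dispatch idea in a self-contained form, using plain arrays instead of mirror:: types (ArraycopyUnchecked and the wrappers are illustrative, not ART code):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // One memmove-based copy shared by every primitive element type.
    template <typename T>
    static void ArraycopyUnchecked(const T* src, int src_pos, T* dst, int dst_pos, int count) {
      std::memmove(dst + dst_pos, src + src_pos, static_cast<std::size_t>(count) * sizeof(T));
    }

    // Thin per-type wrappers, analogous to arraycopyCharUnchecked, arraycopyLongUnchecked, ...
    static void ArraycopyChar(const uint16_t* src, int sp, uint16_t* dst, int dp, int n) {
      ArraycopyUnchecked(src, sp, dst, dp, n);
    }
    static void ArraycopyLong(const int64_t* src, int sp, int64_t* dst, int dp, int n) {
      ArraycopyUnchecked(src, sp, dst, dp, n);
    }
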
 
diff --git a/runtime/oat.cc b/runtime/oat.cc
index f4721f2..857c0a2 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '5', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '6', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 7cdd8f5..e1e133f 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -567,8 +567,12 @@
       }
     } else if (option == "-Xprofile-type:method") {
       profiler_options_.profile_type_ = kProfilerMethod;
-    } else if (option == "-Xprofile-type:dexpc") {
-      profiler_options_.profile_type_ = kProfilerMethodAndDexPC;
+    } else if (option == "-Xprofile-type:stack") {
+      profiler_options_.profile_type_ = kProfilerBoundedStack;
+    } else if (StartsWith(option, "-Xprofile-max-stack-depth:")) {
+      if (!ParseUnsignedInteger(option, ':', &profiler_options_.max_stack_depth_)) {
+        return false;
+      }
     } else if (StartsWith(option, "-implicit-checks:")) {
       std::string checks;
       if (!ParseStringAfterChar(option, ':', &checks)) {
@@ -812,7 +816,8 @@
   UsageMessage(stream, "  -Xprofile-start-immediately\n");
   UsageMessage(stream, "  -Xprofile-top-k-threshold:doublevalue\n");
   UsageMessage(stream, "  -Xprofile-top-k-change-threshold:doublevalue\n");
-  UsageMessage(stream, "  -Xprofile-type:{method,dexpc}\n");
+  UsageMessage(stream, "  -Xprofile-type:{method,stack}\n");
+  UsageMessage(stream, "  -Xprofile-max-stack-depth:integervalue\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 2cd876a..7a7a92a 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -57,22 +57,66 @@
 // wakelock or something to modify the run characteristics.  This can be done when we
 // have some performance data after it's been used for a while.
 
+// Walk the Java stack, visiting at most max_depth_ methods.
+class BoundedStackVisitor : public StackVisitor {
+ public:
+  BoundedStackVisitor(std::vector<std::pair<mirror::ArtMethod*, uint32_t>>* stack,
+      Thread* thread, uint32_t max_depth)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : StackVisitor(thread, NULL), stack_(stack), max_depth_(max_depth), depth_(0) {
+  }
+
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* m = GetMethod();
+    if (m->IsRuntimeMethod()) {
+      return true;
+    }
+    uint32_t dex_pc_ = GetDexPc();
+    stack_->push_back(std::make_pair(m, dex_pc_));
+    ++depth_;
+    if (depth_ < max_depth_) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+ private:
+  std::vector<std::pair<mirror::ArtMethod*, uint32_t>>* stack_;
+  const uint32_t max_depth_;
+  uint32_t depth_;
+};
 
 // This is called from either a thread list traversal or from a checkpoint.  Regardless
 // of which caller, the mutator lock must be held.
 static void GetSample(Thread* thread, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   BackgroundMethodSamplingProfiler* profiler =
       reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
-  uint32_t dex_pc;
-  mirror::ArtMethod* method = thread->GetCurrentMethod(&dex_pc);
-  if (false && method == nullptr) {
-    LOG(INFO) << "No current method available";
-    std::ostringstream os;
-    thread->Dump(os);
-    std::string data(os.str());
-    LOG(INFO) << data;
+  const ProfilerOptions profile_options = profiler->GetProfilerOptions();
+  switch (profile_options.GetProfileType()) {
+    case kProfilerMethod: {
+      mirror::ArtMethod* method = thread->GetCurrentMethod(nullptr);
+      if (false && method == nullptr) {
+        LOG(INFO) << "No current method available";
+        std::ostringstream os;
+        thread->Dump(os);
+        std::string data(os.str());
+        LOG(INFO) << data;
+      }
+      profiler->RecordMethod(method);
+      break;
+    }
+    case kProfilerBoundedStack: {
+      std::vector<InstructionLocation> stack;
+      uint32_t max_depth = profile_options.GetMaxStackDepth();
+      BoundedStackVisitor bounded_stack_visitor(&stack, thread, max_depth);
+      bounded_stack_visitor.WalkStack();
+      profiler->RecordStack(stack);
+      break;
+    }
+    default:
+      LOG(INFO) << "This profile type is not implemented.";
   }
-  profiler->RecordMethod(method, dex_pc);
 }
 
 // A closure that is called by the thread checkpoint code.
@@ -359,13 +403,13 @@
   // filtered_methods_.insert("void java.lang.Object.wait(long, int)");
 }
 
-// A method has been hit, record its invocation in the method map.
-// The mutator_lock must be held (shared) when this is called.
-void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method, uint32_t dex_pc) {
+// Filter out methods the profiler doesn't want to record.
+// The mutator lock is required since some statistics are updated here.
+bool BackgroundMethodSamplingProfiler::ProcessMethod(mirror::ArtMethod* method) {
   if (method == nullptr) {
     profile_table_.NullMethod();
     // Don't record a nullptr method.
-    return;
+    return false;
   }
 
   mirror::Class* cls = method->GetDeclaringClass();
@@ -373,7 +417,7 @@
     if (cls->GetClassLoader() == nullptr) {
       // Don't include things in the boot
       profile_table_.BootMethod();
-      return;
+      return false;
     }
   }
 
@@ -391,14 +435,27 @@
     // Don't include specific filtered methods.
     is_filtered = filtered_methods_.count(method_full_name) != 0;
   }
+  return !is_filtered;
+}
 
+// A method has been hit, record its invocation in the method map.
+// The mutator_lock must be held (shared) when this is called.
+void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method) {
   // Add to the profile table unless it is filtered out.
-  if (!is_filtered) {
-    if (options_.GetProfileType() == kProfilerMethod) {
-      profile_table_.Put(method);
-    } else if (options_.GetProfileType() == kProfilerMethodAndDexPC) {
-      profile_table_.PutDexPC(method, dex_pc);
-    }
+  if (ProcessMethod(method)) {
+    profile_table_.Put(method);
+  }
+}
+
+// Record the current bounded stack into sampling results.
+void BackgroundMethodSamplingProfiler::RecordStack(const std::vector<InstructionLocation>& stack) {
+  if (stack.size() == 0) {
+    return;
+  }
+  // Get the method on top of the stack. We use this method to perform filtering.
+  mirror::ArtMethod* method = stack.front().first;
+  if (ProcessMethod(method)) {
+    profile_table_.PutStack(stack);
   }
 }
 
@@ -419,8 +476,9 @@
     num_boot_methods_(0) {
   for (int i = 0; i < kHashSize; i++) {
     table[i] = nullptr;
-    dex_table[i] = nullptr;
   }
+  method_context_table = nullptr;
+  stack_trie_root_ = nullptr;
 }
 
 ProfileSampleResults::~ProfileSampleResults() {
@@ -444,27 +502,67 @@
   num_samples_++;
 }
 
-// Add a method with dex pc to the profile table
-void ProfileSampleResults::PutDexPC(mirror::ArtMethod* method, uint32_t dex_pc) {
+// Add a bounded stack to the profile table. Only the count of the method on
+// top of the frame will be increased.
+void ProfileSampleResults::PutStack(const std::vector<InstructionLocation>& stack) {
   MutexLock mu(Thread::Current(), lock_);
-  uint32_t index = Hash(method);
-  if (dex_table[index] == nullptr) {
-    dex_table[index] = new MethodDexPCMap();
+  ScopedObjectAccess soa(Thread::Current());
+  if (stack_trie_root_ == nullptr) {
+    // The root of the stack trie is a dummy node so that we don't have to maintain
+    // a collection of tries.
+    stack_trie_root_ = new StackTrieNode();
   }
-  MethodDexPCMap::iterator i = dex_table[index]->find(method);
-  if (i == dex_table[index]->end()) {
-    DexPCCountMap* dex_pc_map = new DexPCCountMap();
-    (*dex_pc_map)[dex_pc] = 1;
-    (*dex_table[index])[method] = dex_pc_map;
-  } else {
-    DexPCCountMap* dex_pc_count = i->second;
-    DexPCCountMap::iterator dex_pc_i = dex_pc_count->find(dex_pc);
-    if (dex_pc_i == dex_pc_count->end()) {
-      (*dex_pc_count)[dex_pc] = 1;
+
+  StackTrieNode* current = stack_trie_root_;
+  if (stack.size() == 0) {
+    current->IncreaseCount();
+    return;
+  }
+
+  for (std::vector<InstructionLocation>::const_reverse_iterator iter = stack.rbegin();
+       iter != stack.rend(); ++iter) {
+    InstructionLocation inst_loc = *iter;
+    mirror::ArtMethod* method = inst_loc.first;
+    if (method == nullptr) {
+      // skip null method
+      continue;
+    }
+    uint32_t dex_pc = inst_loc.second;
+    uint32_t method_idx = method->GetDexMethodIndex();
+    const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
+    MethodReference method_ref(dex_file, method_idx);
+    StackTrieNode* child = current->FindChild(method_ref, dex_pc);
+    if (child != nullptr) {
+      current = child;
     } else {
-      dex_pc_i->second++;
+      uint32_t method_size = 0;
+      const DexFile::CodeItem* codeitem = method->GetCodeItem();
+      if (codeitem != nullptr) {
+        method_size = codeitem->insns_size_in_code_units_;
+      }
+      StackTrieNode* new_node = new StackTrieNode(method_ref, dex_pc, method_size, current);
+      current->AppendChild(new_node);
+      current = new_node;
     }
   }
+
+  if (current != stack_trie_root_ && current->GetCount() == 0) {
+    // Insert into the method_context table.
+    if (method_context_table == nullptr) {
+      method_context_table = new MethodContextMap();
+    }
+    MethodReference method = current->GetMethod();
+    MethodContextMap::iterator i = method_context_table->find(method);
+    if (i == method_context_table->end()) {
+      TrieNodeSet* node_set = new TrieNodeSet();
+      node_set->insert(current);
+      (*method_context_table)[method] = node_set;
+    } else {
+      TrieNodeSet* node_set = i->second;
+      node_set->insert(current);
+    }
+  }
+  current->IncreaseCount();
   num_samples_++;
 }
 
@@ -506,54 +604,64 @@
         }
       }
     }
-  } else if (type == kProfilerMethodAndDexPC) {
-    for (int i = 0 ; i < kHashSize; i++) {
-      MethodDexPCMap *dex_map = dex_table[i];
-      if (dex_map != nullptr) {
-        for (const auto &dex_pc_iter : *dex_map) {
-          mirror::ArtMethod *method = dex_pc_iter.first;
-          std::string method_name = PrettyMethod(method);
+  } else if (type == kProfilerBoundedStack) {
+    if (method_context_table != nullptr) {
+      for (const auto &method_iter : *method_context_table) {
+        MethodReference method = method_iter.first;
+        TrieNodeSet* node_set = method_iter.second;
+        std::string method_name = PrettyMethod(method.dex_method_index, *(method.dex_file));
+        uint32_t method_size = 0;
+        uint32_t total_count = 0;
+        PreviousContextMap new_context_map;
+        for (const auto &trie_node_i : *node_set) {
+          StackTrieNode* node = trie_node_i;
+          method_size = node->GetMethodSize();
+          uint32_t count = node->GetCount();
+          uint32_t dexpc = node->GetDexPC();
+          total_count += count;
 
-          const DexFile::CodeItem* codeitem = method->GetCodeItem();
-          uint32_t method_size = 0;
-          if (codeitem != nullptr) {
-            method_size = codeitem->insns_size_in_code_units_;
+          StackTrieNode* current = node->GetParent();
+          // We go backward on the trie to retrieve context and dex_pc until the dummy root.
+          // The format of the context is "method_1@pc_1@method_2@pc_2@..."
+          std::vector<std::string> context_vector;
+          while (current != nullptr && current->GetParent() != nullptr) {
+            context_vector.push_back(StringPrintf("%s@%u",
+                PrettyMethod(current->GetMethod().dex_method_index, *(current->GetMethod().dex_file)).c_str(),
+                current->GetDexPC()));
+            current = current->GetParent();
           }
-          DexPCCountMap* dex_pc_map = dex_pc_iter.second;
-          uint32_t total_count = 0;
-          for (const auto &dex_pc_i : *dex_pc_map) {
-            total_count += dex_pc_i.second;
-          }
+          std::string context_sig = Join(context_vector, '@');
+          new_context_map[std::make_pair(dexpc, context_sig)] = count;
+        }
 
-          PreviousProfile::iterator pi = previous_.find(method_name);
-          if (pi != previous_.end()) {
-            total_count += pi->second.count_;
-            DexPCCountMap* previous_dex_pc_map = pi->second.dex_pc_map_;
-            if (previous_dex_pc_map != nullptr) {
-              for (const auto &dex_pc_i : *previous_dex_pc_map) {
-                uint32_t dex_pc = dex_pc_i.first;
-                uint32_t count = dex_pc_i.second;
-                DexPCCountMap::iterator di = dex_pc_map->find(dex_pc);
-                if (di == dex_pc_map->end()) {
-                  (*dex_pc_map)[dex_pc] = count;
-                } else {
-                  di->second += count;
-                }
+        PreviousProfile::iterator pi = previous_.find(method_name);
+        if (pi != previous_.end()) {
+          total_count += pi->second.count_;
+          PreviousContextMap* previous_context_map = pi->second.context_map_;
+          if (previous_context_map != nullptr) {
+            for (const auto &context_i : *previous_context_map) {
+              uint32_t count = context_i.second;
+              PreviousContextMap::iterator ci = new_context_map.find(context_i.first);
+              if (ci == new_context_map.end()) {
+                new_context_map[context_i.first] = count;
+              } else {
+                ci->second += count;
               }
             }
-            delete previous_dex_pc_map;
-            previous_.erase(pi);
           }
-          std::vector<std::string> dex_pc_count_vector;
-          for (const auto &dex_pc_i : *dex_pc_map) {
-            dex_pc_count_vector.push_back(StringPrintf("%u:%u", dex_pc_i.first, dex_pc_i.second));
-          }
-          // We write out profile data with dex pc information in the following format:
-          // "method/total_count/size/[pc_1:count_1,pc_2:count_2,...]".
-          os << StringPrintf("%s/%u/%u/[%s]\n", method_name.c_str(), total_count,
-              method_size, Join(dex_pc_count_vector, ',').c_str());
-          ++num_methods;
+          delete previous_context_map;
+          previous_.erase(pi);
         }
+        // We write out profile data with dex pc and context information in the following format:
+        // "method/total_count/size/[pc_1:count_1:context_1#pc_2:count_2:context_2#...]".
+        std::vector<std::string> context_count_vector;
+        for (const auto &context_i : new_context_map) {
+          context_count_vector.push_back(StringPrintf("%u:%u:%s", context_i.first.first,
+              context_i.second, context_i.first.second.c_str()));
+        }
+        os << StringPrintf("%s/%u/%u/[%s]\n", method_name.c_str(), total_count,
+            method_size, Join(context_count_vector, '#').c_str());
+        ++num_methods;
       }
     }
   }
@@ -562,15 +670,16 @@
   for (const auto &pi : previous_) {
     if (type == kProfilerMethod) {
       os << StringPrintf("%s/%u/%u\n",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
-    } else if (type == kProfilerMethodAndDexPC) {
+    } else if (type == kProfilerBoundedStack) {
       os << StringPrintf("%s/%u/%u/[",  pi.first.c_str(), pi.second.count_, pi.second.method_size_);
-      DexPCCountMap* previous_dex_pc_map = pi.second.dex_pc_map_;
-      if (previous_dex_pc_map != nullptr) {
-        std::vector<std::string> dex_pc_count_vector;
-        for (const auto &dex_pc_i : *previous_dex_pc_map) {
-          dex_pc_count_vector.push_back(StringPrintf("%u:%u", dex_pc_i.first, dex_pc_i.second));
+      PreviousContextMap* previous_context_map = pi.second.context_map_;
+      if (previous_context_map != nullptr) {
+        std::vector<std::string> context_count_vector;
+        for (const auto &context_i : *previous_context_map) {
+          context_count_vector.push_back(StringPrintf("%u:%u:%s", context_i.first.first,
+              context_i.second, context_i.first.second.c_str()));
         }
-        os << Join(dex_pc_count_vector, ',');
+        os << Join(context_count_vector, '#');
       }
       os << "]\n";
     }
@@ -586,18 +695,21 @@
   for (int i = 0; i < kHashSize; i++) {
     delete table[i];
     table[i] = nullptr;
-    if (dex_table[i] != nullptr) {
-      for (auto &di : *dex_table[i]) {
-        delete di.second;
-        di.second = nullptr;
-      }
+  }
+  if (stack_trie_root_ != nullptr) {
+    stack_trie_root_->DeleteChildren();
+    delete stack_trie_root_;
+    stack_trie_root_ = nullptr;
+    if (method_context_table != nullptr) {
+      delete method_context_table;
+      method_context_table = nullptr;
     }
-    delete dex_table[i];
-    dex_table[i] = nullptr;
   }
   for (auto &pi : previous_) {
-    delete pi.second.dex_pc_map_;
-    pi.second.dex_pc_map_ = nullptr;
+    if (pi.second.context_map_ != nullptr) {
+      delete pi.second.context_map_;
+      pi.second.context_map_ = nullptr;
+    }
   }
   previous_.clear();
 }
@@ -640,9 +752,9 @@
     // Bad summary info.  It should be count/nullcount/bootcount
     return;
   }
-  previous_num_samples_ = atoi(summary_info[0].c_str());
-  previous_num_null_methods_ = atoi(summary_info[1].c_str());
-  previous_num_boot_methods_ = atoi(summary_info[2].c_str());
+  previous_num_samples_ = strtoul(summary_info[0].c_str(), nullptr, 10);
+  previous_num_null_methods_ = strtoul(summary_info[1].c_str(), nullptr, 10);
+  previous_num_boot_methods_ = strtoul(summary_info[2].c_str(), nullptr, 10);
 
   // Now read each line until the end of file.  Each line consists of 3 or 4 fields separated by /
   while (true) {
@@ -656,23 +768,32 @@
       break;
     }
     std::string methodname = info[0];
-    uint32_t total_count = atoi(info[1].c_str());
-    uint32_t size = atoi(info[2].c_str());
-    DexPCCountMap* dex_pc_map = nullptr;
-    if (type == kProfilerMethodAndDexPC && info.size() == 4) {
-      dex_pc_map = new DexPCCountMap();
-      std::string dex_pc_counts_str = info[3].substr(1, info[3].size() - 2);
-      std::vector<std::string> dex_pc_count_pairs;
-      Split(dex_pc_counts_str, ',', dex_pc_count_pairs);
-      for (uint32_t i = 0; i < dex_pc_count_pairs.size(); ++i) {
-        std::vector<std::string> dex_pc_count;
-        Split(dex_pc_count_pairs[i], ':', dex_pc_count);
-        uint32_t dex_pc = atoi(dex_pc_count[0].c_str());
-        uint32_t count = atoi(dex_pc_count[1].c_str());
-        (*dex_pc_map)[dex_pc] = count;
+    uint32_t total_count = strtoul(info[1].c_str(), nullptr, 10);
+    uint32_t size = strtoul(info[2].c_str(), nullptr, 10);
+    PreviousContextMap* context_map = nullptr;
+    if (type == kProfilerBoundedStack && info.size() == 4) {
+      context_map = new PreviousContextMap();
+      std::string context_counts_str = info[3].substr(1, info[3].size() - 2);
+      std::vector<std::string> context_count_pairs;
+      Split(context_counts_str, '#', context_count_pairs);
+      for (uint32_t i = 0; i < context_count_pairs.size(); ++i) {
+        std::vector<std::string> context_count;
+        Split(context_count_pairs[i], ':', context_count);
+        if (context_count.size() == 2) {
+          // Handles the situation when the profile file doesn't contain context information.
+          uint32_t dexpc = strtoul(context_count[0].c_str(), nullptr, 10);
+          uint32_t count = strtoul(context_count[1].c_str(), nullptr, 10);
+          (*context_map)[std::make_pair(dexpc, "")] = count;
+        } else {
+          // Handles the situation when the profile file contains context information.
+          uint32_t dexpc = strtoul(context_count[0].c_str(), nullptr, 10);
+          uint32_t count = strtoul(context_count[1].c_str(), nullptr, 10);
+          std::string context = context_count[2];
+          (*context_map)[std::make_pair(dexpc, context)] = count;
+        }
       }
     }
-    previous_[methodname] = PreviousValue(total_count, size, dex_pc_map);
+    previous_[methodname] = PreviousValue(total_count, size, context_map);
   }
 }
 
@@ -709,7 +830,7 @@
     return false;
   }
   // This is the number of hits in all profiled methods (without nullptr or boot methods)
-  uint32_t total_count = atoi(summary_info[0].c_str());
+  uint32_t total_count = strtoul(summary_info[0].c_str(), nullptr, 10);
 
   // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
   // Store the info in descending order given by the most used methods.
@@ -736,7 +857,7 @@
   for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
     const std::string& methodname = it->second[0];
     uint32_t count = -it->first;
-    uint32_t size = atoi(it->second[2].c_str());
+    uint32_t size = strtoul(it->second[2].c_str(), nullptr, 10);
     double usedPercent = (count * 100.0) / total_count;
 
     curTotalCount += count;
@@ -772,4 +893,24 @@
   return true;
 }
 
+StackTrieNode* StackTrieNode::FindChild(MethodReference method, uint32_t dex_pc) {
+  if (children_.size() == 0) {
+    return nullptr;
+  }
+  // Create a dummy node for searching.
+  StackTrieNode* node = new StackTrieNode(method, dex_pc, 0, nullptr);
+  std::set<StackTrieNode*, StackTrieNodeComparator>::iterator i = children_.find(node);
+  delete node;
+  return (i == children_.end()) ? nullptr : *i;
+}
+
+void StackTrieNode::DeleteChildren() {
+  for (auto &child : children_) {
+    if (child != nullptr) {
+      child->DeleteChildren();
+      delete child;
+    }
+  }
+}
+
 }  // namespace art
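
With the dex-pc table replaced by the stack trie, each written line now carries the sampled calling context in the "method/total_count/size/[pc:count:context#...]" shape described in the Write() comment above. A small sketch of formatting one such line, assuming the contexts were already flattened out of the trie (FormatProfileLine and ContextCount are illustrative names):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct ContextCount {
      uint32_t dex_pc;
      uint32_t count;
      std::string context;  // "caller@pc@caller@pc...", empty when no context was recorded
    };

    static std::string FormatProfileLine(const std::string& method_name, uint32_t total_count,
                                         uint32_t method_size,
                                         const std::vector<ContextCount>& contexts) {
      std::string body;
      for (std::size_t i = 0; i < contexts.size(); ++i) {
        char pair[64];
        std::snprintf(pair, sizeof(pair), "%u:%u:", contexts[i].dex_pc, contexts[i].count);
        if (i != 0) {
          body += '#';
        }
        body += pair;
        body += contexts[i].context;
      }
      char header[64];
      std::snprintf(header, sizeof(header), "/%u/%u/[", total_count, method_size);
      return method_name + header + body + "]";
    }

For instance, a call such as FormatProfileLine("void Foo.bar()", 12, 40, ...) with two contexts would yield a line of the form void Foo.bar()/12/40/[5:7:#9:5:void Foo.baz()@17].
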
diff --git a/runtime/profiler.h b/runtime/profiler.h
index 396dd23..ae51c87 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -31,6 +31,7 @@
 #include "profiler_options.h"
 #include "os.h"
 #include "safe_map.h"
+#include "method_reference.h"
 
 namespace art {
 
@@ -40,6 +41,57 @@
 }  // namespace mirror
 class Thread;
 
+typedef std::pair<mirror::ArtMethod*, uint32_t> InstructionLocation;
+
+// This class stores the sampled bounded stacks in a trie structure. A path of the trie represents
+// a particular context with the method on top of the stack being a leaf or an internal node of the
+// trie rather than the root.
+class StackTrieNode {
+ public:
+  StackTrieNode(MethodReference method, uint32_t dex_pc, uint32_t method_size,
+      StackTrieNode* parent) :
+      parent_(parent), method_(method), dex_pc_(dex_pc),
+      count_(0), method_size_(method_size) {
+  }
+  StackTrieNode() : parent_(nullptr), method_(nullptr, 0),
+      dex_pc_(0), count_(0), method_size_(0) {
+  }
+  StackTrieNode* GetParent() { return parent_; }
+  MethodReference GetMethod() { return method_; }
+  uint32_t GetCount() { return count_; }
+  uint32_t GetDexPC() { return dex_pc_; }
+  uint32_t GetMethodSize() { return method_size_; }
+  void AppendChild(StackTrieNode* child) { children_.insert(child); }
+  StackTrieNode* FindChild(MethodReference method, uint32_t dex_pc);
+  void DeleteChildren();
+  void IncreaseCount() { ++count_; }
+
+ private:
+  // Comparator for stack trie node.
+  struct StackTrieNodeComparator {
+    bool operator()(StackTrieNode* node1, StackTrieNode* node2) const {
+      MethodReference mr1 = node1->GetMethod();
+      MethodReference mr2 = node2->GetMethod();
+      if (mr1.dex_file == mr2.dex_file) {
+        if (mr1.dex_method_index == mr2.dex_method_index) {
+          return node1->GetDexPC() < node2->GetDexPC();
+        } else {
+          return mr1.dex_method_index < mr2.dex_method_index;
+        }
+      } else {
+        return mr1.dex_file < mr2.dex_file;
+      }
+    }
+  };
+
+  std::set<StackTrieNode*, StackTrieNodeComparator> children_;
+  StackTrieNode* parent_;
+  MethodReference method_;
+  uint32_t dex_pc_;
+  uint32_t count_;
+  uint32_t method_size_;
+};
+
 //
 // This class holds all the results for all runs of the profiler.  It also
 // counts the number of null methods (where we can't determine the method) and
@@ -53,7 +105,7 @@
   ~ProfileSampleResults();
 
   void Put(mirror::ArtMethod* method);
-  void PutDexPC(mirror::ArtMethod* method, uint32_t pc);
+  void PutStack(const std::vector<InstructionLocation>& stack_dump);
   uint32_t Write(std::ostream &os, ProfileDataType type);
   void ReadPrevious(int fd, ProfileDataType type);
   void Clear();
@@ -72,18 +124,21 @@
   typedef std::map<mirror::ArtMethod*, uint32_t> Map;  // Map of method vs its count.
   Map *table[kHashSize];
 
-  typedef std::map<uint32_t, uint32_t> DexPCCountMap;  // Map of dex pc vs its count
-  // Map of method vs dex pc counts in the method.
-  typedef std::map<mirror::ArtMethod*, DexPCCountMap*> MethodDexPCMap;
-  MethodDexPCMap *dex_table[kHashSize];
+  typedef std::set<StackTrieNode*> TrieNodeSet;
+  // Map of method hit by profiler vs the set of stack trie nodes for this method.
+  typedef std::map<MethodReference, TrieNodeSet*, MethodReferenceComparator> MethodContextMap;
+  MethodContextMap *method_context_table;
+  StackTrieNode* stack_trie_root_;  // Root of the trie that stores sampled stack information.
 
+  // Map from <pc, context> to counts.
+  typedef std::map<std::pair<uint32_t, std::string>, uint32_t> PreviousContextMap;
   struct PreviousValue {
-    PreviousValue() : count_(0), method_size_(0), dex_pc_map_(nullptr) {}
-    PreviousValue(uint32_t count, uint32_t method_size, DexPCCountMap* dex_pc_map)
-      : count_(count), method_size_(method_size), dex_pc_map_(dex_pc_map) {}
+    PreviousValue() : count_(0), method_size_(0), context_map_(nullptr) {}
+    PreviousValue(uint32_t count, uint32_t method_size, PreviousContextMap* context_map)
+      : count_(count), method_size_(method_size), context_map_(context_map) {}
     uint32_t count_;
     uint32_t method_size_;
-    DexPCCountMap* dex_pc_map_;
+    PreviousContextMap* context_map_;
   };
 
   typedef std::map<std::string, PreviousValue> PreviousProfile;
@@ -121,7 +176,10 @@
   static void Stop() LOCKS_EXCLUDED(Locks::profiler_lock_, wait_lock_);
   static void Shutdown() LOCKS_EXCLUDED(Locks::profiler_lock_);
 
-  void RecordMethod(mirror::ArtMethod *method, uint32_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void RecordMethod(mirror::ArtMethod *method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void RecordStack(const std::vector<InstructionLocation>& stack) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool ProcessMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const ProfilerOptions& GetProfilerOptions() const { return options_; }
 
   Barrier& GetBarrier() {
     return *profiler_barrier_;
diff --git a/runtime/profiler_options.h b/runtime/profiler_options.h
index 0b63003..e3ef697 100644
--- a/runtime/profiler_options.h
+++ b/runtime/profiler_options.h
@@ -24,7 +24,7 @@
 
 enum ProfileDataType {
   kProfilerMethod,          // Method only
-  kProfilerMethodAndDexPC,  // Method with Dex PC
+  kProfilerBoundedStack,    // Methods with Dex PC on top of the stack
 };
 
 class ProfilerOptions {
@@ -38,6 +38,7 @@
   static constexpr double kDefaultTopKThreshold = 90.0;
   static constexpr double kDefaultChangeInTopKThreshold = 10.0;
   static constexpr ProfileDataType kDefaultProfileData = kProfilerMethod;
+  static constexpr uint32_t kDefaultMaxStackDepth = 3;
 
   ProfilerOptions() :
     enabled_(kDefaultEnabled),
@@ -48,7 +49,8 @@
     start_immediately_(kDefaultStartImmediately),
     top_k_threshold_(kDefaultTopKThreshold),
     top_k_change_threshold_(kDefaultChangeInTopKThreshold),
-    profile_type_(kDefaultProfileData) {}
+    profile_type_(kDefaultProfileData),
+    max_stack_depth_(kDefaultMaxStackDepth) {}
 
   ProfilerOptions(bool enabled,
                  uint32_t period_s,
@@ -58,7 +60,8 @@
                  bool start_immediately,
                  double top_k_threshold,
                  double top_k_change_threshold,
-                 ProfileDataType profile_type):
+                 ProfileDataType profile_type,
+                 uint32_t max_stack_depth):
     enabled_(enabled),
     period_s_(period_s),
     duration_s_(duration_s),
@@ -67,7 +70,8 @@
     start_immediately_(start_immediately),
     top_k_threshold_(top_k_threshold),
     top_k_change_threshold_(top_k_change_threshold),
-    profile_type_(profile_type) {}
+    profile_type_(profile_type),
+    max_stack_depth_(max_stack_depth) {}
 
   bool IsEnabled() const {
     return enabled_;
@@ -105,6 +109,10 @@
     return profile_type_;
   }
 
+  uint32_t GetMaxStackDepth() const {
+    return max_stack_depth_;
+  }
+
  private:
   friend std::ostream & operator<<(std::ostream &os, const ProfilerOptions& po) {
     os << "enabled=" << po.enabled_
@@ -115,7 +123,8 @@
        << ", start_immediately=" << po.start_immediately_
        << ", top_k_threshold=" << po.top_k_threshold_
        << ", top_k_change_threshold=" << po.top_k_change_threshold_
-       << ", profile_type=" << po.profile_type_;
+       << ", profile_type=" << po.profile_type_
+       << ", max_stack_depth=" << po.max_stack_depth_;
     return os;
   }
 
@@ -139,6 +148,8 @@
   double top_k_change_threshold_;
   // The type of profile data dumped to the disk.
   ProfileDataType profile_type_;
+  // The maximum stack depth collected by the profiler.
+  uint32_t max_stack_depth_;
 };
 
 }  // namespace art
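
max_stack_depth_ is threaded through ProfilerOptions the same way as the existing knobs: a kDefault constant, an extra constructor parameter, a getter, and a field printed by operator<<. A trimmed sketch of that pattern (ProfilerOptionsSketch is illustrative, not the ART class):

    #include <cstdint>

    class ProfilerOptionsSketch {
     public:
      static constexpr uint32_t kDefaultMaxStackDepth = 3;

      ProfilerOptionsSketch() : max_stack_depth_(kDefaultMaxStackDepth) {}
      explicit ProfilerOptionsSketch(uint32_t max_stack_depth) : max_stack_depth_(max_stack_depth) {}

      uint32_t GetMaxStackDepth() const { return max_stack_depth_; }

     private:
      uint32_t max_stack_depth_;
    };
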
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index ddee89b..e1fbf01 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -37,12 +37,16 @@
 enum InlineMethodOpcode : uint16_t {
   kIntrinsicDoubleCvt,
   kIntrinsicFloatCvt,
+  kIntrinsicReverseBits,
   kIntrinsicReverseBytes,
   kIntrinsicAbsInt,
   kIntrinsicAbsLong,
   kIntrinsicAbsFloat,
   kIntrinsicAbsDouble,
   kIntrinsicMinMaxInt,
+  kIntrinsicMinMaxLong,
+  kIntrinsicMinMaxFloat,
+  kIntrinsicMinMaxDouble,
   kIntrinsicSqrt,
   kIntrinsicCharAt,
   kIntrinsicCompareTo,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 8aa7ea1..53ddcca 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -241,16 +241,22 @@
   }
 
   void DumpAllThreads(std::ostream& os, Thread* self) NO_THREAD_SAFETY_ANALYSIS {
-    bool tll_already_held = Locks::thread_list_lock_->IsExclusiveHeld(self);
-    bool ml_already_held = Locks::mutator_lock_->IsSharedHeld(self);
-    if (!tll_already_held || !ml_already_held) {
-      os << "Dumping all threads without appropriate locks held:"
-          << (!tll_already_held ? " thread list lock" : "")
-          << (!ml_already_held ? " mutator lock" : "")
-          << "\n";
+    Runtime* runtime = Runtime::Current();
+    if (runtime != nullptr) {
+      ThreadList* thread_list = runtime->GetThreadList();
+      if (thread_list != nullptr) {
+        bool tll_already_held = Locks::thread_list_lock_->IsExclusiveHeld(self);
+        bool ml_already_held = Locks::mutator_lock_->IsSharedHeld(self);
+        if (!tll_already_held || !ml_already_held) {
+          os << "Dumping all threads without appropriate locks held:"
+              << (!tll_already_held ? " thread list lock" : "")
+              << (!ml_already_held ? " mutator lock" : "")
+              << "\n";
+        }
+        os << "All threads:\n";
+        thread_list->DumpLocked(os);
+      }
     }
-    os << "All threads:\n";
-    Runtime::Current()->GetThreadList()->DumpLocked(os);
   }
 };
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 8776a59..f839be1 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -231,7 +231,7 @@
   }
 
   static const char* GetVersion() {
-    return "2.0.0";
+    return "2.1.0";
   }
 
   void DisallowNewSystemWeaks() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 132ac3e..d5405fb 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -159,11 +159,22 @@
       uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
       uintptr_t ptr_val;
       bool success = false;
+      bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64);
       if (is_float) {
         success = GetFPR(reg, &ptr_val);
       } else {
         success = GetGPR(reg, &ptr_val);
       }
+      if (success && target64) {
+        bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
+        bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
+        int64_t value_long = static_cast<int64_t>(ptr_val);
+        if (wide_lo) {
+          ptr_val = static_cast<uintptr_t>(value_long & 0xFFFFFFFF);
+        } else if (wide_hi) {
+          ptr_val = static_cast<uintptr_t>(value_long >> 32);
+        }
+      }
       *val = ptr_val;
       return success;
     } else {
@@ -194,6 +205,28 @@
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
+      bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64);
+      // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
+      if (target64) {
+        bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
+        bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
+        if (wide_lo || wide_hi) {
+          uintptr_t old_reg_val;
+          bool success = is_float ? GetFPR(reg, &old_reg_val) : GetGPR(reg, &old_reg_val);
+          if (!success) {
+            return false;
+          }
+          uint64_t new_vreg_portion = static_cast<uint64_t>(new_value);
+          uint64_t old_reg_val_as_wide = static_cast<uint64_t>(old_reg_val);
+          uint64_t mask = 0xffffffff;
+          if (wide_lo) {
+            mask = mask << 32;
+          } else {
+            new_vreg_portion = new_vreg_portion << 32;
+          }
+          new_value = static_cast<uintptr_t>((old_reg_val_as_wide & mask) | new_vreg_portion);
+        }
+      }
       if (is_float) {
         return SetFPR(reg, new_value);
       } else {
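
On 64-bit targets a long or double vreg pair is promoted into one machine register, so writing one half must preserve the other; the masking above does exactly that. The merge in isolation (MergeHalfIntoWideReg is an illustrative helper, not ART's SetVReg):

    #include <cstdint>

    // Write a 32-bit vreg value into the low or high half of a 64-bit register
    // without disturbing the other half.
    static uint64_t MergeHalfIntoWideReg(uint64_t old_reg, uint32_t new_half, bool is_low_half) {
      if (is_low_half) {
        // Keep the high 32 bits, replace the low 32 bits.
        return (old_reg & 0xffffffff00000000ULL) | new_half;
      }
      // Keep the low 32 bits, replace the high 32 bits.
      return (old_reg & 0x00000000ffffffffULL) | (static_cast<uint64_t>(new_half) << 32);
    }
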
diff --git a/runtime/thread.cc b/runtime/thread.cc
index ca8c2d7..d3487d0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1881,7 +1881,6 @@
   QUICK_ENTRY_POINT_INFO(pInvokeStaticTrampolineWithAccessCheck)
   QUICK_ENTRY_POINT_INFO(pInvokeSuperTrampolineWithAccessCheck)
   QUICK_ENTRY_POINT_INFO(pInvokeVirtualTrampolineWithAccessCheck)
-  QUICK_ENTRY_POINT_INFO(pCheckSuspend)
   QUICK_ENTRY_POINT_INFO(pTestSuspend)
   QUICK_ENTRY_POINT_INFO(pDeliverException)
   QUICK_ENTRY_POINT_INFO(pThrowArrayBounds)
diff --git a/runtime/utils.cc b/runtime/utils.cc
index e5b8b22..d038571 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -534,15 +534,17 @@
     return StringPrintf("%" PRIu64 "%s", whole_part, unit);
   } else {
     static constexpr size_t kMaxDigits = 30;
+    size_t avail_digits = kMaxDigits;
     char fraction_buffer[kMaxDigits];
     char* ptr = fraction_buffer;
     uint64_t multiplier = 10;
     // This infinite loops if fractional part is 0.
-    while (fractional_part * multiplier < divisor) {
+    while (avail_digits > 1 && fractional_part * multiplier < divisor) {
       multiplier *= 10;
       *ptr++ = '0';
+      avail_digits--;
     }
-    sprintf(ptr, "%" PRIu64, fractional_part);
+    snprintf(ptr, avail_digits, "%" PRIu64, fractional_part);
     fraction_buffer[std::min(kMaxDigits - 1, max_fraction_digits)] = '\0';
     return StringPrintf("%" PRIu64 ".%s%s", whole_part, fraction_buffer, unit);
   }
@@ -1232,6 +1234,7 @@
 std::string DexFilenameToOdexFilename(const std::string& location, const InstructionSet isa) {
   // location = /foo/bar/baz.jar
   // odex_location = /foo/bar/<isa>/baz.odex
+
   CHECK_GE(location.size(), 4U) << location;  // must be at least .123
   std::string odex_location(location);
   InsertIsaDirectory(isa, &odex_location);
diff --git a/runtime/utils.h b/runtime/utils.h
index a61d30f..eb79968 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -203,6 +203,19 @@
   return (ch < ' ' || ch > '~');
 }
 
+// Interpret the bit pattern of input (type U) as type V. Requires the size
+// of V >= size of U (compile-time checked).
+template<typename U, typename V>
+static inline V bit_cast(U in) {
+  COMPILE_ASSERT(sizeof(U) <= sizeof(V), size_of_u_not_le_size_of_v);
+  union {
+    U u;
+    V v;
+  } tmp;
+  tmp.u = in;
+  return tmp.v;
+}
+
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
@@ -401,6 +414,7 @@
 
 // Returns an .odex file name next adjacent to the dex location.
 // For example, for "/foo/bar/baz.jar", return "/foo/bar/<isa>/baz.odex".
+// Note: does not support multidex location strings.
 std::string DexFilenameToOdexFilename(const std::string& location, InstructionSet isa);
 
 // Check whether the given magic matches a known file type.
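
The union-based bit_cast added above is the classic type-punning idiom; the strictly portable equivalent copies the bytes with memcpy. A sketch of that variant plus a typical use, recovering the IEEE-754 bits of a float (BitCastSketch is an illustrative name):

    #include <cstdint>
    #include <cstring>

    template <typename U, typename V>
    static V BitCastSketch(U in) {
      static_assert(sizeof(U) <= sizeof(V), "destination must be at least as wide as the source");
      V out{};
      std::memcpy(&out, &in, sizeof(U));
      return out;
    }

    // Example: BitCastSketch<float, uint32_t>(1.5f) yields 0x3fc00000.
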
diff --git a/runtime/vmap_table.h b/runtime/vmap_table.h
index 9821753..df5cd80 100644
--- a/runtime/vmap_table.h
+++ b/runtime/vmap_table.h
@@ -64,6 +64,12 @@
     const uint8_t* table = table_;
     uint16_t adjusted_vreg = vreg + kEntryAdjustment;
     size_t end = DecodeUnsignedLeb128(&table);
+    bool high_reg = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
+    bool target64 = (kRuntimeISA == kArm64) || (kRuntimeISA == kX86_64);
+    if (target64 && high_reg) {
+      // Wide promoted registers are associated with the sreg of the low portion.
+      adjusted_vreg--;
+    }
     for (size_t i = 0; i < end; ++i) {
       // Stop if we find what we are looking for.
       uint16_t adjusted_entry = DecodeUnsignedLeb128(&table);
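
On arm64 and x86-64 a promoted long or double occupies a single 64-bit register, and the vmap table records that register only under the low half of the Dalvik register pair, so a lookup for the high half is redirected to the low half's entry. The adjustment, restated in isolation (names are illustrative, not the ART API):

    #include <cstdint>

    // Redirect a query for the high half of a wide vreg pair to the entry
    // recorded for the low half when running on a 64-bit target.
    uint16_t AdjustWideVReg(uint16_t adjusted_vreg, bool is_high_half, bool is_64bit) {
      if (is_64bit && is_high_half) {
        return adjusted_vreg - 1;  // vN+1 (high) shares the register mapped for vN (low)
      }
      return adjusted_vreg;
    }
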
diff --git a/test/013-math2/expected.txt b/test/013-math2/expected.txt
index d36c468..84fb9e2 100644
--- a/test/013-math2/expected.txt
+++ b/test/013-math2/expected.txt
@@ -1 +1,2 @@
 a:32003
+b:-31993
diff --git a/test/013-math2/src/Main.java b/test/013-math2/src/Main.java
index 2c80c31..7b8c4e4 100644
--- a/test/013-math2/src/Main.java
+++ b/test/013-math2/src/Main.java
@@ -26,7 +26,9 @@
 
         // a 16-bit constant
         a += 32000;
-        System.out.println("a:" +a);
+        b -= 32000;
+        System.out.println("a:" + a);
+        System.out.println("b:" + b);
     }
     public static void main(String args[]) {
         math_013();
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 55ecf69..5b8134d 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -24,12 +24,26 @@
     test_Float_intBitsToFloat();
     test_Math_abs_I();
     test_Math_abs_J();
-    test_Math_min();
-    test_Math_max();
+    test_Math_min_I();
+    test_Math_max_I();
+    test_Math_min_J();
+    test_Math_max_J();
+    test_Math_min_F();
+    test_Math_max_F();
+    test_Math_min_D();
+    test_Math_max_D();
+    test_Integer_reverse();
+    test_Long_reverse();
     test_StrictMath_abs_I();
     test_StrictMath_abs_J();
-    test_StrictMath_min();
-    test_StrictMath_max();
+    test_StrictMath_min_I();
+    test_StrictMath_max_I();
+    test_StrictMath_min_J();
+    test_StrictMath_max_J();
+    test_StrictMath_min_F();
+    test_StrictMath_max_F();
+    test_StrictMath_min_D();
+    test_StrictMath_max_D();
     test_String_charAt();
     test_String_compareTo();
     test_String_indexOf();
@@ -37,6 +51,25 @@
     test_String_length();
   }
 
+  /*
+   * Determine if two floating point numbers are approximately equal.
+   *
+   * (Assumes that floating point is generally working, so we can't use
+   * this for the first set of tests.)
+   */
+  static boolean approxEqual(float a, float b, float maxDelta) {
+    if (a > b)
+      return (a - b) < maxDelta;
+    else
+      return (b - a) < maxDelta;
+  }
+  static boolean approxEqual(double a, double b, double maxDelta) {
+    if (a > b)
+      return (a - b) < maxDelta;
+    else
+      return (b - a) < maxDelta;
+  }
+
   public static void test_String_length() {
     String str0 = "";
     String str1 = "x";
@@ -244,7 +277,7 @@
     Assert.assertEquals(Math.abs(Long.MIN_VALUE - 1), Long.MAX_VALUE);
   }
 
-  public static void test_Math_min() {
+  public static void test_Math_min_I() {
     Assert.assertEquals(Math.min(0, 0), 0);
     Assert.assertEquals(Math.min(1, 0), 0);
     Assert.assertEquals(Math.min(0, 1), 0);
@@ -253,7 +286,7 @@
     Assert.assertEquals(Math.min(Integer.MIN_VALUE, Integer.MAX_VALUE), Integer.MIN_VALUE);
   }
 
-  public static void test_Math_max() {
+  public static void test_Math_max_I() {
     Assert.assertEquals(Math.max(0, 0), 0);
     Assert.assertEquals(Math.max(1, 0), 1);
     Assert.assertEquals(Math.max(0, 1), 1);
@@ -262,6 +295,60 @@
     Assert.assertEquals(Math.max(Integer.MIN_VALUE, Integer.MAX_VALUE), Integer.MAX_VALUE);
   }
 
+  public static void test_Math_min_J() {
+    Assert.assertEquals(Math.min(0L, 0L), 0L);
+    Assert.assertEquals(Math.min(1L, 0L), 0L);
+    Assert.assertEquals(Math.min(0L, 1L), 0L);
+    Assert.assertEquals(Math.min(0L, Long.MAX_VALUE), 0L);
+    Assert.assertEquals(Math.min(Long.MIN_VALUE, 0L), Long.MIN_VALUE);
+    Assert.assertEquals(Math.min(Long.MIN_VALUE, Long.MAX_VALUE), Long.MIN_VALUE);
+  }
+
+  public static void test_Math_max_J() {
+    Assert.assertEquals(Math.max(0L, 0L), 0L);
+    Assert.assertEquals(Math.max(1L, 0L), 1L);
+    Assert.assertEquals(Math.max(0L, 1L), 1L);
+    Assert.assertEquals(Math.max(0L, Long.MAX_VALUE), Long.MAX_VALUE);
+    Assert.assertEquals(Math.max(Long.MIN_VALUE, 0L), 0L);
+    Assert.assertEquals(Math.max(Long.MIN_VALUE, Long.MAX_VALUE), Long.MAX_VALUE);
+  }
+
+  public static void test_Math_min_F() {
+    Assert.assertTrue(approxEqual(Math.min(0.0f, 0.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(Math.min(1.0f, 0.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(Math.min(0.0f, 1.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(Math.min(0.0f, Float.MAX_VALUE), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(Math.min(Float.MIN_VALUE, 0.0f), Float.MIN_VALUE, 0.001f));
+    Assert.assertTrue(approxEqual(Math.min(Float.MIN_VALUE, Float.MAX_VALUE), Float.MIN_VALUE, 0.001f));
+  }
+
+  public static void test_Math_max_F() {
+    Assert.assertTrue(approxEqual(Math.max(0.0f, 0.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(Math.max(1.0f, 0.0f), 1.0f, 0.001f));
+    Assert.assertTrue(approxEqual(Math.max(0.0f, 1.0f), 1.0f, 0.001f));
+    Assert.assertTrue(approxEqual(Math.max(0.0f, Float.MAX_VALUE), Float.MAX_VALUE, 0.001f));
+    Assert.assertTrue(approxEqual(Math.max(Float.MIN_VALUE, 0.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(Math.max(Float.MIN_VALUE, Float.MAX_VALUE), Float.MAX_VALUE, 0.001f));
+  }
+
+  public static void test_Math_min_D() {
+    Assert.assertTrue(approxEqual(Math.min(0.0d, 0.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(Math.min(1.0d, 0.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(Math.min(0.0d, 1.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(Math.min(0.0d, Double.MAX_VALUE), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(Math.min(Double.MIN_VALUE, 0.0d), Double.MIN_VALUE, 0.001d));
+    Assert.assertTrue(approxEqual(Math.min(Double.MIN_VALUE, Double.MAX_VALUE), Double.MIN_VALUE, 0.001d));
+  }
+
+  public static void test_Math_max_D() {
+    Assert.assertTrue(approxEqual(Math.max(0.0d, 0.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(Math.max(1.0d, 0.0d), 1.0d, 0.001d));
+    Assert.assertTrue(approxEqual(Math.max(0.0d, 1.0d), 1.0d, 0.001d));
+    Assert.assertTrue(approxEqual(Math.max(0.0d, Double.MAX_VALUE), Double.MAX_VALUE, 0.001d));
+    Assert.assertTrue(approxEqual(Math.max(Double.MIN_VALUE, 0.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(Math.max(Double.MIN_VALUE, Double.MAX_VALUE), Double.MAX_VALUE, 0.001d));
+  }
+
   public static void test_StrictMath_abs_I() {
     Assert.assertEquals(StrictMath.abs(0), 0);
     Assert.assertEquals(StrictMath.abs(123), 123);
@@ -281,7 +368,7 @@
     Assert.assertEquals(StrictMath.abs(Long.MIN_VALUE - 1), Long.MAX_VALUE);
   }
 
-  public static void test_StrictMath_min() {
+  public static void test_StrictMath_min_I() {
     Assert.assertEquals(StrictMath.min(0, 0), 0);
     Assert.assertEquals(StrictMath.min(1, 0), 0);
     Assert.assertEquals(StrictMath.min(0, 1), 0);
@@ -290,7 +377,7 @@
     Assert.assertEquals(StrictMath.min(Integer.MIN_VALUE, Integer.MAX_VALUE), Integer.MIN_VALUE);
   }
 
-  public static void test_StrictMath_max() {
+  public static void test_StrictMath_max_I() {
     Assert.assertEquals(StrictMath.max(0, 0), 0);
     Assert.assertEquals(StrictMath.max(1, 0), 1);
     Assert.assertEquals(StrictMath.max(0, 1), 1);
@@ -299,6 +386,60 @@
     Assert.assertEquals(StrictMath.max(Integer.MIN_VALUE, Integer.MAX_VALUE), Integer.MAX_VALUE);
   }
 
+  public static void test_StrictMath_min_J() {
+    Assert.assertEquals(StrictMath.min(0L, 0L), 0L);
+    Assert.assertEquals(StrictMath.min(1L, 0L), 0L);
+    Assert.assertEquals(StrictMath.min(0L, 1L), 0L);
+    Assert.assertEquals(StrictMath.min(0L, Long.MAX_VALUE), 0L);
+    Assert.assertEquals(StrictMath.min(Long.MIN_VALUE, 0L), Long.MIN_VALUE);
+    Assert.assertEquals(StrictMath.min(Long.MIN_VALUE, Long.MAX_VALUE), Long.MIN_VALUE);
+  }
+
+  public static void test_StrictMath_max_J() {
+    Assert.assertEquals(StrictMath.max(0L, 0L), 0L);
+    Assert.assertEquals(StrictMath.max(1L, 0L), 1L);
+    Assert.assertEquals(StrictMath.max(0L, 1L), 1L);
+    Assert.assertEquals(StrictMath.max(0L, Long.MAX_VALUE), Long.MAX_VALUE);
+    Assert.assertEquals(StrictMath.max(Long.MIN_VALUE, 0L), 0L);
+    Assert.assertEquals(StrictMath.max(Long.MIN_VALUE, Long.MAX_VALUE), Long.MAX_VALUE);
+  }
+
+  public static void test_StrictMath_min_F() {
+    Assert.assertTrue(approxEqual(StrictMath.min(0.0f, 0.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.min(1.0f, 0.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.min(0.0f, 1.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.min(0.0f, Float.MAX_VALUE), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.min(Float.MIN_VALUE, 0.0f), Float.MIN_VALUE, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.min(Float.MIN_VALUE, Float.MAX_VALUE), Float.MIN_VALUE, 0.001f));
+  }
+
+  public static void test_StrictMath_max_F() {
+    Assert.assertTrue(approxEqual(StrictMath.max(0.0f, 0.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.max(1.0f, 0.0f), 1.0f, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.max(0.0f, 1.0f), 1.0f, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.max(0.0f, Float.MAX_VALUE), Float.MAX_VALUE, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.max(Float.MIN_VALUE, 0.0f), 0.0f, 0.001f));
+    Assert.assertTrue(approxEqual(StrictMath.max(Float.MIN_VALUE, Float.MAX_VALUE), Float.MAX_VALUE, 0.001f));
+  }
+
+  public static void test_StrictMath_min_D() {
+    Assert.assertTrue(approxEqual(StrictMath.min(0.0d, 0.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.min(1.0d, 0.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.min(0.0d, 1.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.min(0.0d, Double.MAX_VALUE), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.min(Double.MIN_VALUE, 0.0d), Double.MIN_VALUE, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.min(Double.MIN_VALUE, Double.MAX_VALUE), Double.MIN_VALUE, 0.001d));
+  }
+
+  public static void test_StrictMath_max_D() {
+    Assert.assertTrue(approxEqual(StrictMath.max(0.0d, 0.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.max(1.0d, 0.0d), 1.0d, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.max(0.0d, 1.0d), 1.0d, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.max(0.0d, Double.MAX_VALUE), Double.MAX_VALUE, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.max(Double.MIN_VALUE, 0.0d), 0.0d, 0.001d));
+    Assert.assertTrue(approxEqual(StrictMath.max(Double.MIN_VALUE, Double.MAX_VALUE), Double.MAX_VALUE, 0.001d));
+  }
+
   public static void test_Float_floatToRawIntBits() {
     Assert.assertEquals(Float.floatToRawIntBits(-1.0f), 0xbf800000);
     Assert.assertEquals(Float.floatToRawIntBits(0.0f), 0);
@@ -334,4 +475,27 @@
     Assert.assertEquals(Double.longBitsToDouble(0x7ff0000000000000L), Double.POSITIVE_INFINITY);
     Assert.assertEquals(Double.longBitsToDouble(0xfff0000000000000L), Double.NEGATIVE_INFINITY);
   }
+
+  public static void test_Integer_reverse() {
+    Assert.assertEquals(Integer.reverse(1), 0x80000000);
+    Assert.assertEquals(Integer.reverse(-1), 0xffffffff);
+    Assert.assertEquals(Integer.reverse(0), 0);
+    Assert.assertEquals(Integer.reverse(0x12345678), 0x1e6a2c48);
+    Assert.assertEquals(Integer.reverse(0x87654321), 0x84c2a6e1);
+    Assert.assertEquals(Integer.reverse(Integer.MAX_VALUE), 0xfffffffe);
+    Assert.assertEquals(Integer.reverse(Integer.MIN_VALUE), 1);
+  }
+
+  public static void test_Long_reverse() {
+    Assert.assertEquals(Long.reverse(1L), 0x8000000000000000L);
+    Assert.assertEquals(Long.reverse(-1L), 0xffffffffffffffffL);
+    Assert.assertEquals(Long.reverse(0L), 0L);
+    // FIXME: These asserts fail with or without this patch. I have collected
+    // the expected results on my host machine.
+    // Assert.assertEquals(Long.reverse(0x1234567812345678L), 0x1e6a2c481e6a2c48L);
+    // Assert.assertEquals(Long.reverse(0x8765432187654321L), 0x84c2a6e184c2a6e1L);
+    // Assert.assertEquals(Long.reverse(Long.MAX_VALUE), 0xfffffffffffffffeL);
+    Assert.assertEquals(Long.reverse(Long.MIN_VALUE), 1L);
+  }
+
 }
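
The new reverse tests pin down exact constants such as Integer.reverse(0x12345678) == 0x1e6a2c48. Those constants can be reproduced independently with the classic swap-halves bit reversal; a standalone sketch (not part of the patch) that checks the 32-bit values used above:

    #include <cassert>
    #include <cstdint>

    // O(log n) bit reversal: swap adjacent bits, then 2-bit pairs, nibbles,
    // bytes, and finally the two 16-bit halves.
    uint32_t Reverse32(uint32_t x) {
      x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);
      x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);
      x = ((x & 0x0f0f0f0fu) << 4) | ((x >> 4) & 0x0f0f0f0fu);
      x = ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu);
      return (x << 16) | (x >> 16);
    }

    int main() {
      assert(Reverse32(0x12345678u) == 0x1e6a2c48u);  // matches test_Integer_reverse
      assert(Reverse32(0x87654321u) == 0x84c2a6e1u);  // matches test_Integer_reverse
      assert(Reverse32(1u) == 0x80000000u);
      return 0;
    }
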
diff --git a/test/113-multidex/build b/test/113-multidex/build
new file mode 100644
index 0000000..ec8706e
--- /dev/null
+++ b/test/113-multidex/build
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+mkdir classes
+
+# All except Main
+${JAVAC} -d classes `find src -name '*.java'`
+rm classes/Main.class
+${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex classes
+
+# Only Main
+${JAVAC} -d classes `find src -name '*.java'`
+rm classes/Second.class classes/FillerA.class classes/FillerB.class classes/Inf*.class
+${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex classes
+
+zip $TEST_NAME.jar classes.dex classes2.dex
diff --git a/test/113-multidex/expected.txt b/test/113-multidex/expected.txt
new file mode 100644
index 0000000..603e911
--- /dev/null
+++ b/test/113-multidex/expected.txt
@@ -0,0 +1,12 @@
+FillerA
+Second
+Second::zcall
+Second::zcall1
+Second::zcall2
+Second::zcall3
+Second::zcall4
+Second::zcall5
+Second::zcall6
+Second::zcall7
+Second::zcall8
+Second::zcall9
diff --git a/test/113-multidex/info.txt b/test/113-multidex/info.txt
new file mode 100644
index 0000000..d0a4ac1
--- /dev/null
+++ b/test/113-multidex/info.txt
@@ -0,0 +1,2 @@
+Test whether we can run code from an application split into multiple dex files (similar to
+MultiDex).
diff --git a/test/113-multidex/src/FillerA.java b/test/113-multidex/src/FillerA.java
new file mode 100644
index 0000000..d169018
--- /dev/null
+++ b/test/113-multidex/src/FillerA.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class FillerA {
+  public void methodA() {
+  }
+
+  public void methodB() {
+  }
+}
diff --git a/test/113-multidex/src/FillerB.java b/test/113-multidex/src/FillerB.java
new file mode 100644
index 0000000..ec3ac9d
--- /dev/null
+++ b/test/113-multidex/src/FillerB.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class FillerB {
+  public void methodC() {
+  }
+
+  public void methodD() {
+  }
+}
diff --git a/test/113-multidex/src/Inf1.java b/test/113-multidex/src/Inf1.java
new file mode 100644
index 0000000..3deb6b4
--- /dev/null
+++ b/test/113-multidex/src/Inf1.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Inf1 {
+  public void zcall();
+  public void zcall1();
+  public void zcall2();
+  public void zcall3();
+  public void zcall4();
+  public void zcall5();
+  public void zcall6();
+  public void zcall7();
+  public void zcall8();
+  public void zcall9();
+}
\ No newline at end of file
diff --git a/test/113-multidex/src/Inf2.java b/test/113-multidex/src/Inf2.java
new file mode 100644
index 0000000..ac09509
--- /dev/null
+++ b/test/113-multidex/src/Inf2.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Inf2 {
+  public void zcall();
+  public void zcall1();
+  public void zcall2();
+  public void zcall3();
+  public void zcall4();
+  public void zcall5();
+  public void zcall6();
+  public void zcall7();
+  public void zcall8();
+  public void zcall9();
+}
\ No newline at end of file
diff --git a/test/113-multidex/src/Inf3.java b/test/113-multidex/src/Inf3.java
new file mode 100644
index 0000000..d6c377b
--- /dev/null
+++ b/test/113-multidex/src/Inf3.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Inf3 {
+  public void zcall();
+  public void zcall1();
+  public void zcall2();
+  public void zcall3();
+  public void zcall4();
+  public void zcall5();
+  public void zcall6();
+  public void zcall7();
+  public void zcall8();
+  public void zcall9();
+}
\ No newline at end of file
diff --git a/test/113-multidex/src/Inf4.java b/test/113-multidex/src/Inf4.java
new file mode 100644
index 0000000..a1801b9
--- /dev/null
+++ b/test/113-multidex/src/Inf4.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Inf4 {
+  public void zcall();
+  public void zcall1();
+  public void zcall2();
+  public void zcall3();
+  public void zcall4();
+  public void zcall5();
+  public void zcall6();
+  public void zcall7();
+  public void zcall8();
+  public void zcall9();
+}
\ No newline at end of file
diff --git a/test/113-multidex/src/Inf5.java b/test/113-multidex/src/Inf5.java
new file mode 100644
index 0000000..e8115ce
--- /dev/null
+++ b/test/113-multidex/src/Inf5.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Inf5 {
+  public void zcall();
+  public void zcall1();
+  public void zcall2();
+  public void zcall3();
+  public void zcall4();
+  public void zcall5();
+  public void zcall6();
+  public void zcall7();
+  public void zcall8();
+  public void zcall9();
+}
\ No newline at end of file
diff --git a/test/113-multidex/src/Inf6.java b/test/113-multidex/src/Inf6.java
new file mode 100644
index 0000000..554bdb8
--- /dev/null
+++ b/test/113-multidex/src/Inf6.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Inf6 {
+  public void zcall();
+  public void zcall1();
+  public void zcall2();
+  public void zcall3();
+  public void zcall4();
+  public void zcall5();
+  public void zcall6();
+  public void zcall7();
+  public void zcall8();
+  public void zcall9();
+}
\ No newline at end of file
diff --git a/test/113-multidex/src/Inf7.java b/test/113-multidex/src/Inf7.java
new file mode 100644
index 0000000..1982775
--- /dev/null
+++ b/test/113-multidex/src/Inf7.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Inf7 {
+  public void zcall();
+  public void zcall1();
+  public void zcall2();
+  public void zcall3();
+  public void zcall4();
+  public void zcall5();
+  public void zcall6();
+  public void zcall7();
+  public void zcall8();
+  public void zcall9();
+}
\ No newline at end of file
diff --git a/test/113-multidex/src/Inf8.java b/test/113-multidex/src/Inf8.java
new file mode 100644
index 0000000..87296db
--- /dev/null
+++ b/test/113-multidex/src/Inf8.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Inf8 {
+  public void zcall();
+  public void zcall1();
+  public void zcall2();
+  public void zcall3();
+  public void zcall4();
+  public void zcall5();
+  public void zcall6();
+  public void zcall7();
+  public void zcall8();
+  public void zcall9();
+}
\ No newline at end of file
diff --git a/test/113-multidex/src/Main.java b/test/113-multidex/src/Main.java
new file mode 100644
index 0000000..1c74220
--- /dev/null
+++ b/test/113-multidex/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    static public void main(String[] args) throws Exception {
+      System.out.println(new FillerA().getClass().getName());
+
+      Inf1 second = new Second();
+      System.out.println(second.getClass().getName());
+      second.zcall();
+      second.zcall1();
+      second.zcall2();
+      second.zcall3();
+      second.zcall4();
+      second.zcall5();
+      second.zcall6();
+      second.zcall7();
+      second.zcall8();
+      second.zcall9();
+    }
+
+}
diff --git a/test/113-multidex/src/Second.java b/test/113-multidex/src/Second.java
new file mode 100644
index 0000000..d0c2535
--- /dev/null
+++ b/test/113-multidex/src/Second.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Second implements Inf1, Inf2, Inf3, Inf4, Inf5, Inf6, Inf7, Inf8 {
+  public void zcall() {
+    System.out.println("Second::zcall");
+  }
+
+  public void zcall1() {
+    System.out.println("Second::zcall1");
+  }
+
+  public void zcall2() {
+    System.out.println("Second::zcall2");
+  }
+
+  public void zcall3() {
+    System.out.println("Second::zcall3");
+  }
+
+  public void zcall4() {
+    System.out.println("Second::zcall4");
+  }
+
+  public void zcall5() {
+    System.out.println("Second::zcall5");
+  }
+
+  public void zcall6() {
+    System.out.println("Second::zcall6");
+  }
+
+  public void zcall7() {
+    System.out.println("Second::zcall7");
+  }
+
+  public void zcall8() {
+    System.out.println("Second::zcall8");
+  }
+
+  public void zcall9() {
+    System.out.println("Second::zcall9");
+  }
+}
diff --git a/test/405-optimizing-long-allocator/expected.txt b/test/405-optimizing-long-allocator/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/405-optimizing-long-allocator/expected.txt
diff --git a/test/405-optimizing-long-allocator/info.txt b/test/405-optimizing-long-allocator/info.txt
new file mode 100644
index 0000000..b6b31ae
--- /dev/null
+++ b/test/405-optimizing-long-allocator/info.txt
@@ -0,0 +1 @@
+Tests with long for the optimizing compiler's register allocator.
diff --git a/test/405-optimizing-long-allocator/src/Main.java b/test/405-optimizing-long-allocator/src/Main.java
new file mode 100644
index 0000000..9fd840b
--- /dev/null
+++ b/test/405-optimizing-long-allocator/src/Main.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$ is a marker for the optimizing compiler to ensure
+// it compiles these methods.
+
+public class Main {
+  public static void main(String[] args) {
+
+    expectEquals(4, $opt$TestLostCopy());
+    expectEquals(-10, $opt$TestTwoLive());
+    expectEquals(-20, $opt$TestThreeLive());
+    expectEquals(5, $opt$TestFourLive());
+    expectEquals(10, $opt$TestMultipleLive());
+    expectEquals(1, $opt$TestWithBreakAndContinue());
+    expectEquals(-15, $opt$testSpillInIf(5, 6, 7));
+    expectEquals(-567, $opt$TestAgressiveLive1(1, 2, 3, 4, 5, 6, 7));
+    expectEquals(-77, $opt$TestAgressiveLive2(1, 2, 3, 4, 5, 6, 7));
+
+    expectEquals(-55834574850L, $opt$testSpillInIf(5, 6L << 32, 7L << 32));
+    expectEquals(-73014444553L, $opt$TestAgressiveLive1(
+        1L << 32, (1L << 32) + 1, 3L << 32, 4L << 32, 5L << 32, 6L << 32, (1L << 32) + 2));
+    expectEquals(-124554051632L, $opt$TestAgressiveLive2(
+        1L << 32, (1L << 32) + 1, 3L << 32, 4L << 32, 5L << 32, 6L << 32, 7L << 32));
+  }
+
+  public static long $opt$TestLostCopy() {
+    long a = 0;
+    long b = 0;
+    do {
+      b = a;
+      a++;
+    } while (a != 5);
+    return b;
+  }
+
+  public static long $opt$TestTwoLive() {
+    long a = 0;
+    long b = 0;
+    do {
+      a++;
+      b += 3;
+    } while (a != 5);
+    return a - b;
+  }
+
+  public static long $opt$TestThreeLive() {
+    long a = 0;
+    long b = 0;
+    long c = 0;
+    do {
+      a++;
+      b += 3;
+      c += 2;
+    } while (a != 5);
+    return a - b - c;
+  }
+
+  public static long $opt$TestFourLive() {
+    long a = 0;
+    long b = 0;
+    long c = 0;
+    long d = 0;
+    do {
+      a++;
+      b += 3;
+      c += 2;
+      d++;
+    } while (a != 5);
+    return d;
+  }
+
+  public static long $opt$TestMultipleLive() {
+    long a = 0;
+    long b = 0;
+    long c = 0;
+    long d = 0;
+    long e = 0;
+    long f = 0;
+    long g = 0;
+    do {
+      a++;
+      b++;
+      c++;
+      d++;
+      e += 3;
+      f += 2;
+      g += 2;
+    } while (a != 5);
+    return f;
+  }
+
+  public static long $opt$TestWithBreakAndContinue() {
+    long a = 0;
+    long b = 0;
+    do {
+      a++;
+      if (a == 2) {
+        continue;
+      }
+      b++;
+      if (a == 5) {
+        break;
+      }
+    } while (true);
+    return a - b;
+  }
+
+  public static long $opt$testSpillInIf(long a, long b, long c) {
+    long d = 0;
+    long e = 0;
+    if (a == 5) {
+      b++;
+      c++;
+      d += 2;
+      e += 3;
+    }
+
+    return a - b - c - d - e;
+  }
+
+  public static long $opt$TestAgressiveLive1(long a, long b, long c, long d, long e, long f, long g) {
+    long h = a - b;
+    long i = c - d;
+    long j = e - f;
+    long k = 42 + g - a;
+    do {
+      b++;
+      while (k != 1) {
+        --k;
+        ++i;
+        if (i == 9) {
+          ++i;
+        }
+        j += 5;
+      }
+      k = 9;
+      h++;
+    } while (h != 5);
+    return a - b - c - d - e - f - g - h - i - j - k;
+  }
+
+  public static long $opt$TestAgressiveLive2(long a, long b, long c, long d, long e, long f, long g) {
+    long h = a - b;
+    long i = c - d;
+    long j = e - f;
+    long k = 42 + g - a;
+    do {
+      h++;
+    } while (h != 5);
+    return a - b - c - d - e - f - g - h - i - j - k;
+  }
+
+  public static void expectEquals(long expected, long value) {
+    if (expected != value) {
+      throw new Error("Expected: " + expected + ", got: " + value);
+    }
+  }
+}
diff --git a/test/Android.oat.mk b/test/Android.oat.mk
index 3cf9f61..a560a17 100644
--- a/test/Android.oat.mk
+++ b/test/Android.oat.mk
@@ -86,7 +86,7 @@
 	$(hide) adb shell touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$(LOCAL_PID)
 	$(hide) adb shell rm $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$(LOCAL_PID)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
-	  adb shell sh -c "/system/bin/dalvikvm$($(2)ART_PHONY_TEST_TARGET_SUFFIX) \
+	  adb shell "/system/bin/dalvikvm$($(2)ART_PHONY_TEST_TARGET_SUFFIX) \
 	    $(DALVIKVM_FLAGS) $(4) -XXlib:libartd.so -Ximage:$(ART_TARGET_TEST_DIR)/core.art \
 	    -classpath $(ART_TARGET_TEST_DIR)/art-oat-test-$(1).jar \
 	    -Djava.library.path=$(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH) $(1) \
@@ -118,9 +118,6 @@
   $(call define-test-art-oat-rule-target,$(1),$(2),$$(optimizing_test_rule), \
     -Xcompiler-option --compiler-backend=Optimizing)
 
-  # Mark all tests with the optimizing compiler broken. TODO: fix.
-  ART_TEST_KNOWN_BROKEN += $$(optimizing_test_rule)
-
   ART_TEST_TARGET_OAT_OPTIMIZING$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += $$(optimizing_test_rule)
   ART_TEST_TARGET_OAT_OPTIMIZING_RULES += $$(optimizing_test_rule)
   ART_TEST_TARGET_OAT_OPTIMIZING_$(1)_RULES += $$(optimizing_test_rule)
@@ -166,14 +163,14 @@
 # All tests require the host executables, libarttest and the core images.
 ART_TEST_HOST_OAT_DEPENDENCIES := \
   $(ART_HOST_EXECUTABLES) \
-  $(ART_HOST_LIBRARY_PATH)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
-  $(ART_HOST_LIBRARY_PATH)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(HOST_CORE_IMG_OUT)
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_OAT_DEPENDENCIES += \
-  $(2ND_ART_HOST_LIBRARY_PATH)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_ART_HOST_LIBRARY_PATH)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_HOST_CORE_IMG_OUT)
 endif
 
@@ -234,9 +231,6 @@
   optimizing_test_rule := test-art-host-oat-optimizing-$(1)$$($(2)ART_PHONY_TEST_HOST_SUFFIX)
   $(call define-test-art-oat-rule-host,$(1),$(2),$$(optimizing_test_rule),--compiler-backend=Optimizing,)
 
-  # Mark all tests with the optimizing compiler broken. TODO: fix.
-  ART_TEST_KNOWN_BROKEN += $$(optimizing_test_rule)
-
   ART_TEST_HOST_OAT_OPTIMIZING$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += $$(optimizing_test_rule)
   ART_TEST_HOST_OAT_OPTIMIZING_RULES += $$(optimizing_test_rule)
   ART_TEST_HOST_OAT_OPTIMIZING_$(1)_RULES += $$(optimizing_test_rule)
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c2ff98f..25bcf0a 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -109,12 +109,12 @@
 # All tests require the host executables and the core images.
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \
   $(ART_HOST_EXECUTABLES) \
-  $(ART_HOST_LIBRARY_PATH)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(HOST_CORE_IMG_OUT)
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
-  $(2ND_ART_HOST_LIBRARY_PATH)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_HOST_CORE_IMG_OUT)
 endif
 
@@ -177,11 +177,6 @@
 	  echo "run-test run as top-level target, removing test directory $(ART_HOST_TEST_DIR)" && \
 	  rm -r $(ART_HOST_TEST_DIR)) || true
 
-  # Mark all tests with the optimizing compiler broken. TODO: fix.
-  ifeq ($(3),optimizing)
-    ART_TEST_KNOWN_BROKEN += $$(run_test_rule_name)
-  endif
-
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)$(4)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)_RULES += $$(run_test_rule_name)
   ART_TEST_$$(uc_host_or_target)_RUN_TEST_$$(uc_compiler)_$(1)_RULES += $$(run_test_rule_name)
diff --git a/tools/art b/tools/art
index e3f409c..85517d3 100755
--- a/tools/art
+++ b/tools/art
@@ -47,12 +47,23 @@
 ANDROID_BUILD_TOP="$(cd "${PROG_DIR}/../../../../" ; pwd -P)/"
 ANDROID_HOST_OUT=$PROG_DIR/..
 ANDROID_DATA=$PWD/android-data$$
+DALVIKVM_EXECUTABLE=$ANDROID_HOST_OUT/bin/dalvikvm
+
+function find_libdir() {
+  if [ "$(readlink "$DALVIKVM_EXECUTABLE")" = "dalvikvm64" ]; then
+    echo "lib64"
+  else
+    echo "lib"
+  fi
+}
+
+LD_LIBRARY_PATH=$ANDROID_HOST_OUT/"$(find_libdir)"
 
 mkdir -p $ANDROID_DATA/dalvik-cache/{x86,x86_64}
 ANDROID_DATA=$ANDROID_DATA \
   ANDROID_ROOT=$ANDROID_HOST_OUT \
-  LD_LIBRARY_PATH=$ANDROID_HOST_OUT/lib \
-  $invoke_with $ANDROID_HOST_OUT/bin/dalvikvm $lib \
+  LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
+  $invoke_with $DALVIKVM_EXECUTABLE $lib \
     -Ximage:$ANDROID_HOST_OUT/framework/core.art \
      "$@"
 EXIT_STATUS=$?
diff --git a/tools/generate-operator-out.py b/tools/generate-operator-out.py
index 56b8674..6baa6e3 100755
--- a/tools/generate-operator-out.py
+++ b/tools/generate-operator-out.py
@@ -23,11 +23,12 @@
 import sys
 
 
-_ENUM_START_RE = re.compile(r'\benum\b\s+(\S+)\s+:?.*\{')
+_ENUM_START_RE = re.compile(r'\benum\b\s+(class\s+)?(\S+)\s+:?.*\{(\s+// private)?')
 _ENUM_VALUE_RE = re.compile(r'([A-Za-z0-9_]+)(.*)')
 _ENUM_END_RE = re.compile(r'^\s*\};$')
 _ENUMS = {}
 _NAMESPACES = {}
+_ENUM_CLASSES = {}
 
 def Confused(filename, line_number, line):
   sys.stderr.write('%s:%d: confused by:\n%s\n' % (filename, line_number, line))
@@ -38,7 +39,9 @@
 def ProcessFile(filename):
   lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
   in_enum = False
+  is_enum_class = False
   line_number = 0
+  
 
   namespaces = []
   enclosing_classes = []
@@ -51,11 +54,18 @@
       m = _ENUM_START_RE.search(raw_line)
       if m:
         # Yes, so add an empty entry to _ENUMS for this enum.
-        enum_name = m.group(1)
+        
+        # Except when it's private
+        if m.group(3) is not None:
+          continue
+        
+        is_enum_class = m.group(1) is not None
+        enum_name = m.group(2)
         if len(enclosing_classes) > 0:
           enum_name = '::'.join(enclosing_classes) + '::' + enum_name
         _ENUMS[enum_name] = []
         _NAMESPACES[enum_name] = '::'.join(namespaces)
+        _ENUM_CLASSES[enum_name] = is_enum_class
         in_enum = True
         continue
 
@@ -139,7 +149,10 @@
       Confused(filename, line_number, raw_line)
 
     if len(enclosing_classes) > 0:
-      enum_value = '::'.join(enclosing_classes) + '::' + enum_value
+      if is_enum_class:
+        enum_value = enum_name + '::' + enum_value
+      else:
+        enum_value = '::'.join(enclosing_classes) + '::' + enum_value
 
     _ENUMS[enum_name].append((enum_value, enum_text))
 
@@ -170,7 +183,8 @@
     print '  switch (rhs) {'
     for (enum_value, enum_text) in _ENUMS[enum_name]:
       print '    case %s: os << "%s"; break;' % (enum_value, enum_text)
-    print '    default: os << "%s[" << static_cast<int>(rhs) << "]"; break;' % enum_name
+    if not _ENUM_CLASSES[enum_name]:
+      print '    default: os << "%s[" << static_cast<int>(rhs) << "]"; break;' % enum_name
     print '  }'
     print '  return os;'
     print '}'
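
The generate-operator-out.py changes make the script recognise C++11 "enum class" declarations, skip enums marked "// private", qualify the generated case labels with the enum's own name, and drop the fallback default case for scoped enums. A hedged sketch of the kind of operator<< this is intended to produce, for an illustrative enum that is not taken from the ART sources:

    #include <ostream>

    enum class TestEnum { kFoo, kBar };

    // Case labels are qualified with the enum name, and no default case is
    // emitted for a scoped enum.
    std::ostream& operator<<(std::ostream& os, const TestEnum& rhs) {
      switch (rhs) {
        case TestEnum::kFoo: os << "kFoo"; break;
        case TestEnum::kBar: os << "kBar"; break;
      }
      return os;
    }
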