Merge "Visit methods in stack frames during root visits."
diff --git a/Android.mk b/Android.mk
index 2196d59..57185ca 100644
--- a/Android.mk
+++ b/Android.mk
@@ -151,7 +151,7 @@
 # host test targets
 
 .PHONY: test-art-host-vixl
-ifneq ($(BUILD_HOST_64bit),)
+ifneq ($(HOST_IS_64_BIT),)
 test-art-host-vixl: $(ANDROID_HOST_OUT)/bin/cctest_vixl
 	$(ANDROID_HOST_OUT)/bin/cctest_vixl --run_all
 	@echo vixl PASSED
diff --git a/build/Android.common.mk b/build/Android.common.mk
index c108558..cc600bd 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -37,11 +37,7 @@
 ART_BUILD_HOST_NDEBUG ?= $(WITH_HOST_DALVIK)
 ART_BUILD_HOST_DEBUG ?= $(WITH_HOST_DALVIK)
 
-ifeq ($(BUILD_HOST_64bit),)
-ART_HOST_ARCH := x86
-else
-ART_HOST_ARCH := x86_64
-endif
+ART_HOST_ARCH := $(HOST_ARCH)
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),false)
 $(info Disabling ART_BUILD_TARGET_NDEBUG)
@@ -254,7 +250,9 @@
   ifeq ($(TARGET_CPU_SMP),false)
     ART_TARGET_CFLAGS += -DANDROID_SMP=0
   else
-    $(error TARGET_CPU_SMP must be (true|false), found $(TARGET_CPU_SMP))
+    $(warning TARGET_CPU_SMP should be (true|false), found $(TARGET_CPU_SMP))
+    # Make sure we emit barriers for the worst case.
+    ART_TARGET_CFLAGS += -DANDROID_SMP=1
   endif
 endif
 ART_TARGET_CFLAGS += $(ART_DEFAULT_GC_TYPE_CFLAGS)
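
ANDROID_SMP feeds the runtime's compile-time barrier policy, which is why the warning path above forces the SMP-safe value instead of erroring out. A minimal sketch of the kind of guard the define enables; the helper below is an assumption for illustration, not a function from this tree:

    #include <atomic>

    // With -DANDROID_SMP=0 a uniprocessor build may elide hardware barriers;
    // the fallback above forces -DANDROID_SMP=1 so the worst case stays safe.
    static constexpr bool kRunningOnSmp = (ANDROID_SMP != 0);

    // Hypothetical helper: release ordering only matters when other CPUs exist.
    inline void StoreReleaseIfSmp(std::atomic<int>* target, int value) {
      target->store(value, kRunningOnSmp ? std::memory_order_release
                                         : std::memory_order_relaxed);
    }
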
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 406c2a1..e9db47e 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -79,6 +79,7 @@
 	compiler/optimizing/codegen_test.cc \
 	compiler/optimizing/dominator_test.cc \
 	compiler/optimizing/find_loops_test.cc \
+	compiler/optimizing/linearize_test.cc \
 	compiler/optimizing/liveness_test.cc \
 	compiler/optimizing/pretty_printer_test.cc \
 	compiler/optimizing/ssa_test.cc \
@@ -249,3 +250,9 @@
     $(foreach file,$(COMPILER_GTEST_HOST_SRC_FILES), $(eval $(call build-art-test,host,$(file),art/compiler,libartd-compiler)))
   endif
 endif
+
+# Used outside the art project to get a list of the current tests
+RUNTIME_TARGET_GTEST_MAKE_TARGETS :=
+$(foreach file, $(RUNTIME_GTEST_TARGET_SRC_FILES), $(eval RUNTIME_TARGET_GTEST_MAKE_TARGETS += $$(notdir $$(basename $$(file)))))
+COMPILER_TARGET_GTEST_MAKE_TARGETS :=
+$(foreach file, $(COMPILER_GTEST_TARGET_SRC_FILES), $(eval COMPILER_TARGET_GTEST_MAKE_TARGETS += $$(notdir $$(basename $$(file)))))
diff --git a/build/Android.libcxx.mk b/build/Android.libcxx.mk
index 3dd1eb7..f84e957 100644
--- a/build/Android.libcxx.mk
+++ b/build/Android.libcxx.mk
@@ -14,7 +14,10 @@
 # limitations under the License.
 #
 
+LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.libcxx.mk
+
 ifneq ($(LOCAL_IS_HOST_MODULE),true)
   include external/stlport/libstlport.mk
+  LOCAL_CFLAGS += -DART_WITH_STLPORT=1
   # include external/libcxx/libcxx.mk
 endif
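
The new ART_WITH_STLPORT define is what lets the UniquePtr.h include sites later in this change move to UniquePtrCompat.h. A plausible shape for that compatibility header, sketched under the assumption that libc++ builds can alias the standard smart pointer; this is not the actual file:

    #ifndef ART_UNIQUE_PTR_COMPAT_H_
    #define ART_UNIQUE_PTR_COMPAT_H_

    #ifdef ART_WITH_STLPORT
    // stlport provides no std::unique_ptr, so keep the legacy UniquePtr class.
    #include "UniquePtr.h"
    #else
    #include <memory>
    // On libc++ the legacy name can simply alias the standard type.
    template <typename T>
    using UniquePtr = std::unique_ptr<T>;
    #endif

    #endif  // ART_UNIQUE_PTR_COMPAT_H_
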
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 844b53c..c3c9961 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -22,7 +22,7 @@
 
 #include "instruction_set.h"
 #include "utils.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace llvm {
   class Function;
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 79a85db..76838d7 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -111,7 +111,7 @@
       mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     case kX86_64:
-      mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
+      mir_to_lir = X86_64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     default:
       LOG(FATAL) << "Unexpected instruction set: " << cu->instruction_set;
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 05ab8ca..5b4492f 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -126,6 +126,104 @@
   kMirOpCheck,
   kMirOpCheckPart2,
   kMirOpSelect,
+
+  // Vector opcodes:
+  // TypeSize is an encoded field giving the element type and the vector size.
+  // It is encoded as OpSize << 16 | (number of bits in vector)
+  //
+  // Destination and source are integers that will be interpreted by the
+  // backend that supports vector operations.  Backends are permitted to support only
+  // certain vector register sizes.
+  //
+  // At this point, only two-operand instructions are supported.  Three-operand instructions
+  // could be supported by using a bit in TypeSize and arg[0] where needed.
+
+  // @brief MIR to move constant data to a vector register
+  // vA: number of bits in register
+  // vB: destination
+  // args[0]~args[3]: up to 128 bits of data for initialization
+  kMirOpConstVector,
+
+  // @brief MIR to move a vectorized register to another
+  // vA: TypeSize
+  // vB: destination
+  // vC: source
+  kMirOpMoveVector,
+
+  // @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedMultiply,
+
+  // @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedAddition,
+
+  // @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedSubtract,
+
+  // @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedShiftLeft,
+
+  // @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedSignedShiftRight,
+
+  // @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: immediate
+  kMirOpPackedUnsignedShiftRight,
+
+  // @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedAnd,
+
+  // @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedOr,
+
+  // @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
+  // vA: TypeSize
+  // vB: destination and source
+  // vC: source
+  kMirOpPackedXor,
+
+  // @brief Reduce a 128-bit packed element into a single VR by taking the lower bits
+  // @details Instruction does a horizontal addition of the packed elements and then adds the result to VR
+  // vA: TypeSize
+  // vB: destination and source VR (not vector register)
+  // vC: source (vector register)
+  kMirOpPackedAddReduce,
+
+  // @brief Extract a packed element into a single VR.
+  // vA: TypeSize
+  // vB: destination VR (not vector register)
+  // vC: source (vector register)
+  // arg[0]: The index to use for extraction from vector register (which packed element)
+  kMirOpPackedReduce,
+
+  // @brief Create a vector value, with all TypeSize values equal to vC
+  // vA: TypeSize
+  // vB: destination vector register
+  // vC: source VR (not vector register)
+  kMirOpPackedSet,
+
   kMirOpLast,
 };
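
A worked example of the TypeSize encoding documented above; only the bit layout comes from the comment, the concrete OpSize value used here is an assumption:

    #include <cstdint>

    // TypeSize = OpSize << 16 | (number of bits in vector), as documented above.
    constexpr uint32_t MakeTypeSize(uint32_t op_size, uint32_t vector_bits) {
      return (op_size << 16) | vector_bits;
    }
    constexpr uint32_t OpSizeOf(uint32_t type_size)     { return type_size >> 16; }
    constexpr uint32_t VectorBitsOf(uint32_t type_size) { return type_size & 0xffff; }

    // E.g. 32-bit elements (assuming OpSize value 0) in a 128-bit register:
    static_assert(MakeTypeSize(0, 128) == 128, "upper half empty, lower half = width");
    static_assert(VectorBitsOf(MakeTypeSize(3, 128)) == 128, "width round-trips");
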
 
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index a224de2..3bc060b 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -147,8 +147,8 @@
     // Instruction::MOVE_RESULT,
     // Instruction::MOVE_RESULT_WIDE,
     // Instruction::MOVE_RESULT_OBJECT,
-    // Instruction::MOVE_EXCEPTION,
-    // Instruction::RETURN_VOID,
+    Instruction::MOVE_EXCEPTION,
+    Instruction::RETURN_VOID,
     // Instruction::RETURN,
     // Instruction::RETURN_WIDE,
     // Instruction::RETURN_OBJECT,
@@ -163,6 +163,283 @@
     // Instruction::CONST_STRING,
     // Instruction::CONST_STRING_JUMBO,
     // Instruction::CONST_CLASS,
+    Instruction::MONITOR_ENTER,
+    Instruction::MONITOR_EXIT,
+    // Instruction::CHECK_CAST,
+    // Instruction::INSTANCE_OF,
+    // Instruction::ARRAY_LENGTH,
+    // Instruction::NEW_INSTANCE,
+    // Instruction::NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY,
+    // Instruction::FILLED_NEW_ARRAY_RANGE,
+    // Instruction::FILL_ARRAY_DATA,
+    Instruction::THROW,
+    // Instruction::GOTO,
+    // Instruction::GOTO_16,
+    // Instruction::GOTO_32,
+    // Instruction::PACKED_SWITCH,
+    // Instruction::SPARSE_SWITCH,
+    // Instruction::CMPL_FLOAT,
+    // Instruction::CMPG_FLOAT,
+    // Instruction::CMPL_DOUBLE,
+    // Instruction::CMPG_DOUBLE,
+    // Instruction::CMP_LONG,
+    // Instruction::IF_EQ,
+    // Instruction::IF_NE,
+    // Instruction::IF_LT,
+    // Instruction::IF_GE,
+    // Instruction::IF_GT,
+    // Instruction::IF_LE,
+    // Instruction::IF_EQZ,
+    // Instruction::IF_NEZ,
+    // Instruction::IF_LTZ,
+    // Instruction::IF_GEZ,
+    // Instruction::IF_GTZ,
+    // Instruction::IF_LEZ,
+    // Instruction::UNUSED_3E,
+    // Instruction::UNUSED_3F,
+    // Instruction::UNUSED_40,
+    // Instruction::UNUSED_41,
+    // Instruction::UNUSED_42,
+    // Instruction::UNUSED_43,
+    // Instruction::AGET,
+    // Instruction::AGET_WIDE,
+    // Instruction::AGET_OBJECT,
+    // Instruction::AGET_BOOLEAN,
+    // Instruction::AGET_BYTE,
+    // Instruction::AGET_CHAR,
+    // Instruction::AGET_SHORT,
+    // Instruction::APUT,
+    // Instruction::APUT_WIDE,
+    // Instruction::APUT_OBJECT,
+    // Instruction::APUT_BOOLEAN,
+    // Instruction::APUT_BYTE,
+    // Instruction::APUT_CHAR,
+    // Instruction::APUT_SHORT,
+    // Instruction::IGET,
+    // Instruction::IGET_WIDE,
+    // Instruction::IGET_OBJECT,
+    // Instruction::IGET_BOOLEAN,
+    // Instruction::IGET_BYTE,
+    // Instruction::IGET_CHAR,
+    // Instruction::IGET_SHORT,
+    // Instruction::IPUT,
+    // Instruction::IPUT_WIDE,
+    // Instruction::IPUT_OBJECT,
+    // Instruction::IPUT_BOOLEAN,
+    // Instruction::IPUT_BYTE,
+    // Instruction::IPUT_CHAR,
+    // Instruction::IPUT_SHORT,
+    Instruction::SGET,
+    // Instruction::SGET_WIDE,
+    Instruction::SGET_OBJECT,
+    // Instruction::SGET_BOOLEAN,
+    // Instruction::SGET_BYTE,
+    // Instruction::SGET_CHAR,
+    // Instruction::SGET_SHORT,
+    Instruction::SPUT,
+    // Instruction::SPUT_WIDE,
+    // Instruction::SPUT_OBJECT,
+    // Instruction::SPUT_BOOLEAN,
+    // Instruction::SPUT_BYTE,
+    // Instruction::SPUT_CHAR,
+    // Instruction::SPUT_SHORT,
+    Instruction::INVOKE_VIRTUAL,
+    Instruction::INVOKE_SUPER,
+    Instruction::INVOKE_DIRECT,
+    Instruction::INVOKE_STATIC,
+    Instruction::INVOKE_INTERFACE,
+    // Instruction::RETURN_VOID_BARRIER,
+    // Instruction::INVOKE_VIRTUAL_RANGE,
+    // Instruction::INVOKE_SUPER_RANGE,
+    // Instruction::INVOKE_DIRECT_RANGE,
+    // Instruction::INVOKE_STATIC_RANGE,
+    // Instruction::INVOKE_INTERFACE_RANGE,
+    // Instruction::UNUSED_79,
+    // Instruction::UNUSED_7A,
+    // Instruction::NEG_INT,
+    // Instruction::NOT_INT,
+    // Instruction::NEG_LONG,
+    // Instruction::NOT_LONG,
+    // Instruction::NEG_FLOAT,
+    // Instruction::NEG_DOUBLE,
+    // Instruction::INT_TO_LONG,
+    // Instruction::INT_TO_FLOAT,
+    // Instruction::INT_TO_DOUBLE,
+    // Instruction::LONG_TO_INT,
+    // Instruction::LONG_TO_FLOAT,
+    // Instruction::LONG_TO_DOUBLE,
+    // Instruction::FLOAT_TO_INT,
+    // Instruction::FLOAT_TO_LONG,
+    // Instruction::FLOAT_TO_DOUBLE,
+    // Instruction::DOUBLE_TO_INT,
+    // Instruction::DOUBLE_TO_LONG,
+    // Instruction::DOUBLE_TO_FLOAT,
+    // Instruction::INT_TO_BYTE,
+    // Instruction::INT_TO_CHAR,
+    // Instruction::INT_TO_SHORT,
+    // Instruction::ADD_INT,
+    // Instruction::SUB_INT,
+    // Instruction::MUL_INT,
+    // Instruction::DIV_INT,
+    // Instruction::REM_INT,
+    // Instruction::AND_INT,
+    // Instruction::OR_INT,
+    // Instruction::XOR_INT,
+    // Instruction::SHL_INT,
+    // Instruction::SHR_INT,
+    // Instruction::USHR_INT,
+    // Instruction::ADD_LONG,
+    // Instruction::SUB_LONG,
+    // Instruction::MUL_LONG,
+    // Instruction::DIV_LONG,
+    // Instruction::REM_LONG,
+    // Instruction::AND_LONG,
+    // Instruction::OR_LONG,
+    // Instruction::XOR_LONG,
+    // Instruction::SHL_LONG,
+    // Instruction::SHR_LONG,
+    // Instruction::USHR_LONG,
+    // Instruction::ADD_FLOAT,
+    // Instruction::SUB_FLOAT,
+    // Instruction::MUL_FLOAT,
+    // Instruction::DIV_FLOAT,
+    // Instruction::REM_FLOAT,
+    // Instruction::ADD_DOUBLE,
+    // Instruction::SUB_DOUBLE,
+    // Instruction::MUL_DOUBLE,
+    // Instruction::DIV_DOUBLE,
+    // Instruction::REM_DOUBLE,
+    // Instruction::ADD_INT_2ADDR,
+    // Instruction::SUB_INT_2ADDR,
+    // Instruction::MUL_INT_2ADDR,
+    // Instruction::DIV_INT_2ADDR,
+    // Instruction::REM_INT_2ADDR,
+    // Instruction::AND_INT_2ADDR,
+    // Instruction::OR_INT_2ADDR,
+    // Instruction::XOR_INT_2ADDR,
+    // Instruction::SHL_INT_2ADDR,
+    // Instruction::SHR_INT_2ADDR,
+    // Instruction::USHR_INT_2ADDR,
+    // Instruction::ADD_LONG_2ADDR,
+    // Instruction::SUB_LONG_2ADDR,
+    // Instruction::MUL_LONG_2ADDR,
+    // Instruction::DIV_LONG_2ADDR,
+    // Instruction::REM_LONG_2ADDR,
+    // Instruction::AND_LONG_2ADDR,
+    // Instruction::OR_LONG_2ADDR,
+    // Instruction::XOR_LONG_2ADDR,
+    // Instruction::SHL_LONG_2ADDR,
+    // Instruction::SHR_LONG_2ADDR,
+    // Instruction::USHR_LONG_2ADDR,
+    // Instruction::ADD_FLOAT_2ADDR,
+    // Instruction::SUB_FLOAT_2ADDR,
+    // Instruction::MUL_FLOAT_2ADDR,
+    // Instruction::DIV_FLOAT_2ADDR,
+    // Instruction::REM_FLOAT_2ADDR,
+    // Instruction::ADD_DOUBLE_2ADDR,
+    // Instruction::SUB_DOUBLE_2ADDR,
+    // Instruction::MUL_DOUBLE_2ADDR,
+    // Instruction::DIV_DOUBLE_2ADDR,
+    // Instruction::REM_DOUBLE_2ADDR,
+    // Instruction::ADD_INT_LIT16,
+    // Instruction::RSUB_INT,
+    // Instruction::MUL_INT_LIT16,
+    // Instruction::DIV_INT_LIT16,
+    // Instruction::REM_INT_LIT16,
+    // Instruction::AND_INT_LIT16,
+    // Instruction::OR_INT_LIT16,
+    // Instruction::XOR_INT_LIT16,
+    Instruction::ADD_INT_LIT8,
+    // Instruction::RSUB_INT_LIT8,
+    // Instruction::MUL_INT_LIT8,
+    // Instruction::DIV_INT_LIT8,
+    // Instruction::REM_INT_LIT8,
+    // Instruction::AND_INT_LIT8,
+    // Instruction::OR_INT_LIT8,
+    // Instruction::XOR_INT_LIT8,
+    // Instruction::SHL_INT_LIT8,
+    // Instruction::SHR_INT_LIT8,
+    // Instruction::USHR_INT_LIT8,
+    // Instruction::IGET_QUICK,
+    // Instruction::IGET_WIDE_QUICK,
+    // Instruction::IGET_OBJECT_QUICK,
+    // Instruction::IPUT_QUICK,
+    // Instruction::IPUT_WIDE_QUICK,
+    // Instruction::IPUT_OBJECT_QUICK,
+    // Instruction::INVOKE_VIRTUAL_QUICK,
+    // Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
+    // Instruction::UNUSED_EB,
+    // Instruction::UNUSED_EC,
+    // Instruction::UNUSED_ED,
+    // Instruction::UNUSED_EE,
+    // Instruction::UNUSED_EF,
+    // Instruction::UNUSED_F0,
+    // Instruction::UNUSED_F1,
+    // Instruction::UNUSED_F2,
+    // Instruction::UNUSED_F3,
+    // Instruction::UNUSED_F4,
+    // Instruction::UNUSED_F5,
+    // Instruction::UNUSED_F6,
+    // Instruction::UNUSED_F7,
+    // Instruction::UNUSED_F8,
+    // Instruction::UNUSED_F9,
+    // Instruction::UNUSED_FA,
+    // Instruction::UNUSED_FB,
+    // Instruction::UNUSED_FC,
+    // Instruction::UNUSED_FD,
+    // Instruction::UNUSED_FE,
+    // Instruction::UNUSED_FF,
+
+    // ----- ExtendedMIROpcode -----
+    // kMirOpPhi,
+    // kMirOpCopy,
+    // kMirOpFusedCmplFloat,
+    // kMirOpFusedCmpgFloat,
+    // kMirOpFusedCmplDouble,
+    // kMirOpFusedCmpgDouble,
+    // kMirOpFusedCmpLong,
+    // kMirOpNop,
+    // kMirOpNullCheck,
+    // kMirOpRangeCheck,
+    // kMirOpDivZeroCheck,
+    kMirOpCheck,
+    // kMirOpCheckPart2,
+    // kMirOpSelect,
+    // kMirOpLast,
+};
+
+// TODO: Remove this when we are able to compile everything.
+int x86_64_support_list[] = {
+    Instruction::NOP,
+    // Instruction::MOVE,
+    // Instruction::MOVE_FROM16,
+    // Instruction::MOVE_16,
+    // Instruction::MOVE_WIDE,
+    // Instruction::MOVE_WIDE_FROM16,
+    // Instruction::MOVE_WIDE_16,
+    // Instruction::MOVE_OBJECT,
+    // Instruction::MOVE_OBJECT_FROM16,
+    // Instruction::MOVE_OBJECT_16,
+    // Instruction::MOVE_RESULT,
+    // Instruction::MOVE_RESULT_WIDE,
+    // Instruction::MOVE_RESULT_OBJECT,
+    // Instruction::MOVE_EXCEPTION,
+    Instruction::RETURN_VOID,
+    Instruction::RETURN,
+    // Instruction::RETURN_WIDE,
+    Instruction::RETURN_OBJECT,
+    // Instruction::CONST_4,
+    // Instruction::CONST_16,
+    // Instruction::CONST,
+    // Instruction::CONST_HIGH16,
+    // Instruction::CONST_WIDE_16,
+    // Instruction::CONST_WIDE_32,
+    // Instruction::CONST_WIDE,
+    // Instruction::CONST_WIDE_HIGH16,
+    // Instruction::CONST_STRING,
+    // Instruction::CONST_STRING_JUMBO,
+    // Instruction::CONST_CLASS,
     // Instruction::MONITOR_ENTER,
     // Instruction::MONITOR_EXIT,
     // Instruction::CHECK_CAST,
@@ -230,20 +507,20 @@
     // Instruction::IPUT_BYTE,
     // Instruction::IPUT_CHAR,
     // Instruction::IPUT_SHORT,
-    // Instruction::SGET,
+    Instruction::SGET,
     // Instruction::SGET_WIDE,
-    // Instruction::SGET_OBJECT,
-    // Instruction::SGET_BOOLEAN,
-    // Instruction::SGET_BYTE,
-    // Instruction::SGET_CHAR,
-    // Instruction::SGET_SHORT,
-    // Instruction::SPUT,
+    Instruction::SGET_OBJECT,
+    Instruction::SGET_BOOLEAN,
+    Instruction::SGET_BYTE,
+    Instruction::SGET_CHAR,
+    Instruction::SGET_SHORT,
+    Instruction::SPUT,
     // Instruction::SPUT_WIDE,
-    // Instruction::SPUT_OBJECT,
-    // Instruction::SPUT_BOOLEAN,
-    // Instruction::SPUT_BYTE,
-    // Instruction::SPUT_CHAR,
-    // Instruction::SPUT_SHORT,
+    Instruction::SPUT_OBJECT,
+    Instruction::SPUT_BOOLEAN,
+    Instruction::SPUT_BYTE,
+    Instruction::SPUT_CHAR,
+    Instruction::SPUT_SHORT,
     Instruction::INVOKE_VIRTUAL,
     Instruction::INVOKE_SUPER,
     Instruction::INVOKE_DIRECT,
@@ -409,29 +686,39 @@
     // kMirOpLast,
 };
 
+// Z : boolean
+// B : byte
+// S : short
+// C : char
+// I : int
+// J : long
+// F : float
+// D : double
+// L : reference(object, array)
+// V : void
+// (ARM64) The current calling convention only supports 32-bit softfp,
+//         which has problems with long, float, double.
+constexpr char arm64_supported_types[] = "ZBSCILV";
+// (x86_64) We still have trouble compiling longs/doubles/floats.
+constexpr char x86_64_supported_types[] = "ZBSCILV";
+
 // TODO: Remove this when we are able to compile everything.
-static bool CanCompileShorty(const char* shorty) {
+static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
   uint32_t shorty_size = strlen(shorty);
   CHECK_GE(shorty_size, 1u);
   // Set a limitation on maximum number of parameters.
   // Note : there is an implied "method*" parameter, and probably "this" as well.
   // 1 is for the return type. Currently, we only accept 2 parameters at the most.
+  // (x86_64): For now we have the same limitation. But we might want to split this
+  //           check in the future into two separate cases for arm64 and x86_64.
   if (shorty_size > (1 + 2)) {
     return false;
   }
-  // Z : boolean
-  // B : byte
-  // S : short
-  // C : char
-  // I : int
-  // L : long
-  // F : float
-  // D : double
-  // L : reference(object, array)
-  // V : void
-  // Current calling conversion only support 32bit softfp
-  // which has problems with long, float, double
-  constexpr char supported_types[] = "ZBSCILV";
+
+  const char* supported_types = arm64_supported_types;
+  if (instruction_set == kX86_64) {
+    supported_types = x86_64_supported_types;
+  }
   for (uint32_t i = 0; i < shorty_size; i++) {
     if (strchr(supported_types, shorty[i]) == nullptr) {
       return false;
@@ -445,14 +732,21 @@
 static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
                              CompilationUnit& cu) {
   // There is some limitation with current ARM 64 backend.
-  if (cu.instruction_set == kArm64) {
+  if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) {
     // Check if we can compile the prototype.
     const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
-    if (!CanCompileShorty(shorty)) {
+    if (!CanCompileShorty(shorty, cu.instruction_set)) {
       VLOG(compiler) << "Unsupported shorty : " << shorty;
       return false;
     }
 
+    const int *support_list = arm64_support_list;
+    int support_list_size = arraysize(arm64_support_list);
+    if (cu.instruction_set == kX86_64) {
+      support_list = x86_64_support_list;
+      support_list_size = arraysize(x86_64_support_list);
+    }
+
     for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) {
       BasicBlock *bb = cu.mir_graph->GetBasicBlock(idx);
       if (bb == NULL) continue;
@@ -460,8 +754,8 @@
       for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
         int opcode = mir->dalvikInsn.opcode;
         // Check if we support the byte code.
-        if (std::find(arm64_support_list, arm64_support_list + arraysize(arm64_support_list),
-            opcode) == arm64_support_list + arraysize(arm64_support_list)) {
+        if (std::find(support_list, support_list + support_list_size,
+            opcode) == support_list + support_list_size) {
           if (opcode < kMirOpFirst) {
             VLOG(compiler) << "Unsupported dalvik byte code : "
                            << mir->dalvikInsn.opcode;
@@ -480,7 +774,7 @@
           uint32_t invoke_method_idx = mir->dalvikInsn.vB;
           const char* invoke_method_shorty = dex_file.GetMethodShorty(
               dex_file.GetMethodId(invoke_method_idx));
-          if (!CanCompileShorty(invoke_method_shorty)) {
+          if (!CanCompileShorty(invoke_method_shorty, cu.instruction_set)) {
             VLOG(compiler) << "Unsupported to invoke '"
                            << PrettyMethod(invoke_method_idx, dex_file)
                            << "' with shorty : " << invoke_method_shorty;
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 6d67afb..7049f8c 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_DEX_LOCAL_VALUE_NUMBERING_H_
 
 #include "compiler_internals.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utils/scoped_arena_allocator.h"
 #include "utils/scoped_arena_containers.h"
 
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index c3b5a25..1c9e2e2 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -23,7 +23,7 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utils/scoped_arena_containers.h"
 
 namespace art {
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index ca90a83..ba4224e 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -45,6 +45,20 @@
   "Check1",
   "Check2",
   "Select",
+  "ConstVector",
+  "MoveVector",
+  "PackedMultiply",
+  "PackedAddition",
+  "PackedSubtract",
+  "PackedShiftLeft",
+  "PackedSignedShiftRight",
+  "PackedUnsignedShiftRight",
+  "PackedAnd",
+  "PackedOr",
+  "PackedXor",
+  "PackedAddReduce",
+  "PackedReduce",
+  "PackedSet",
 };
 
 MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
@@ -798,13 +812,35 @@
                 bb->first_mir_insn ? " | " : " ");
         for (mir = bb->first_mir_insn; mir; mir = mir->next) {
             int opcode = mir->dalvikInsn.opcode;
-            fprintf(file, "    {%04x %s %s %s\\l}%s\\\n", mir->offset,
-                    mir->ssa_rep ? GetDalvikDisassembly(mir) :
-                    (opcode < kMirOpFirst) ?  Instruction::Name(mir->dalvikInsn.opcode) :
-                    extended_mir_op_names_[opcode - kMirOpFirst],
-                    (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
-                    (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
-                    mir->next ? " | " : " ");
+            if (opcode > kMirOpSelect && opcode < kMirOpLast) {
+              if (opcode == kMirOpConstVector) {
+                fprintf(file, "    {%04x %s %d %d %d %d %d %d\\l}%s\\\n", mir->offset,
+                        extended_mir_op_names_[kMirOpConstVector - kMirOpFirst],
+                        mir->dalvikInsn.vA,
+                        mir->dalvikInsn.vB,
+                        mir->dalvikInsn.arg[0],
+                        mir->dalvikInsn.arg[1],
+                        mir->dalvikInsn.arg[2],
+                        mir->dalvikInsn.arg[3],
+                        mir->next ? " | " : " ");
+              } else {
+                fprintf(file, "    {%04x %s %d %d %d\\l}%s\\\n", mir->offset,
+                        extended_mir_op_names_[opcode - kMirOpFirst],
+                        mir->dalvikInsn.vA,
+                        mir->dalvikInsn.vB,
+                        mir->dalvikInsn.vC,
+                        mir->next ? " | " : " ");
+              }
+            } else {
+              fprintf(file, "    {%04x %s %s %s\\l}%s\\\n", mir->offset,
+                      mir->ssa_rep ? GetDalvikDisassembly(mir) :
+                      (opcode < kMirOpFirst) ?
+                        Instruction::Name(mir->dalvikInsn.opcode) :
+                        extended_mir_op_names_[opcode - kMirOpFirst],
+                      (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
+                      (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
+                      mir->next ? " | " : " ");
+            }
         }
         fprintf(file, "  }\"];\n\n");
     } else if (bb->block_type == kExceptionHandling) {
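
Given the two format strings above, vector MIRs are dumped with raw operands instead of disassembly; the kMirOpConstVector branch, for instance, renders a record row of roughly this shape (values illustrative):

    {0010 ConstVector 130 4 1 2 3 4\l} |

i.e. offset, extended opcode name, then vA, vB and arg[0]..arg[3], while the generic vector branch prints only vA, vB, vC.
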
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 7ae4b02..f98e366 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -151,6 +151,9 @@
   rxzr = rx31,
   rwsp = rw31,
   rsp = rx31,
+  // TODO: rx4 is an argument register in the C ABI, which is not a good idea;
+  // we need to decide whether to use a caller-save or a callee-save register in the C ABI,
+  // because that choice results in different trampoline implementations.
   rA64_SUSPEND = rx4,
   rA64_SELF = rx18,
   rA64_SP = rx31,
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 8accd0a..93caf89 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -176,7 +176,7 @@
                  kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10, kFmtBitBlt, 23, 22,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | SETS_CCODES,
                  "cmn", "!0R, #!1d!2T", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b20001f),
+    ENCODING_MAP(WIDE(kA64Cmp3Rro), SF_VARIANTS(0x6b00001f),
                  kFmtRegROrSp, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
                  "cmp", "!0R, !1r!2o", kFixupNone),
@@ -637,7 +637,7 @@
               }
 
               // Now check that the requirements are satisfied.
-              RegStorage reg(operand);
+              RegStorage reg(operand | RegStorage::kValid);
               const char *expected = nullptr;
               if (want_float) {
                 if (!reg.IsFloat()) {
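
The kValid fix above matters because RegStorage keeps a validity flag inside the storage word, so a value built from a raw encoded operand reports itself invalid. A sketch of the idea, with the flag position assumed:

    #include <cstdint>

    struct RegStorageSketch {
      static constexpr uint16_t kValid = 0x8000;  // assumed flag position
      explicit RegStorageSketch(uint16_t val) : reg_(val) {}
      bool Valid() const { return (reg_ & kValid) != 0; }
      uint16_t reg_;
    };

    // RegStorage reg(operand) left the flag clear, so IsFloat()-style queries
    // ran against an invalid register; OR-ing in kValid restores the invariant.
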
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 1bcf19b..136a04f 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -194,137 +194,101 @@
  * details see monitor.cc.
  */
 void Arm64Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
+  // x0/w0 = object
+  // w1    = thin lock thread id
+  // x2    = address of lock word
+  // w3    = lock word / store failure
+  // TUNING: How much performance do we gain by inlining this,
+  // given that we have already flushed all registers?
   FlushAllRegs();
-  // FIXME: need separate LoadValues for object references.
-  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
+  LoadValueDirectFixed(rl_src, rs_w0);
   LockCallTemps();  // Prepare for explicit register usage
-  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
-  if (kArchVariantHasGoodBranchPredictor) {
-    LIR* null_check_branch = nullptr;
-    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
-      null_check_branch = nullptr;  // No null check.
-    } else {
-      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-      if (Runtime::Current()->ExplicitNullChecks()) {
-        null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
-      }
-    }
-    Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_x2);
-    NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
-    MarkPossibleNullPointerException(opt_flags);
-    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_x1, 0, NULL);
-    NewLIR4(kA64Stxr3wrX, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
-    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_x1, 0, NULL);
-
-
-    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
-    not_unlocked_branch->target = slow_path_target;
-    if (null_check_branch != nullptr) {
-      null_check_branch->target = slow_path_target;
-    }
-    // TODO: move to a slow path.
-    // Go expensive route - artLockObjectFromCode(obj);
-    LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
-    ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
-    MarkSafepointPC(call_inst);
-
-    LIR* success_target = NewLIR0(kPseudoTargetLabel);
-    lock_success_branch->target = success_target;
-    GenMemBarrier(kLoadLoad);
+  LIR* null_check_branch = nullptr;
+  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+    null_check_branch = nullptr;  // No null check.
   } else {
-    // Explicit null-check as slow-path is entered using an IT.
-    GenNullCheck(rs_x0, opt_flags);
-    Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_x2);
-    MarkPossibleNullPointerException(opt_flags);
-    NewLIR3(kA64Ldxr2rX, rx1, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
-    OpRegImm(kOpCmp, rs_x1, 0);
-    OpIT(kCondEq, "");
-    NewLIR4(kA64Stxr3wrX/*eq*/, rx1, rx2, rx0, mirror::Object::MonitorOffset().Int32Value() >> 2);
-    OpRegImm(kOpCmp, rs_x1, 0);
-    OpIT(kCondNe, "T");
-    // Go expensive route - artLockObjectFromCode(self, obj);
-    LoadWordDisp/*ne*/(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(),
-                       rs_rA64_LR);
-    ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
-    MarkSafepointPC(call_inst);
-    GenMemBarrier(kLoadLoad);
+    // If the null-check fails it's handled by the slow path to reduce exception-related meta-data.
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
+    }
   }
+  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
+  OpRegRegImm(kOpAdd, rs_x2, rs_x0, mirror::Object::MonitorOffset().Int32Value());
+  NewLIR2(kA64Ldxr2rX, rw3, rx2);
+  MarkPossibleNullPointerException(opt_flags);
+  LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_x1, 0, NULL);
+  NewLIR3(kA64Stxr3wrX, rw3, rw1, rx2);
+  LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_x1, 0, NULL);
+
+  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+  not_unlocked_branch->target = slow_path_target;
+  if (null_check_branch != nullptr) {
+    null_check_branch->target = slow_path_target;
+  }
+  // TODO: move to a slow path.
+  // Go expensive route - artLockObjectFromCode(obj);
+  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pLockObject).Int32Value(), rs_rA64_LR);
+  ClobberCallerSave();
+  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  MarkSafepointPC(call_inst);
+
+  LIR* success_target = NewLIR0(kPseudoTargetLabel);
+  lock_success_branch->target = success_target;
+  GenMemBarrier(kLoadLoad);
 }
 
 /*
  * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
- * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
+ * details see monitor.cc. Note the code below doesn't use ldxr/stxr as the code holds the lock
  * and can only give away ownership if its suspended.
  */
 void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
+  // x0/w0 = object
+  // w1    = thin lock thread id
+  // w2    = lock word
+  // TUNING: How much performance do we gain by inlining this,
+  // given that we have already flushed all registers?
   FlushAllRegs();
-  LoadValueDirectFixed(rl_src, rs_x0);  // Get obj
+  LoadValueDirectFixed(rl_src, rs_w0);  // Get obj
   LockCallTemps();  // Prepare for explicit register usage
   LIR* null_check_branch = nullptr;
-  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_x2);
-  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
-  if (kArchVariantHasGoodBranchPredictor) {
-    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
-      null_check_branch = nullptr;  // No null check.
-    } else {
-      // If the null-check fails its handled by the slow-path to reduce exception related meta-data.
-      if (Runtime::Current()->ExplicitNullChecks()) {
-        null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
-      }
-    }
-    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1);
-    MarkPossibleNullPointerException(opt_flags);
-    LoadConstantNoClobber(rs_x3, 0);
-    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_x1, rs_x2, NULL);
-    GenMemBarrier(kStoreLoad);
-    Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
-    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
-
-    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
-    slow_unlock_branch->target = slow_path_target;
-    if (null_check_branch != nullptr) {
-      null_check_branch->target = slow_path_target;
-    }
-    // TODO: move to a slow path.
-    // Go expensive route - artUnlockObjectFromCode(obj);
-    LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
-    ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
-    MarkSafepointPC(call_inst);
-
-    LIR* success_target = NewLIR0(kPseudoTargetLabel);
-    unlock_success_branch->target = success_target;
+  if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
+    null_check_branch = nullptr;  // No null check.
   } else {
-    // Explicit null-check as slow-path is entered using an IT.
-    GenNullCheck(rs_x0, opt_flags);
-    Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x1);  // Get lock
-    MarkPossibleNullPointerException(opt_flags);
-    Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_x2);
-    LoadConstantNoClobber(rs_x3, 0);
-    // Is lock unheld on lock or held by us (==thread_id) on unlock?
-    OpRegReg(kOpCmp, rs_x1, rs_x2);
-    OpIT(kCondEq, "EE");
-    Store32Disp/*eq*/(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_x3);
-    // Go expensive route - UnlockObjectFromCode(obj);
-    LoadWordDisp/*ne*/(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(),
-                       rs_rA64_LR);
-    ClobberCallerSave();
-    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rA64_LR);
-    MarkSafepointPC(call_inst);
-    GenMemBarrier(kStoreLoad);
+    // If the null-check fails it's handled by the slow path to reduce exception-related meta-data.
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      null_check_branch = OpCmpImmBranch(kCondEq, rs_x0, 0, NULL);
+    }
   }
+  Load32Disp(rs_rA64_SELF, Thread::ThinLockIdOffset<8>().Int32Value(), rs_w1);
+  Load32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_w2);
+  MarkPossibleNullPointerException(opt_flags);
+  LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_w1, rs_w2, NULL);
+  GenMemBarrier(kStoreLoad);
+  Store32Disp(rs_x0, mirror::Object::MonitorOffset().Int32Value(), rs_xzr);
+  LIR* unlock_success_branch = OpUnconditionalBranch(NULL);
+
+  LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+  slow_unlock_branch->target = slow_path_target;
+  if (null_check_branch != nullptr) {
+    null_check_branch->target = slow_path_target;
+  }
+  // TODO: move to a slow path.
+  // Go expensive route - artUnlockObjectFromCode(obj);
+  LoadWordDisp(rs_rA64_SELF, QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject).Int32Value(), rs_rA64_LR);
+  ClobberCallerSave();
+  LIR* call_inst = OpReg(kOpBlx, rs_rA64_LR);
+  MarkSafepointPC(call_inst);
+
+  LIR* success_target = NewLIR0(kPseudoTargetLabel);
+  unlock_success_branch->target = success_target;
 }
 
 void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage reset_reg = AllocTemp();
   Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
-  LoadConstant(reset_reg, 0);
-  Store32Disp(rs_rA64_SELF, ex_offset, reset_reg);
-  FreeTemp(reset_reg);
+  Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr);
   StoreValue(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 6caacc8..10be0d6 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -38,16 +38,27 @@
      rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
 static const RegStorage dp_regs_arr[] =
     {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
-     rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15};
+     rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
+     rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
+     rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
 static const RegStorage reserved_regs_arr[] =
     {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR};
+// TUNING: Are there too many temp registers and too few promotion targets?
+// This definition needs to match runtime.cc, the quick entry assembly, and the JNI compiler.
+// Note: we cannot call a C function directly if this does not match the C ABI.
+// Currently, rs_rA64_SELF is not a callee-save register, which does not match the C ABI.
 static const RegStorage core_temps_arr[] =
-    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x12};
+    {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
+     rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16,
+     rs_x17};
 static const RegStorage sp_temps_arr[] =
     {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
-     rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15};
+     rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
+     rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31};
 static const RegStorage dp_temps_arr[] =
-    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7};
+    {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7,
+     rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
+     rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
 
 static const std::vector<RegStorage> core_regs(core_regs_arr,
     core_regs_arr + arraysize(core_regs_arr));
@@ -877,12 +888,13 @@
   rl_src.home = false;
   MarkLive(rl_src);
 
-  // TODO(Arm64): compress the Method pointer?
-  StoreValueWide(rl_method, rl_src);
+  // rl_method might be 32-bit, but ArtMethod* on stack is 64-bit, so always flush it.
+  StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
 
-  // If Method* has been promoted, explicitly flush
+  // If Method* has been promoted, load it;
+  // otherwise, rl_method is the 32-bit value on [sp] and has already been loaded.
   if (rl_method.location == kLocPhysReg) {
-    StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+    StoreValue(rl_method, rl_src);
   }
 
   if (cu_->num_ins == 0) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 784dfaf..6f81238 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1203,4 +1203,8 @@
   return loc;
 }
 
+void Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
+  LOG(FATAL) << "Unknown MIR opcode not supported on this architecture";
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 2c4ca88..10c2459 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -975,7 +975,18 @@
     case kMirOpSelect:
       GenSelect(bb, mir);
       break;
+    case kMirOpPhi:
+    case kMirOpNop:
+    case kMirOpNullCheck:
+    case kMirOpRangeCheck:
+    case kMirOpDivZeroCheck:
+    case kMirOpCheck:
+    case kMirOpCheckPart2:
+      // Ignore these known opcodes
+      break;
     default:
+      // Give the backends a chance to handle unknown extended MIR opcodes.
+      GenMachineSpecificExtendedMethodMIR(bb, mir);
       break;
   }
 }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 836d2ac..3201b60 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -174,6 +174,8 @@
                           ArenaAllocator* const arena);
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
+Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                          ArenaAllocator* const arena);
 
 // Utility macros to traverse the LIR list.
 #define NEXT_LIR(lir) (lir->next)
@@ -1178,6 +1180,14 @@
     virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) = 0;
     virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
 
+    /**
+     * @brief Handle Machine Specific MIR Extended opcodes.
+     * @param bb The basic block in which the MIR is from.
+     * @param mir The MIR whose opcode is not standard extended MIR.
+     * @note Base class implementation will abort for unknown opcodes.
+     */
+    virtual void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+
     /**
      * @brief Lowers the kMirOpSelect MIR into LIR.
      * @param bb The basic block in which the MIR is from.
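
With this hook, the mir_to_lir.cc dispatch above forwards any extended opcode it does not recognize, and a backend that understands the vector MIRs can claim them. A sketch of what an override could look like; the per-opcode handler names are assumptions:

    void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
      int opcode = mir->dalvikInsn.opcode;
      switch (opcode) {
        case kMirOpConstVector:
          GenConstVector(bb, mir);     // hypothetical per-opcode handler
          break;
        case kMirOpPackedAddition:
          GenAddVector(bb, mir);       // hypothetical per-opcode handler
          break;
        default:
          // Fall back to the base class, which aborts on unknown opcodes.
          Mir2Lir::GenMachineSpecificExtendedMethodMIR(bb, mir);
          break;
      }
    }
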
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index b8481e2..9200106 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -63,17 +63,24 @@
 { kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16TI8", "fs:[!0d],!1d" }, \
   \
 { kX86 ## opname ## 32MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 64MR,  kMemReg64,  mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64MR", "[!0r+!1d],!2r" }, \
 { kX86 ## opname ## 32AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 64AR,  kArrayReg64, mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
 { kX86 ## opname ## 32TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32TR", "fs:[!0d],!1r" }, \
 { kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RR", "!0r,!1r" }, \
 { kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 64RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,         0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \
 { kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 64RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W,         0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
 { kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 64RT,  kReg64Thread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RT", "!0r,fs:[!1d]" }, \
 { kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 64RI,  kReg64Imm,            IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "64RI", "!0r,!1d" }, \
 { kX86 ## opname ## 32MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32MI", "[!0r+!1d],!2d" }, \
 { kX86 ## opname ## 32AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
 { kX86 ## opname ## 32TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32TI", "fs:[!0d],!1d" }, \
 { kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 64RI8, kReg64Imm,            IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64RI8", "!0r,!1d" }, \
 { kX86 ## opname ## 32MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \
 { kX86 ## opname ## 32AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
 { kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32TI8", "fs:[!0d],!1d" }
@@ -164,16 +171,22 @@
   { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" },
 
   { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" },
+  { kX86Mov64MR, kMemReg64,  IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,         0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" },
   { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov64AR, kArrayReg64, IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,        0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" },
   { kX86Mov32RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RR", "!0r,!1r" },
   { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" },
+  { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,         0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" },
   { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,         0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
   { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" },
+  { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" },
   { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" },
   { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2d" },
   { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" },
+  { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4 }, "Mov64TI", "fs:[!0d],!1d" },
 
   { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
 
@@ -307,6 +320,11 @@
   { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "FstpsM", "[!0r,!1d]" },
   { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
 
+  EXT_0F_ENCODING_MAP(Mova128,    0x66, 0x6F, REG_DEF0),
+  { kX86Mova128MR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128MR", "[!0r+!1d],!2r" },
+  { kX86Mova128AR, kArrayReg, IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0 }, "Mova128AR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+
   EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
   { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
   { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
@@ -376,7 +394,7 @@
   { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
 };
 
-static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
+size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
   size_t size = 0;
   if (entry->skeleton.prefix1 > 0) {
     ++size;
@@ -392,8 +410,10 @@
     }
   }
   ++size;  // modrm
-  if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()) {
+  if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()
+      || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
     // SP requires a SIB byte.
+    // GS access also needs a SIB byte for absolute addressing in 64-bit mode.
     ++size;
   }
   if (displacement != 0 || RegStorage::RegNum(base) == rs_rBP.GetRegNum()) {
@@ -421,16 +441,19 @@
       return 3;  // 1 byte of opcode + 2 prefixes
     case kRegOpcode:  // lir operands - 0: reg
       return ComputeSize(entry, 0, 0, false) - 1;  // substract 1 for modrm
+    case kReg64:
     case kReg:  // lir operands - 0: reg
       return ComputeSize(entry, 0, 0, false);
     case kMem:  // lir operands - 0: base, 1: disp
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
     case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+    case kMemReg64:
     case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
     case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg 3: immediate
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+    case kArrayReg64:
     case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
     case kThreadReg:  // lir operands - 0: disp, 1: reg
@@ -443,8 +466,10 @@
       return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
     case kRegArray:   // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
       return ComputeSize(entry, lir->operands[1], lir->operands[4], true);
+    case kReg64Thread:  // lir operands - 0: reg, 1: disp
     case kRegThread:  // lir operands - 0: reg, 1: disp
       return ComputeSize(entry, 0, 0x12345678, false);  // displacement size is always 32bit
+    case kReg64Imm:
     case kRegImm: {  // lir operands - 0: reg, 1: immediate
       size_t size = ComputeSize(entry, 0, 0, false);
       if (entry->skeleton.ax_opcode == 0) {
@@ -551,7 +576,12 @@
 
 void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) {
   if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
+    if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) {
+      // 64 bit adresses by GS, not FS
+      code_buffer_.push_back(THREAD_PREFIX_GS);
+    } else {
+      code_buffer_.push_back(entry->skeleton.prefix1);
+    }
     if (entry->skeleton.prefix2 != 0) {
       code_buffer_.push_back(entry->skeleton.prefix2);
     }
@@ -605,6 +635,19 @@
   }
 }
 
+void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) {
+  if (Gen64Bit()) {
+    // Absolute addressing for GS access.
+    uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rX86_SP.GetRegNum();
+    code_buffer_.push_back(modrm);
+    uint8_t sib = (0/*TIMES_1*/ << 6) | (rs_rX86_SP.GetRegNum() << 3) | rs_rBP.GetRegNum();
+    code_buffer_.push_back(sib);
+  } else {
+    uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rBP.GetRegNum();
+    code_buffer_.push_back(modrm);
+  }
+}
+
 void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp) {
   DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
   DCHECK_LT(RegStorage::RegNum(base), 8);
@@ -754,8 +797,7 @@
         << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
   }
   DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (0 << 6) | (RegStorage::RegNum(reg) << 3) | rs_rBP.GetRegNum();
-  code_buffer_.push_back(modrm);
+  EmitModrmThread(RegStorage::RegNum(reg));
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -810,14 +852,7 @@
 }
 
 void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
-  if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
-    if (entry->skeleton.prefix2 != 0) {
-      code_buffer_.push_back(entry->skeleton.prefix2);
-    }
-  } else {
-    DCHECK_EQ(0, entry->skeleton.prefix2);
-  }
+  EmitPrefix(entry);
   if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
     code_buffer_.push_back(entry->skeleton.ax_opcode);
   } else {
@@ -837,8 +872,7 @@
 
 void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) {
   EmitPrefixAndOpcode(entry);
-  uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | rs_rBP.GetRegNum();
-  code_buffer_.push_back(modrm);
+  EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -931,14 +965,7 @@
 }
 
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
-  if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
-    if (entry->skeleton.prefix2 != 0) {
-      code_buffer_.push_back(entry->skeleton.prefix2);
-    }
-  } else {
-    DCHECK_EQ(0, entry->skeleton.prefix2);
-  }
+  EmitPrefix(entry);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0x0F, entry->skeleton.opcode);
   code_buffer_.push_back(0x0F);
@@ -1075,8 +1102,7 @@
 void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) {
   DCHECK_NE(entry->skeleton.prefix1, 0);
   EmitPrefixAndOpcode(entry);
-  uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | rs_rBP.GetRegNum();
-  code_buffer_.push_back(modrm);
+  EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -1317,6 +1343,7 @@
       case kRegOpcode:  // lir operands - 0: reg
         EmitOpRegOpcode(entry, lir->operands[0]);
         break;
+      case kReg64:
       case kReg:  // lir operands - 0: reg
         EmitOpReg(entry, lir->operands[0]);
         break;
@@ -1326,6 +1353,7 @@
       case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
         EmitOpArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]);
         break;
+      case kMemReg64:
       case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
         EmitMemReg(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
@@ -1336,6 +1364,7 @@
         EmitArrayImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
         break;
+      case kArrayReg64:
       case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
         EmitArrayReg(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
@@ -1347,6 +1376,7 @@
         EmitRegArray(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
         break;
+      case kReg64Thread:  // lir operands - 0: reg, 1: disp
       case kRegThread:  // lir operands - 0: reg, 1: disp
         EmitRegThread(entry, lir->operands[0], lir->operands[1]);
         break;
@@ -1370,6 +1400,7 @@
         EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                       lir->operands[3]);
         break;
+      case kReg64Imm:
       case kRegImm:  // lir operands - 0: reg, 1: immediate
         EmitRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
@@ -1482,6 +1513,26 @@
 void X86Mir2Lir::AssignOffsets() {
   int offset = AssignInsnOffsets();
 
+  if (const_vectors_ != nullptr) {
+    /* assign offsets to vector literals */
+
+    // First, pad the offset to 12 mod 16 to align to a 16-byte boundary.
+    // This ensures that the vectors are 16-byte aligned, as the procedure is
+    // always aligned at 4 mod 16.
+    int align_size = (16-4) - (offset & 0xF);
+    if (align_size < 0) {
+      align_size += 16;
+    }
+
+    offset += align_size;
+
+    // Now assign each literal the right offset.
+    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+      p->offset = offset;
+      offset += 16;
+    }
+  }
+
   /* Const values have to be word aligned */
   offset = RoundUp(offset, 4);
 
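
As a quick sanity check of the `(16-4) - (offset & 0xF)` padding rule used here and again in InstallLiteralPools: assuming the method body always starts at 4 mod 16, padding the in-method offset up to 12 mod 16 lands the literals on a 16-byte boundary. A minimal sketch:

    #include <cassert>

    // Sketch of the padding rule, assuming the method start is 4 mod 16.
    static int AlignOffsetForVectors(int offset) {
      int align_size = (16 - 4) - (offset & 0xF);
      if (align_size < 0) {
        align_size += 16;
      }
      offset += align_size;
      assert(((4 + offset) & 0xF) == 0);  // method start + offset is 16-byte aligned
      return offset;
    }
    // E.g. offset 38 -> pad by 6 -> 44; 4 + 44 == 48, a multiple of 16.
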
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index cf2b10a..4673cc0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -156,8 +156,13 @@
   }
   NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec));
   NewLIR2(kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData), rs_rX86_ARG0,
-                          rs_rX86_ARG1, true);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0,
+                            rs_rX86_ARG1, true);
+  } else {
+    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData), rs_rX86_ARG0,
+                            rs_rX86_ARG1, true);
+  }
 }
 
 void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
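
The repeated Is64BitInstructionSet branches above exist because QUICK_ENTRYPOINT_OFFSET is parameterized on pointer size: the per-thread entrypoint table stores one pointer per slot, so a 64-bit target needs offsets computed with 8-byte strides. A hedged sketch of the idea with simplified, hypothetical names (not the real ART macros):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical simplification: each entrypoint slot is one pointer wide,
    // so its byte offset in the table depends on the target word size.
    template <size_t kPointerSize>
    constexpr int32_t EntrypointOffset(int slot_index) {
      return static_cast<int32_t>(slot_index * kPointerSize);
    }

    // Call sites pick the variant matching the target, mirroring the pattern above.
    inline int32_t OffsetFor(bool is_64bit, int slot_index) {
      return is_64bit ? EntrypointOffset<8>(slot_index)
                      : EntrypointOffset<4>(slot_index);
    }
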
@@ -180,7 +185,11 @@
   int ct_offset = Is64BitInstructionSet(cu_->instruction_set) ?
       Thread::CardTableOffset<8>().Int32Value() :
       Thread::CardTableOffset<4>().Int32Value();
-  NewLIR2(kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
+  if (Gen64Bit()) {
+    NewLIR2(kX86Mov64RT, reg_card_base.GetReg(), ct_offset);
+  } else {
+    NewLIR2(kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
+  }
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
   LIR* target = NewLIR0(kPseudoTargetLabel);
@@ -201,8 +210,7 @@
   LockTemp(rs_rX86_ARG2);
 
   /* Build frame, return address already on stack */
-  // TODO: 64 bit.
-  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - 4);
+  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
 
   /*
    * We can safely skip the stack overflow check if we're
@@ -233,7 +241,7 @@
                            false /* MarkSafepointPC */, false /* UseLink */);
         } else {
           m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow),
-                                     false /* MarkSafepointPC */, false /* UseLink */);
+                           false /* MarkSafepointPC */, false /* UseLink */);
         }
       }
 
@@ -248,7 +256,7 @@
     // mov esp, ebp
     // in case a signal comes in that's not using an alternate signal stack and the large frame may
     // have moved us outside of the reserved area at the end of the stack.
-    // cmp rX86_SP, fs:[stack_end_]; jcc throw_slowpath
+    // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
     if (Is64BitInstructionSet(cu_->instruction_set)) {
       OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
     } else {
@@ -286,7 +294,7 @@
   NewLIR0(kPseudoMethodExit);
   UnSpillCoreRegs();
   /* Remove frame except for return address */
-  stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - 4);
+  stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
   NewLIR0(kX86Ret);
 }
 
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ef8c33c..cc0e1f2 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -24,7 +24,7 @@
 
 class X86Mir2Lir FINAL : public Mir2Lir {
   public:
-    X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
+    X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
 
     // Required for target - codegen helpers.
     bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
@@ -325,10 +325,12 @@
     std::vector<uint8_t>* ReturnCallFrameInformation();
 
   private:
+    size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib);
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
     void EmitPrefixAndOpcode(const X86EncodingMap* entry);
     void EmitDisp(uint8_t base, int disp);
+    void EmitModrmThread(uint8_t reg_or_opcode);
     void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp);
     void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, int disp);
     void EmitImm(const X86EncodingMap* entry, int imm);
@@ -406,6 +408,22 @@
     bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
 
     /*
+     * @brief Load a 128-bit constant into a vector register.
+     * @param bb The basic block the MIR comes from.
+     * @param mir The MIR whose opcode is kMirConstVector.
+     * @note vA is the TypeSize for the register.
+     * @note vB is the destination XMM register. arg[0..3] are 32-bit constant values.
+     */
+    void GenConst128(BasicBlock* bb, MIR* mir);
+
+    /*
+     * @brief Generate code for a vector opcode.
+     * @param bb The basic block the MIR comes from.
+     * @param mir The MIR whose opcode is a non-standard opcode.
+     */
+    void GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir);
+
+    /*
      * @brief Return the correct x86 opcode for the Dex operation
      * @param op Dex opcode for the operation
      * @param loc Register location of the operand
@@ -578,6 +596,8 @@
      */
     void AnalyzeDoubleUse(RegLocation rl_use);
 
+    bool Gen64Bit() const { return gen64bit_; }
+
     // Information derived from analysis of MIR
 
     // The compiler temporary for the code address of the method.
@@ -606,6 +626,25 @@
 
     // Epilogue increment of stack pointer.
     LIR* stack_increment_;
+
+    // 64-bit mode
+    bool gen64bit_;
+
+    // The list of const vector literals.
+    LIR *const_vectors_;
+
+    /*
+     * @brief Search for a matching vector literal
+     * @param mir A kMirOpConst128b MIR instruction to match.
+     * @returns pointer to matching LIR constant, or nullptr if not found.
+     */
+    LIR *ScanVectorLiteral(MIR *mir);
+
+    /*
+     * @brief Add a constant vector literal.
+     * @param mir A kMirOpConst128b MIR instruction whose constant is added.
+     */
+    LIR *AddVectorLiteral(MIR *mir);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 22e554e..aec39ab 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -49,8 +49,13 @@
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
-                                              false);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
+                                                false);
+      } else {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
+                                                false);
+      }
       rl_result = GetReturn(true);
       StoreValue(rl_dest, rl_result);
       return;
@@ -106,8 +111,13 @@
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
-                                              false);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
+                                                false);
+      } else {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
+                                                false);
+      }
       rl_result = GetReturnWide(true);
       StoreValueWide(rl_dest, rl_result);
       return;
@@ -268,10 +278,18 @@
       GenLongToFP(rl_dest, rl_src, false /* is_double */);
       return;
     case Instruction::FLOAT_TO_LONG:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pF2l), rl_dest, rl_src);
+      } else {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
+      }
       return;
     case Instruction::DOUBLE_TO_LONG:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pD2l), rl_dest, rl_src);
+      } else {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
+      }
       return;
     default:
       LOG(INFO) << "Unexpected opcode: " << opcode;
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 368234e..fbb1785 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -909,8 +909,13 @@
       }
       // Load array length to kArg1.
       m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                    new_index, m2l_->TargetReg(kArg1), true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
+                                      new_index, m2l_->TargetReg(kArg1), true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
+                                      new_index, m2l_->TargetReg(kArg1), true);
+      }
     }
 
    private:
@@ -944,8 +949,13 @@
       // Load array length to kArg1.
       m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
       m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_);
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                    m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
+                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
+                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      }
     }
 
    private:
@@ -1390,12 +1400,22 @@
 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
   DCHECK_EQ(kX86_64, cu_->instruction_set);
   X86OpCode opcode = kX86Bkpt;
-  switch (op) {
-  case kOpCmp: opcode = kX86Cmp32RT;  break;
-  case kOpMov: opcode = kX86Mov32RT;  break;
-  default:
-    LOG(FATAL) << "Bad opcode: " << op;
-    break;
+  if (Gen64Bit() && r_dest.Is64BitSolo()) {
+    switch (op) {
+    case kOpCmp: opcode = kX86Cmp64RT;  break;
+    case kOpMov: opcode = kX86Mov64RT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
+      break;
+    }
+  } else {
+    switch (op) {
+    case kOpCmp: opcode = kX86Cmp32RT;  break;
+    case kOpMov: opcode = kX86Mov32RT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+    }
   }
   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
 }
@@ -1862,8 +1882,8 @@
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
-  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
-    (sizeof(mirror::Class*) * type_idx);
+  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
+    (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
 
   if (rl_method.location == kLocPhysReg) {
     if (use_declaring_class) {
@@ -1917,8 +1937,13 @@
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // Caller function returns Class* in kArg0.
-    CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
-                         type_idx, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
+    } else {
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
+    }
     OpRegCopy(class_reg, TargetReg(kRet0));
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));
   } else if (use_declaring_class) {
@@ -1931,14 +1956,18 @@
     LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  class_reg);
     int32_t offset_of_type =
-        mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
+        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + (sizeof(mirror::HeapReference<mirror::Class*>)
         * type_idx);
     LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!can_assume_type_is_in_dex_cache) {
       // Need to test presence of type in dex cache at runtime.
       LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
       // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
-      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
+      } else {
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
+      }
       OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path.
       LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* Reload Ref. */
       // Rejoin code paths
@@ -1972,7 +2001,11 @@
       branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
     }
     OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
-    OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial));
+    } else {
+      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
+    }
   }
   // TODO: only clobber when type isn't final?
   ClobberCallerSave();
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e3312a2..237c68c 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -26,53 +26,117 @@
 
 namespace art {
 
-static const RegStorage core_regs_arr[] = {
-    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP, rs_rBP, rs_rSI, rs_rDI
+static const RegStorage core_regs_arr_32[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
+};
+static const RegStorage core_regs_arr_64[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
 #ifdef TARGET_REX_SUPPORT
     rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
 #endif
 };
-static const RegStorage sp_regs_arr[] = {
+static const RegStorage sp_regs_arr_32[] = {
+    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
+};
+static const RegStorage sp_regs_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 #ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
 #endif
 };
-static const RegStorage dp_regs_arr[] = {
+static const RegStorage dp_regs_arr_32[] = {
+    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
+};
+static const RegStorage dp_regs_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 #ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 #endif
 };
-static const RegStorage reserved_regs_arr[] = {rs_rX86_SP};
-static const RegStorage core_temps_arr[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
-static const RegStorage sp_temps_arr[] = {
+static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
+static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
+static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
+static const RegStorage core_temps_arr_64[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
+#ifdef TARGET_REX_SUPPORT
+    rs_r8, rs_r9, rs_r10, rs_r11
+#endif
+};
+static const RegStorage sp_temps_arr_32[] = {
+    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
+};
+static const RegStorage sp_temps_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 #ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
 #endif
 };
-static const RegStorage dp_temps_arr[] = {
+static const RegStorage dp_temps_arr_32[] = {
+    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
+};
+static const RegStorage dp_temps_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 #ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 #endif
 };
 
-static const std::vector<RegStorage> core_regs(core_regs_arr,
-    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
-static const std::vector<RegStorage> sp_regs(sp_regs_arr,
-    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
-static const std::vector<RegStorage> dp_regs(dp_regs_arr,
-    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
-static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
-    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
-static const std::vector<RegStorage> core_temps(core_temps_arr,
-    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
-static const std::vector<RegStorage> sp_temps(sp_temps_arr,
-    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
-static const std::vector<RegStorage> dp_temps(dp_temps_arr,
-    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
+static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
+    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
+static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
+    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
+static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
+    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
+static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
+    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
+static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
+    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
+static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
+    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
+static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
+    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
+static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
+    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
+static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
+    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
+static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
+    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
+static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
+    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
+static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
+    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
+static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
+    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
+static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
+    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
+
+RegStorage rs_rX86_SP;
+
+X86NativeRegisterPool rX86_ARG0;
+X86NativeRegisterPool rX86_ARG1;
+X86NativeRegisterPool rX86_ARG2;
+X86NativeRegisterPool rX86_ARG3;
+X86NativeRegisterPool rX86_FARG0;
+X86NativeRegisterPool rX86_FARG1;
+X86NativeRegisterPool rX86_FARG2;
+X86NativeRegisterPool rX86_FARG3;
+X86NativeRegisterPool rX86_RET0;
+X86NativeRegisterPool rX86_RET1;
+X86NativeRegisterPool rX86_INVOKE_TGT;
+X86NativeRegisterPool rX86_COUNT;
+
+RegStorage rs_rX86_ARG0;
+RegStorage rs_rX86_ARG1;
+RegStorage rs_rX86_ARG2;
+RegStorage rs_rX86_ARG3;
+RegStorage rs_rX86_FARG0;
+RegStorage rs_rX86_FARG1;
+RegStorage rs_rX86_FARG2;
+RegStorage rs_rX86_FARG3;
+RegStorage rs_rX86_RET0;
+RegStorage rs_rX86_RET1;
+RegStorage rs_rX86_INVOKE_TGT;
+RegStorage rs_rX86_COUNT;
 
 RegLocation X86Mir2Lir::LocCReturn() {
   return x86_loc_c_return;
@@ -478,8 +542,13 @@
 }
 
 void X86Mir2Lir::CompilerInitializeRegAlloc() {
-  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
-                                        core_temps, sp_temps, dp_temps);
+  if (Gen64Bit()) {
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, sp_regs_64, dp_regs_64, reserved_regs_64,
+                                        core_temps_64, sp_temps_64, dp_temps_64);
+  } else {
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, sp_regs_32, dp_regs_32, reserved_regs_32,
+                                        core_temps_32, sp_temps_32, dp_temps_32);
+  }
 
   // Target-specific adjustments.
 
@@ -523,11 +592,11 @@
   }
   // Spill mask not including fake return address register
   uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
-  int offset = frame_size_ - (4 * num_core_spills_);
+  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
-      offset += 4;
+      offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
@@ -538,11 +607,11 @@
   }
   // Spill mask not including fake return address register
   uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
-  int offset = frame_size_ - (4 * num_core_spills_);
+  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
-      offset += 4;
+      offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
@@ -566,27 +635,74 @@
   return RegClassBySize(size);
 }
 
-X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
+X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
     : Mir2Lir(cu, mir_graph, arena),
       base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
       method_address_insns_(arena, 100, kGrowableArrayMisc),
       class_type_address_insns_(arena, 100, kGrowableArrayMisc),
       call_method_insns_(arena, 100, kGrowableArrayMisc),
-      stack_decrement_(nullptr), stack_increment_(nullptr) {
+      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
+      const_vectors_(nullptr) {
+  store_method_addr_used_ = false;
   if (kIsDebugBuild) {
     for (int i = 0; i < kX86Last; i++) {
       if (X86Mir2Lir::EncodingMap[i].opcode != i) {
         LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
-            << " is wrong: expecting " << i << ", seeing "
-            << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
+                   << " is wrong: expecting " << i << ", seeing "
+                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
       }
     }
   }
+  if (Gen64Bit()) {
+    rs_rX86_SP = rs_rX86_SP_64;
+
+    rs_rX86_ARG0 = rs_rDI;
+    rs_rX86_ARG1 = rs_rSI;
+    rs_rX86_ARG2 = rs_rDX;
+    rs_rX86_ARG3 = rs_rCX;
+    rX86_ARG0 = rDI;
+    rX86_ARG1 = rSI;
+    rX86_ARG2 = rDX;
+    rX86_ARG3 = rCX;
+    // TODO: ARG4(r8), ARG5(r9), floating point args.
+  } else {
+    rs_rX86_SP = rs_rX86_SP_32;
+
+    rs_rX86_ARG0 = rs_rAX;
+    rs_rX86_ARG1 = rs_rCX;
+    rs_rX86_ARG2 = rs_rDX;
+    rs_rX86_ARG3 = rs_rBX;
+    rX86_ARG0 = rAX;
+    rX86_ARG1 = rCX;
+    rX86_ARG2 = rDX;
+    rX86_ARG3 = rBX;
+  }
+  rs_rX86_FARG0 = rs_rAX;
+  rs_rX86_FARG1 = rs_rCX;
+  rs_rX86_FARG2 = rs_rDX;
+  rs_rX86_FARG3 = rs_rBX;
+  rs_rX86_RET0 = rs_rAX;
+  rs_rX86_RET1 = rs_rDX;
+  rs_rX86_INVOKE_TGT = rs_rAX;
+  rs_rX86_COUNT = rs_rCX;
+  rX86_FARG0 = rAX;
+  rX86_FARG1 = rCX;
+  rX86_FARG2 = rDX;
+  rX86_FARG3 = rBX;
+  rX86_RET0 = rAX;
+  rX86_RET1 = rDX;
+  rX86_INVOKE_TGT = rAX;
+  rX86_COUNT = rCX;
 }
 
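
The 64-bit argument registers chosen above (rDI, rSI, rDX, rCX) match the first four integer argument registers of the System V AMD64 calling convention, while the 32-bit side keeps the quick ABI's eAX/eCX/eDX/eBX assignment. Purely for reference, the SysV integer order the TODO alludes to:

    // System V AMD64 integer argument order; only the first four are wired up
    // above, with r8/r9 (and the floating point registers) left for the TODO.
    static const char* kSysVIntArgRegs[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
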
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena) {
-  return new X86Mir2Lir(cu, mir_graph, arena);
+  return new X86Mir2Lir(cu, mir_graph, arena, false);
+}
+
+Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                          ArenaAllocator* const arena) {
+  return new X86Mir2Lir(cu, mir_graph, arena, true);
 }
 
 // Not used in x86
@@ -724,12 +840,46 @@
   return call;
 }
 
+/*
+ * @brief Enter a 32-bit quantity into a buffer.
+ * @param buf The buffer.
+ * @param data Data value.
+ */
+static void PushWord(std::vector<uint8_t>& buf, int32_t data) {
+  buf.push_back(data & 0xff);
+  buf.push_back((data >> 8) & 0xff);
+  buf.push_back((data >> 16) & 0xff);
+  buf.push_back((data >> 24) & 0xff);
+}
+
 void X86Mir2Lir::InstallLiteralPools() {
   // These are handled differently for x86.
   DCHECK(code_literal_list_ == nullptr);
   DCHECK(method_literal_list_ == nullptr);
   DCHECK(class_literal_list_ == nullptr);
 
+  // Align to a 16-byte boundary. We have implicit knowledge that the start of the method
+  // is on a 4-byte boundary. How can we check this if it changes (other than aligned
+  // loads failing at runtime)?
+  if (const_vectors_ != nullptr) {
+    int align_size = (16-4) - (code_buffer_.size() & 0xF);
+    if (align_size < 0) {
+      align_size += 16;
+    }
+
+    while (align_size > 0) {
+      code_buffer_.push_back(0);
+      align_size--;
+    }
+    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+      PushWord(code_buffer_, p->operands[0]);
+      PushWord(code_buffer_, p->operands[1]);
+      PushWord(code_buffer_, p->operands[2]);
+      PushWord(code_buffer_, p->operands[3]);
+    }
+  }
+
   // Handle the fixups for methods.
   for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
       LIR* p = method_address_insns_.Get(i);
@@ -960,18 +1110,6 @@
 }
 
 /*
- * @brief Enter a 32 bit quantity into the FDE buffer
- * @param buf FDE buffer.
- * @param data Data value.
- */
-static void PushWord(std::vector<uint8_t>&buf, int data) {
-  buf.push_back(data & 0xff);
-  buf.push_back((data >> 8) & 0xff);
-  buf.push_back((data >> 16) & 0xff);
-  buf.push_back((data >> 24) & 0xff);
-}
-
-/*
  * @brief Enter an 'advance LOC' into the FDE buffer
  * @param buf FDE buffer.
  * @param increment Amount by which to increase the current location.
@@ -1121,4 +1259,73 @@
   return cfi_info;
 }
 
+void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
+  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
+    case kMirOpConstVector:
+      GenConst128(bb, mir);
+      break;
+    default:
+      break;
+  }
+}
+
+void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
+  int type_size = mir->dalvikInsn.vA;
+  // We support 128-bit vectors.
+  DCHECK_EQ(type_size & 0xFFFF, 128);
+  int reg = mir->dalvikInsn.vB;
+  DCHECK_LT(reg, 8);
+  uint32_t *args = mir->dalvikInsn.arg;
+  // Check for all 0 case.
+  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
+    NewLIR2(kX86XorpsRR, reg, reg);
+    return;
+  }
+  // Okay, load it from the constant vector area.
+  LIR *data_target = ScanVectorLiteral(mir);
+  if (data_target == nullptr) {
+    data_target = AddVectorLiteral(mir);
+  }
+
+  // Address the start of the method.
+  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+  rl_method = LoadValue(rl_method, kCoreReg);
+
+  // Load the proper value from the literal area.
+  // We don't know the proper offset for the value, so pick one that will force
+  // a 4-byte offset. We will fix this up in the assembler later to have the
+  // right value.
+  LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+  load->flags.fixup = kFixupLoad;
+  load->target = data_target;
+  SetMemRefType(load, true, kLiteral);
+}
+
+LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
+  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
+  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
+    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
+        args[2] == p->operands[2] && args[3] == p->operands[3]) {
+      return p;
+    }
+  }
+  return nullptr;
+}
+
+LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
+  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
+  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
+  new_value->operands[0] = args[0];
+  new_value->operands[1] = args[1];
+  new_value->operands[2] = args[2];
+  new_value->operands[3] = args[3];
+  new_value->next = const_vectors_;
+  if (const_vectors_ == nullptr) {
+    estimated_native_code_size_ += 12;  // Amount needed to align to a 16-byte boundary.
+  }
+  estimated_native_code_size_ += 16;  // Space for one vector.
+  const_vectors_ = new_value;
+  return new_value;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 1da4f17..e9592a6 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -130,32 +130,42 @@
   X86OpCode opcode = kX86Bkpt;
   bool byte_imm = IS_SIMM8(value);
   DCHECK(!r_dest_src1.IsFloat());
-  switch (op) {
-    case kOpLsl: opcode = kX86Sal32RI; break;
-    case kOpLsr: opcode = kX86Shr32RI; break;
-    case kOpAsr: opcode = kX86Sar32RI; break;
-    case kOpAdd: opcode = byte_imm ? kX86Add32RI8 : kX86Add32RI; break;
-    case kOpOr:  opcode = byte_imm ? kX86Or32RI8  : kX86Or32RI;  break;
-    case kOpAdc: opcode = byte_imm ? kX86Adc32RI8 : kX86Adc32RI; break;
-    // case kOpSbb: opcode = kX86Sbb32RI; break;
-    case kOpAnd: opcode = byte_imm ? kX86And32RI8 : kX86And32RI; break;
-    case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
-    case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
-    case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-    case kOpMov:
-      /*
-       * Moving the constant zero into register can be specialized as an xor of the register.
-       * However, that sets eflags while the move does not. For that reason here, always do
-       * the move and if caller is flexible, they should be calling LoadConstantNoClobber instead.
-       */
-      opcode = kX86Mov32RI;
-      break;
-    case kOpMul:
-      opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
-      return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), value);
-    default:
-      LOG(FATAL) << "Bad case in OpRegImm " << op;
+  if (r_dest_src1.Is64Bit()) {
+    switch (op) {
+      case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break;
+      case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break;
+      default:
+        LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op;
+    }
+  } else {
+    switch (op) {
+      case kOpLsl: opcode = kX86Sal32RI; break;
+      case kOpLsr: opcode = kX86Shr32RI; break;
+      case kOpAsr: opcode = kX86Sar32RI; break;
+      case kOpAdd: opcode = byte_imm ? kX86Add32RI8 : kX86Add32RI; break;
+      case kOpOr:  opcode = byte_imm ? kX86Or32RI8  : kX86Or32RI;  break;
+      case kOpAdc: opcode = byte_imm ? kX86Adc32RI8 : kX86Adc32RI; break;
+      // case kOpSbb: opcode = kX86Sbb32RI; break;
+      case kOpAnd: opcode = byte_imm ? kX86And32RI8 : kX86And32RI; break;
+      case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
+      case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
+      case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
+      case kOpMov:
+        /*
+         * Moving the constant zero into register can be specialized as an xor of the register.
+         * However, that sets eflags while the move does not. For that reason here, always do
+         * the move and if caller is flexible, they should be calling LoadConstantNoClobber instead.
+         */
+        opcode = kX86Mov32RI;
+        break;
+      case kOpMul:
+        opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
+        return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), value);
+      default:
+        LOG(FATAL) << "Bad case in OpRegImm " << op;
+    }
   }
+  CHECK(!r_dest_src1.Is64Bit() || X86Mir2Lir::EncodingMap[opcode].kind == kReg64Imm) << "OpRegImm(" << op << ")";
   return NewLIR2(opcode, r_dest_src1.GetReg(), value);
 }
 
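
The byte_imm test above is what selects between the short and long immediate encodings: x86 ALU instructions have a sign-extended imm8 form (e.g. 0x83 /0 for ADD) alongside the imm32 form (0x81 /0), so any immediate in [-128, 127] saves three bytes. A small standalone check:

    #include <cstdio>

    #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))

    int main() {
      // 127 still fits the sign-extended imm8 form; 128 does not.
      for (int v : {1, 127, 128, -128, -129}) {
        std::printf("imm %5d -> %s\n", v, IS_SIMM8(v) ? "imm8 form" : "imm32 form");
      }
      return 0;
    }
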
@@ -464,7 +474,7 @@
                      r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
     } else if (op == kOpAdd) {  // lea add special case
       return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */,
-                     r4sib_no_index /* index */, 0 /* scale */, value /* disp */);
+                     rs_rX86_SP.GetReg() /* r4sib_no_index */ /* index */, 0 /* scale */, value /* disp */);
     }
     OpRegCopy(r_dest, r_src);
   }
@@ -578,6 +588,13 @@
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
       break;
+    case kWord:
+      if (Gen64Bit()) {
+        opcode = is_array ? kX86Mov64RA  : kX86Mov64RM;
+        CHECK_EQ(is_array, false);
+        CHECK_EQ(r_dest.IsFloat(), false);
+        break;
+      }  // else fall-through to k32 case
     case k32:
     case kSingle:
     case kReference:  // TODO: update for reference decompression on 64-bit targets.
@@ -689,10 +706,6 @@
 
 LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                               OpSize size) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
   return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest,
                              size);
 }
@@ -711,11 +724,23 @@
       if (r_src.IsFloat()) {
         opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
       } else {
-        opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
+        if (Gen64Bit()) {
+          opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
+        } else {
+          // TODO(64): pair = true;
+          opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
+        }
       }
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
       break;
+    case kWord:
+      if (Gen64Bit()) {
+        opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
+        CHECK_EQ(is_array, false);
+        CHECK_EQ(r_src.IsFloat(), false);
+        break;
+      }  // else fall-through to k32 case
     case k32:
     case kSingle:
     case kReference:
@@ -785,10 +810,6 @@
 
 LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement,
                                RegStorage r_src, OpSize size) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
   return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
 }
 
@@ -845,6 +866,9 @@
     case kMirOpFusedCmpgDouble:
       AnalyzeFPInstruction(opcode, bb, mir);
       break;
+    case kMirOpConstVector:
+      store_method_addr_ = true;
+      break;
     default:
       // Ignore the rest.
       break;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 77d716f..adfed0c 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -124,9 +124,10 @@
   rDX            = r2,
   r3             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
   rBX            = r3,
-  r4sp           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
-  rX86_SP        = r4sp,
-  r4sib_no_index = r4sp,
+  r4sp_32        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
+  rX86_SP_32     = r4sp_32,
+  r4sp_64        = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 4,
+  rX86_SP_64     = r4sp_64,
   r5             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
   rBP            = r5,
   r5sib_no_base  = r5,
@@ -191,8 +192,9 @@
 constexpr RegStorage rs_rDX = rs_r2;
 constexpr RegStorage rs_r3(RegStorage::kValid | r3);
 constexpr RegStorage rs_rBX = rs_r3;
-constexpr RegStorage rs_r4sp(RegStorage::kValid | r4sp);
-constexpr RegStorage rs_rX86_SP = rs_r4sp;
+constexpr RegStorage rs_rX86_SP_64(RegStorage::kValid | r4sp_64);
+constexpr RegStorage rs_rX86_SP_32(RegStorage::kValid | r4sp_32);
+extern RegStorage rs_rX86_SP;
 constexpr RegStorage rs_r5(RegStorage::kValid | r5);
 constexpr RegStorage rs_rBP = rs_r5;
 constexpr RegStorage rs_r6(RegStorage::kValid | r6);
@@ -228,35 +230,31 @@
 constexpr RegStorage rs_qr6(RegStorage::kValid | qr6);
 constexpr RegStorage rs_qr7(RegStorage::kValid | qr7);
 
-// TODO: elminate these #defines?
-#define rX86_ARG0 rAX
-#define rs_rX86_ARG0 rs_rAX
-#define rX86_ARG1 rCX
-#define rs_rX86_ARG1 rs_rCX
-#define rX86_ARG2 rDX
-#define rs_rX86_ARG2 rs_rDX
-#define rX86_ARG3 rBX
-#define rs_rX86_ARG3 rs_rBX
-#define rX86_FARG0 rAX
-#define rs_rX86_FARG0 rs_rAX
-#define rX86_FARG1 rCX
-#define rs_rX86_FARG1 rs_rCX
-#define rX86_FARG2 rDX
-#define rs_rX86_FARG2 rs_rDX
-#define rX86_FARG3 rBX
-#define rs_rX86_FARG3 rs_rBX
-#define rX86_RET0 rAX
-#define rs_rX86_RET0 rs_rAX
-#define rX86_RET1 rDX
-#define rs_rX86_RET1 rs_rDX
-#define rX86_INVOKE_TGT rAX
-#define rs_rX86_INVOKE_TGT rs_rAX
-#define rX86_LR RegStorage::kInvalidRegVal
-#define rX86_SUSPEND RegStorage::kInvalidRegVal
-#define rX86_SELF RegStorage::kInvalidRegVal
-#define rX86_COUNT rCX
-#define rs_rX86_COUNT rs_rCX
-#define rX86_PC RegStorage::kInvalidRegVal
+extern X86NativeRegisterPool rX86_ARG0;
+extern X86NativeRegisterPool rX86_ARG1;
+extern X86NativeRegisterPool rX86_ARG2;
+extern X86NativeRegisterPool rX86_ARG3;
+extern X86NativeRegisterPool rX86_FARG0;
+extern X86NativeRegisterPool rX86_FARG1;
+extern X86NativeRegisterPool rX86_FARG2;
+extern X86NativeRegisterPool rX86_FARG3;
+extern X86NativeRegisterPool rX86_RET0;
+extern X86NativeRegisterPool rX86_RET1;
+extern X86NativeRegisterPool rX86_INVOKE_TGT;
+extern X86NativeRegisterPool rX86_COUNT;
+
+extern RegStorage rs_rX86_ARG0;
+extern RegStorage rs_rX86_ARG1;
+extern RegStorage rs_rX86_ARG2;
+extern RegStorage rs_rX86_ARG3;
+extern RegStorage rs_rX86_FARG0;
+extern RegStorage rs_rX86_FARG1;
+extern RegStorage rs_rX86_FARG2;
+extern RegStorage rs_rX86_FARG3;
+extern RegStorage rs_rX86_RET0;
+extern RegStorage rs_rX86_RET1;
+extern RegStorage rs_rX86_INVOKE_TGT;
+extern RegStorage rs_rX86_COUNT;
 
 // RegisterLocation templates return values (r_V0, or r_V0/r_V1).
 const RegLocation x86_loc_c_return
@@ -313,10 +311,10 @@
   opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \
   opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \
   opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \
-  opcode ## 32MR, opcode ## 32AR, opcode ## 32TR,  \
-  opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \
-  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
-  opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
+  opcode ## 32MR, opcode ## 64MR, opcode ## 32AR, opcode ## 64AR, opcode ## 32TR,  \
+  opcode ## 32RR, opcode ## 32RM, opcode ## 64RM, opcode ## 32RA, opcode ## 64RA, opcode ## 32RT, opcode ## 64RT, \
+  opcode ## 32RI, opcode ## 64RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
+  opcode ## 32RI8, opcode ## 64RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
   BinaryOpCode(kX86Add),
   BinaryOpCode(kX86Or),
   BinaryOpCode(kX86Adc),
@@ -335,9 +333,9 @@
   kX86Mov16MR, kX86Mov16AR, kX86Mov16TR,
   kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT,
   kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI,
-  kX86Mov32MR, kX86Mov32AR, kX86Mov32TR,
-  kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
-  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
+  kX86Mov32MR, kX86Mov64MR, kX86Mov32AR, kX86Mov64AR, kX86Mov32TR,
+  kX86Mov32RR, kX86Mov32RM, kX86Mov64RM, kX86Mov32RA, kX86Mov64RA, kX86Mov32RT, kX86Mov64RT,
+  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Mov64TI,
   kX86Lea32RM,
   kX86Lea32RA,
   // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
@@ -427,6 +425,8 @@
   kX86Fild64M,                  // push 64-bit integer on x87 stack
   kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
   kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
+  Binary0fOpCode(kX86Mova128),  // move 128 bits aligned
+  kX86Mova128MR, kX86Mova128AR,  // store 128 bit aligned from xmm1 to m128
   Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
   kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
   Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
@@ -479,11 +479,11 @@
   kNullary,                                // Opcode that takes no arguments.
   kPrefix2Nullary,                         // Opcode that takes no arguments, but 2 prefixes.
   kRegOpcode,                              // Shorter form of R instruction kind (opcode+rd)
-  kReg, kMem, kArray,                      // R, M and A instruction kinds.
-  kMemReg, kArrayReg, kThreadReg,          // MR, AR and TR instruction kinds.
-  kRegReg, kRegMem, kRegArray, kRegThread,  // RR, RM, RA and RT instruction kinds.
+  kReg, kReg64, kMem, kArray,              // R, M and A instruction kinds.
+  kMemReg, kMemReg64, kArrayReg, kArrayReg64, kThreadReg,          // MR, AR and TR instruction kinds.
+  kRegReg, kRegMem, kRegArray, kRegThread, kReg64Thread,  // RR, RM, RA and RT instruction kinds.
   kRegRegStore,                            // RR following the store modrm reg-reg encoding rather than the load.
-  kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
+  kRegImm, kReg64Imm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
   kRegRegImm, kRegMemImm, kRegArrayImm,    // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                              // Shorter form move RI.
   kRegRegImmRev,                           // RRI with first reg in r/m
@@ -532,6 +532,11 @@
 
 // Segment override instruction prefix used for quick TLS access to Thread::Current().
 #define THREAD_PREFIX 0x64
+#define THREAD_PREFIX_GS 0x65
+
+// 64-bit operand size prefix.
+#define REX_W 0x48
+// The remaining REX bits extend the ModR/M reg, SIB index, and base fields.
 
 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
 #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
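
For context on the new defines: a REX prefix is a single byte 0100WRXB, so REX_W == 0x48 is the fixed 0100 nibble with only the W (64-bit operand size) bit set, while the R/X/B bits extend the ModR/M and SIB register fields. A one-function sketch:

    #include <cstdint>

    // REX = 0100 W R X B. MakeRex(true, false, false, false) == 0x48 == REX_W.
    static uint8_t MakeRex(bool w, bool r, bool x, bool b) {
      return static_cast<uint8_t>(0x40 | (w << 3) | (r << 2) | (x << 1) | (b << 0));
    }
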
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index 979f516..3387c50 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -137,6 +137,10 @@
     return ((reg_ & k64BitMask) == k64Bits);
   }
 
+  bool Is64BitSolo() const {
+    return ((reg_ & kShapeMask) == k64BitSolo);
+  }
+
   bool IsPair() const {
     return ((reg_ & kShapeMask) == k64BitPair);
   }
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 0f812a4..e19f3cf 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -34,7 +34,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 55ba643..b48be58 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1706,7 +1706,7 @@
         // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
         // than use a special Object for the purpose we use the Class of java.lang.Class.
         Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
-        ObjectLock<mirror::Class> lock(soa.Self(), &h_klass);
+        ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
         // Attempt to initialize allowing initialization of parent classes but still not static
         // fields.
         manager->GetClassLinker()->EnsureInitialized(klass, false, true);
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 113594a..fe3a4e6 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -19,7 +19,7 @@
 #include <stdint.h>
 #include <stdio.h>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "dex_file.h"
diff --git a/compiler/elf_fixup.cc b/compiler/elf_fixup.cc
index 6fd4a73..571a091 100644
--- a/compiler/elf_fixup.cc
+++ b/compiler/elf_fixup.cc
@@ -22,7 +22,7 @@
 #include "base/stringprintf.h"
 #include "elf_file.h"
 #include "elf_writer.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/compiler/elf_stripper.cc b/compiler/elf_stripper.cc
index 42291b2..b0fa63c 100644
--- a/compiler/elf_stripper.cc
+++ b/compiler/elf_stripper.cc
@@ -20,7 +20,7 @@
 #include <sys/types.h>
 #include <vector>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "base/logging.h"
 #include "elf_file.h"
 #include "elf_utils.h"
diff --git a/compiler/elf_writer_mclinker.h b/compiler/elf_writer_mclinker.h
index 13757ed..3c1a47b 100644
--- a/compiler/elf_writer_mclinker.h
+++ b/compiler/elf_writer_mclinker.h
@@ -19,7 +19,7 @@
 
 #include "elf_writer.h"
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "safe_map.h"
 
 namespace mcld {
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7c5741b..5a79542 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -27,7 +27,7 @@
 #include "lock_word.h"
 #include "mirror/object-inl.h"
 #include "signal_catcher.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utils.h"
 #include "vector_output_stream.h"
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d855eee..20a66d4 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -52,7 +52,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utils.h"
 
 using ::art::mirror::ArtField;
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 7e22a96..f8df2bb 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -30,7 +30,7 @@
 #include "os.h"
 #include "safe_map.h"
 #include "gc/space/space.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 6035689..561d00f 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -31,7 +31,7 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 extern "C" JNIEXPORT jint JNICALL Java_MyClassNatives_bar(JNIEnv*, jobject, jint count) {
   return count + 1;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 20f9f4b..02d6fa5 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -33,7 +33,7 @@
 #include "utils/mips/managed_register_mips.h"
 #include "utils/x86/managed_register_x86.h"
 #include "thread.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 #define __ jni_asm->
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 85c9b47..7a41d87 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -26,7 +26,7 @@
 #include "oat.h"
 #include "mirror/class.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index a7604be..b9c1164 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -188,6 +188,23 @@
   printer.EndTag("compilation");
 }
 
+HGraphVisualizer::HGraphVisualizer(std::ostream* output,
+                                   HGraph* graph,
+                                   const char* name)
+    : output_(output), graph_(graph), is_enabled_(false) {
+  if (output == nullptr) {
+    return;
+  }
+
+  is_enabled_ = true;
+  HGraphVisualizerPrinter printer(graph, *output_);
+  printer.StartTag("compilation");
+  printer.PrintProperty("name", name);
+  printer.PrintProperty("method", name);
+  printer.PrintTime("date");
+  printer.EndTag("compilation");
+}
+
 void HGraphVisualizer::DumpGraph(const char* pass_name) {
   if (!is_enabled_) {
     return;
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index 433d55d..2b88e65 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -42,6 +42,12 @@
                    const DexCompilationUnit& cu);
 
   /**
+   * Version of `HGraphVisualizer` for unit testing, used when a
+   * `DexCompilationUnit` is not available.
+   */
+  HGraphVisualizer(std::ostream* output, HGraph* graph, const char* name);
+
+  /**
    * If this visualizer is enabled, emit the compilation information
    * in `output_`.
    */
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
new file mode 100644
index 0000000..f9ae529
--- /dev/null
+++ b/compiler/optimizing/linearize_test.cc
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+
+#include "base/stringprintf.h"
+#include "builder.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "graph_visualizer.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "pretty_printer.h"
+#include "ssa_builder.h"
+#include "ssa_liveness_analysis.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraphBuilder builder(&allocator);
+  const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+  HGraph* graph = builder.BuildGraph(*item);
+  ASSERT_NE(graph, nullptr);
+
+  graph->BuildDominatorTree();
+  graph->FindNaturalLoops();
+  SsaLivenessAnalysis liveness(*graph);
+  liveness.Analyze();
+
+  ASSERT_EQ(liveness.GetLinearPostOrder().Size(), number_of_blocks);
+  for (size_t i = 0; i < number_of_blocks; ++i) {
+    ASSERT_EQ(liveness.GetLinearPostOrder().Get(number_of_blocks - i - 1)->GetBlockId(),
+              expected_order[i]);
+  }
+}
+
+TEST(LinearizeTest, CFG1) {
+  // Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2 ++++++
+  //            /   \       +
+  //       Block5   Block7  +
+  //         |        |     +
+  //       Block6   Block3  +
+  //               + /   \  +
+  //           Block4   Block8
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 5,
+    Instruction::IF_EQ, 0xFFFE,
+    Instruction::GOTO | 0xFE00,
+    Instruction::RETURN_VOID);
+
+  const int blocks[] = {0, 1, 2, 7, 3, 4, 8, 5, 6};
+  TestCode(data, blocks, 9);
+}
+
+TEST(LinearizeTest, CFG2) {
+  // Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2 ++++++
+  //            /   \       +
+  //       Block3   Block7  +
+  //         |        |     +
+  //       Block6   Block4  +
+  //               + /   \  +
+  //           Block5   Block8
+
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::RETURN_VOID,
+    Instruction::IF_EQ, 0xFFFD,
+    Instruction::GOTO | 0xFE00);
+
+  const int blocks[] = {0, 1, 2, 7, 4, 5, 8, 3, 6};
+  TestCode(data, blocks, 9);
+}
+
+TEST(LinearizeTest, CFG3) {
+  // Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2 ++++++
+  //            /   \       +
+  //       Block3   Block8  +
+  //         |        |     +
+  //       Block7   Block5  +
+  //                 / +  \ +
+  //           Block6  + Block9
+  //             |     +
+  //           Block4 ++
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 4,
+    Instruction::RETURN_VOID,
+    Instruction::GOTO | 0x0100,
+    Instruction::IF_EQ, 0xFFFC,
+    Instruction::GOTO | 0xFD00);
+
+  const int blocks[] = {0, 1, 2, 8, 5, 6, 4, 9, 3, 7};
+  TestCode(data, blocks, 10);
+}
+
+TEST(LinearizeTest, CFG4) {
+  /* Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2
+  //            / +  \
+  //       Block6 + Block8
+  //         |    +   |
+  //       Block7 + Block3 +++++++
+  //              +  /  \        +
+  //           Block9   Block10  +
+  //                      |      +
+  //                    Block4   +
+  //                  + /    \   +
+  //                Block5  Block11
+  */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 7,
+    Instruction::IF_EQ, 0xFFFE,
+    Instruction::IF_EQ, 0xFFFE,
+    Instruction::GOTO | 0xFE00,
+    Instruction::RETURN_VOID);
+
+  const int blocks[] = {0, 1, 2, 8, 3, 10, 4, 5, 11, 9, 6, 7};
+  TestCode(data, blocks, 12);
+}
+
+TEST(LinearizeTest, CFG5) {
+  /* Structure of this graph (+ are back edges)
+  //            Block0
+  //              |
+  //            Block1
+  //              |
+  //            Block2
+  //            / +  \
+  //       Block3 + Block8
+  //         |    +   |
+  //       Block7 + Block4 +++++++
+  //              +  /  \        +
+  //           Block9   Block10  +
+  //                      |      +
+  //                    Block5   +
+  //                   +/    \   +
+  //                Block6  Block11
+  */
+  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::IF_EQ, 3,
+    Instruction::RETURN_VOID,
+    Instruction::IF_EQ, 0xFFFD,
+    Instruction::IF_EQ, 0xFFFE,
+    Instruction::GOTO | 0xFE00);
+
+  const int blocks[] = {0, 1, 2, 8, 4, 10, 5, 6, 11, 9, 3, 7};
+  TestCode(data, blocks, 12);
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index d665ab9..53e7bbe 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -35,6 +35,7 @@
   ASSERT_NE(graph, nullptr);
   graph->BuildDominatorTree();
   graph->TransformToSSA();
+  graph->FindNaturalLoops();
   SsaLivenessAnalysis liveness(*graph);
   liveness.Analyze();
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 27b87ca..1085c10 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -445,7 +445,7 @@
   bool HasUses() const { return uses_ != nullptr || env_uses_ != nullptr; }
 
   size_t NumberOfUses() const {
-    // TODO: Optimize this method if it is used outside of the HGraphTracer.
+    // TODO: Optimize this method if it is used outside of the HGraphVisualizer.
     size_t result = 0;
     HUseListNode<HInstruction>* current = uses_;
     while (current != nullptr) {
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index dfeafe7..a7727c0 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -100,6 +100,47 @@
   DISALLOW_COPY_AND_ASSIGN(HPrettyPrinter);
 };
 
+class StringPrettyPrinter : public HPrettyPrinter {
+ public:
+  explicit StringPrettyPrinter(HGraph* graph)
+      : HPrettyPrinter(graph), str_(""), current_block_(nullptr) { }
+
+  virtual void PrintInt(int value) {
+    str_ += StringPrintf("%d", value);
+  }
+
+  virtual void PrintString(const char* value) {
+    str_ += value;
+  }
+
+  virtual void PrintNewLine() {
+    str_ += '\n';
+  }
+
+  void Clear() { str_.clear(); }
+
+  std::string str() const { return str_; }
+
+  virtual void VisitBasicBlock(HBasicBlock* block) {
+    current_block_ = block;
+    HPrettyPrinter::VisitBasicBlock(block);
+  }
+
+  virtual void VisitGoto(HGoto* gota) {
+    PrintString("  ");
+    PrintInt(gota->GetId());
+    PrintString(": Goto ");
+    PrintInt(current_block_->GetSuccessors().Get(0)->GetBlockId());
+    PrintNewLine();
+  }
+
+ private:
+  std::string str_;
+  HBasicBlock* current_block_;
+
+  DISALLOW_COPY_AND_ASSIGN(StringPrettyPrinter);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_PRETTY_PRINTER_H_
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 006349c..7e604e9 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -27,47 +27,6 @@
 
 namespace art {
 
-class StringPrettyPrinter : public HPrettyPrinter {
- public:
-  explicit StringPrettyPrinter(HGraph* graph)
-      : HPrettyPrinter(graph), str_(""), current_block_(nullptr) { }
-
-  virtual void PrintInt(int value) {
-    str_ += StringPrintf("%d", value);
-  }
-
-  virtual void PrintString(const char* value) {
-    str_ += value;
-  }
-
-  virtual void PrintNewLine() {
-    str_ += '\n';
-  }
-
-  void Clear() { str_.clear(); }
-
-  std::string str() const { return str_; }
-
-  virtual void VisitBasicBlock(HBasicBlock* block) {
-    current_block_ = block;
-    HPrettyPrinter::VisitBasicBlock(block);
-  }
-
-  virtual void VisitGoto(HGoto* gota) {
-    PrintString("  ");
-    PrintInt(gota->GetId());
-    PrintString(": Goto ");
-    PrintInt(current_block_->GetSuccessors().Get(0)->GetBlockId());
-    PrintNewLine();
-  }
-
- private:
-  std::string str_;
-  HBasicBlock* current_block_;
-
-  DISALLOW_COPY_AND_ASSIGN(StringPrettyPrinter);
-};
-
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 7c2ec39..85171aa 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -20,13 +20,92 @@
 namespace art {
 
 void SsaLivenessAnalysis::Analyze() {
+  LinearizeGraph();
   NumberInstructions();
   ComputeSets();
 }
 
+static bool IsLoopExit(HLoopInformation* current, HLoopInformation* to) {
+  // `to` is either not part of a loop, or `current` is an inner loop of `to`.
+  return to == nullptr || (current != to && current->IsIn(*to));
+}
+
+static bool IsLoop(HLoopInformation* info) {
+  return info != nullptr;
+}
+
+static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) {
+  return first_loop == second_loop;
+}
+
+static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) {
+  return (inner != outer)
+      && (inner != nullptr)
+      && (outer != nullptr)
+      && inner->IsIn(*outer);
+}
+
+static void VisitBlockForLinearization(HBasicBlock* block,
+                                       GrowableArray<HBasicBlock*>* order,
+                                       ArenaBitVector* visited) {
+  if (visited->IsBitSet(block->GetBlockId())) {
+    return;
+  }
+  visited->SetBit(block->GetBlockId());
+  size_t number_of_successors = block->GetSuccessors().Size();
+  if (number_of_successors == 0) {
+    // Nothing to do.
+  } else if (number_of_successors == 1) {
+    VisitBlockForLinearization(block->GetSuccessors().Get(0), order, visited);
+  } else {
+    DCHECK_EQ(number_of_successors, 2u);
+    HBasicBlock* first_successor = block->GetSuccessors().Get(0);
+    HBasicBlock* second_successor = block->GetSuccessors().Get(1);
+    HLoopInformation* my_loop = block->GetLoopInformation();
+    HLoopInformation* first_loop = first_successor->GetLoopInformation();
+    HLoopInformation* second_loop = second_successor->GetLoopInformation();
+
+    if (!IsLoop(my_loop)) {
+      // Nothing to do. Current order is fine.
+    } else if (IsLoopExit(my_loop, second_loop) && InSameLoop(my_loop, first_loop)) {
+      // Visit the loop exit first in post order.
+      std::swap(first_successor, second_successor);
+    } else if (IsInnerLoop(my_loop, first_loop) && !IsInnerLoop(my_loop, second_loop)) {
+      // Visit the inner loop last in post order.
+      std::swap(first_successor, second_successor);
+    }
+    VisitBlockForLinearization(first_successor, order, visited);
+    VisitBlockForLinearization(second_successor, order, visited);
+  }
+  order->Add(block);
+}
+
+class HLinearOrderIterator : public ValueObject {
+ public:
+  explicit HLinearOrderIterator(const GrowableArray<HBasicBlock*>& post_order)
+      : post_order_(post_order), index_(post_order.Size()) {}
+
+  bool Done() const { return index_ == 0; }
+  HBasicBlock* Current() const { return post_order_.Get(index_ - 1); }
+  void Advance() { --index_; DCHECK_GE(index_, 0U); }
+
+ private:
+  const GrowableArray<HBasicBlock*>& post_order_;
+  size_t index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HLinearOrderIterator);
+};
+
+void SsaLivenessAnalysis::LinearizeGraph() {
+  // For simplicity of the implementation, we build a linear post order. The order used
+  // for computing live ranges is the reverse of that order.
+  ArenaBitVector visited(graph_.GetArena(), graph_.GetBlocks().Size(), false);
+  VisitBlockForLinearization(graph_.GetEntryBlock(), &linear_post_order_, &visited);
+}
+
 void SsaLivenessAnalysis::NumberInstructions() {
   int ssa_index = 0;
-  for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+  for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
 
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
@@ -47,7 +126,7 @@
 }
 
 void SsaLivenessAnalysis::ComputeSets() {
-  for (HReversePostOrderIterator it(graph_); !it.Done(); it.Advance()) {
+  for (HLinearOrderIterator it(linear_post_order_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     block_infos_.Put(
         block->GetBlockId(),
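
Note on the linearization above: VisitBlockForLinearization stores a post order (each block is appended after its successors), and HLinearOrderIterator walks that array from the back, so NumberInstructions and ComputeSets see the reverse post order, in which a block comes after its dominator and loop bodies stay contiguous. A minimal standalone analogue of the iterator, with std::vector<int> standing in for GrowableArray<HBasicBlock*> (the block ids and main() are illustrative only):

    #include <cstdio>
    #include <vector>

    // Walks a post order from the back, yielding the linear (reverse post)
    // order without copying the array -- same shape as HLinearOrderIterator.
    class LinearOrderIterator {
     public:
      explicit LinearOrderIterator(const std::vector<int>& post_order)
          : post_order_(post_order), index_(post_order.size()) {}
      bool Done() const { return index_ == 0; }
      int Current() const { return post_order_[index_ - 1]; }
      void Advance() { --index_; }
     private:
      const std::vector<int>& post_order_;
      size_t index_;
    };

    int main() {
      // The entry block (0) is appended last by a post-order visit.
      std::vector<int> post_order = {6, 5, 4, 3, 2, 1, 0};
      for (LinearOrderIterator it(post_order); !it.Done(); it.Advance()) {
        std::printf("%d ", it.Current());  // Prints: 0 1 2 3 4 5 6
      }
      return 0;
    }
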
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 6a901d1..b8695ba 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -48,6 +48,7 @@
  public:
   explicit SsaLivenessAnalysis(const HGraph& graph)
       : graph_(graph),
+        linear_post_order_(graph.GetArena(), graph.GetBlocks().Size()),
         block_infos_(graph.GetArena(), graph.GetBlocks().Size()),
         number_of_ssa_values_(0) {
     block_infos_.SetSize(graph.GetBlocks().Size());
@@ -67,7 +68,17 @@
     return &block_infos_.Get(block.GetBlockId())->kill_;
   }
 
+  const GrowableArray<HBasicBlock*>& GetLinearPostOrder() const {
+    return linear_post_order_;
+  }
+
  private:
+  // Linearize the graph so that:
+  // (1): a block is always after its dominator,
+  // (2): blocks of loops are contiguous.
+  // This creates a natural and efficient ordering when visualizing live ranges.
+  void LinearizeGraph();
+
   // Give an SSA number to each instruction that defines a value used by another instruction.
   void NumberInstructions();
 
@@ -90,6 +101,7 @@
   bool UpdateLiveOut(const HBasicBlock& block);
 
   const HGraph& graph_;
+  GrowableArray<HBasicBlock*> linear_post_order_;
   GrowableArray<BlockInfo*> block_infos_;
   size_t number_of_ssa_values_;
 
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 415d146..d104619 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -28,9 +28,9 @@
 
 namespace art {
 
-class StringPrettyPrinter : public HPrettyPrinter {
+class SsaPrettyPrinter : public HPrettyPrinter {
  public:
-  explicit StringPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
+  explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
 
   virtual void PrintInt(int value) {
     str_ += StringPrintf("%d", value);
@@ -59,7 +59,7 @@
  private:
   std::string str_;
 
-  DISALLOW_COPY_AND_ASSIGN(StringPrettyPrinter);
+  DISALLOW_COPY_AND_ASSIGN(SsaPrettyPrinter);
 };
 
 static void ReNumberInstructions(HGraph* graph) {
@@ -82,11 +82,12 @@
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   HGraph* graph = builder.BuildGraph(*item);
   ASSERT_NE(graph, nullptr);
+
   graph->BuildDominatorTree();
   graph->TransformToSSA();
   ReNumberInstructions(graph);
 
-  StringPrettyPrinter printer(graph);
+  SsaPrettyPrinter printer(graph);
   printer.VisitInsertionOrder();
 
   ASSERT_STREQ(expected, printer.str().c_str());
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index c866b29..0f4a9a4 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -26,7 +26,7 @@
 #include "utils/assembler.h"
 #include "offsets.h"
 #include "utils.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "a64/macro-assembler-a64.h"
 #include "a64/disasm-a64.h"
 
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index e99c76f..e7ed9a7 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -16,7 +16,7 @@
 
 LOCAL_PATH := $(call my-dir)
 
-dalvikvm_cflags := -Wall -Werror -Wextra
+dalvikvm_cflags := -Wall -Werror -Wextra -std=gnu++11
 
 include $(CLEAR_VARS)
 LOCAL_MODULE := dalvikvm
@@ -24,22 +24,29 @@
 LOCAL_CPP_EXTENSION := cc
 LOCAL_SRC_FILES := dalvikvm.cc
 LOCAL_CFLAGS := $(dalvikvm_cflags)
+LOCAL_C_INCLUDES := art/runtime
 LOCAL_SHARED_LIBRARIES := libdl libnativehelper
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_MULTILIB := both
-LOCAL_MODULE_STEM_32 := dalvikvm
+LOCAL_MODULE_STEM_32 := dalvikvm32
 LOCAL_MODULE_STEM_64 := dalvikvm64
 include art/build/Android.libcxx.mk
 include $(BUILD_EXECUTABLE)
+
+# Create a symlink for the primary version target.
+include $(BUILD_SYSTEM)/executable_prefer_symlink.mk
+
 ART_TARGET_EXECUTABLES += $(TARGET_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 
 ifeq ($(WITH_HOST_DALVIK),true)
 include $(CLEAR_VARS)
 LOCAL_MODULE := dalvikvm
 LOCAL_MODULE_TAGS := optional
+LOCAL_CLANG := true
 LOCAL_CPP_EXTENSION := cc
 LOCAL_SRC_FILES := dalvikvm.cc
 LOCAL_CFLAGS := $(dalvikvm_cflags)
+LOCAL_C_INCLUDES := art/runtime
 LOCAL_SHARED_LIBRARIES := libnativehelper
 LOCAL_LDFLAGS := -ldl -lpthread
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
diff --git a/dalvikvm/dalvikvm.cc b/dalvikvm/dalvikvm.cc
index 8d71a7c..3fa43dc 100644
--- a/dalvikvm/dalvikvm.cc
+++ b/dalvikvm/dalvikvm.cc
@@ -16,6 +16,7 @@
 
 #include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include <algorithm>
@@ -24,7 +25,7 @@
 #include "JniInvocation.h"
 #include "ScopedLocalRef.h"
 #include "toStringArray.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index ad796f8..874c324 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1017,7 +1017,7 @@
   }
 
   if (compiler_filter_string == nullptr) {
-    if (instruction_set == kX86_64 || instruction_set == kMips) {
+    if (instruction_set == kX86_64 || instruction_set == kArm64 || instruction_set == kMips) {
       // TODO: implement/fix compilers for these architectures.
       compiler_filter_string = "interpret-only";
     } else if (image) {
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 052d12e..4a23263 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -226,7 +226,7 @@
 	monitor_pool.cc \
 	arch/arm64/fault_handler_arm64.cc
 
-LIBART_TARGET_SRC_FILES_x86 := \
+LIBART_SRC_FILES_x86 := \
 	arch/x86/context_x86.cc \
 	arch/x86/entrypoints_init_x86.cc \
 	arch/x86/jni_entrypoints_x86.S \
@@ -235,7 +235,10 @@
 	arch/x86/thread_x86.cc \
 	arch/x86/fault_handler_x86.cc
 
-LIBART_TARGET_SRC_FILES_x86_64 := \
+LIBART_TARGET_SRC_FILES_x86 := \
+	$(LIBART_SRC_FILES_x86)
+
+LIBART_SRC_FILES_x86_64 := \
 	arch/x86_64/context_x86_64.cc \
 	arch/x86_64/entrypoints_init_x86_64.cc \
 	arch/x86_64/jni_entrypoints_x86_64.S \
@@ -245,6 +248,8 @@
 	monitor_pool.cc \
 	arch/x86_64/fault_handler_x86_64.cc
 
+LIBART_TARGET_SRC_FILES_x86_64 := \
+	$(LIBART_SRC_FILES_x86_64)
 
 LIBART_TARGET_SRC_FILES_mips := \
 	arch/mips/context_mips.cc \
@@ -270,31 +275,11 @@
 	runtime_linux.cc \
 	thread_linux.cc
 
-ifeq ($(HOST_ARCH),x86_64)
-LIBART_HOST_SRC_FILES += \
-	arch/x86_64/context_x86_64.cc \
-	arch/x86_64/entrypoints_init_x86_64.cc \
-	arch/x86_64/jni_entrypoints_x86_64.S \
-	arch/x86_64/portable_entrypoints_x86_64.S \
-	arch/x86_64/quick_entrypoints_x86_64.S \
-	arch/x86_64/thread_x86_64.cc \
-	arch/x86_64/fault_handler_x86_64.cc \
-	monitor_pool.cc
-else
-  ifeq ($(HOST_ARCH),x86)
-LIBART_HOST_SRC_FILES += \
-	arch/x86/context_x86.cc \
-	arch/x86/entrypoints_init_x86.cc \
-	arch/x86/jni_entrypoints_x86.S \
-	arch/x86/portable_entrypoints_x86.S \
-	arch/x86/quick_entrypoints_x86.S \
-	arch/x86/fault_handler_x86.cc \
-	arch/x86/thread_x86.cc
-  else # HOST_ARCH != x86 && HOST_ARCH != x86_64
-$(error unsupported HOST_ARCH=$(HOST_ARCH))
-  endif
-endif
+LIBART_HOST_SRC_FILES_32 := \
+	$(LIBART_SRC_FILES_x86)
 
+LIBART_HOST_SRC_FILES_64 := \
+	$(LIBART_SRC_FILES_x86_64)
 
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
 	arch/x86_64/registers_x86_64.h \
@@ -363,6 +348,8 @@
       LOCAL_SRC_FILES_$(arch) := $$(LIBART_TARGET_SRC_FILES_$(arch)))
   else # host
     LOCAL_SRC_FILES := $(LIBART_HOST_SRC_FILES)
+    LOCAL_SRC_FILES_32 := $(LIBART_HOST_SRC_FILES_32)
+    LOCAL_SRC_FILES_64 := $(LIBART_HOST_SRC_FILES_64)
     LOCAL_IS_HOST_MODULE := true
   endif
 
diff --git a/runtime/UniquePtrCompat.h b/runtime/UniquePtrCompat.h
new file mode 100644
index 0000000..4a45616
--- /dev/null
+++ b/runtime/UniquePtrCompat.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_UNIQUEPTRCOMPAT_H_
+#define ART_RUNTIME_UNIQUEPTRCOMPAT_H_
+
+// Stlport doesn't declare std::unique_ptr, and UniquePtr.h declares a std::swap prototype that
+// is incompatible with libc++. This compatibility header resolves the differences between the
+// two; in the future, UniquePtr will become std::unique_ptr.
+
+#ifdef ART_WITH_STLPORT
+
+#include "UniquePtr.h"
+
+#else   //  ART_WITH_STLPORT
+
+#include <memory>
+
+template <typename T>
+using UniquePtr = typename std::unique_ptr<T>;
+
+#endif  //  ART_WITH_STLPORT
+
+#endif  // ART_RUNTIME_UNIQUEPTRCOMPAT_H_
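
A quick sanity sketch of the non-stlport branch of this header: with libc++ (ART_WITH_STLPORT undefined), UniquePtr<T> is just an alias for std::unique_ptr<T>, so the two interconvert freely. The alias below mirrors the header; main() is illustrative only:

    #include <cassert>
    #include <memory>

    template <typename T>
    using UniquePtr = typename std::unique_ptr<T>;

    int main() {
      UniquePtr<int> p(new int(42));
      assert(*p == 42);
      std::unique_ptr<int> q = std::move(p);  // Same type, so moving works.
      assert(q != nullptr && p == nullptr);
      return 0;
    }
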
diff --git a/runtime/barrier.h b/runtime/barrier.h
index 0c7fd87..d3e6bae 100644
--- a/runtime/barrier.h
+++ b/runtime/barrier.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_BARRIER_H_
 
 #include "base/mutex.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index 7d32338..a02c4c7 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -22,7 +22,7 @@
 #include "common_runtime_test.h"
 #include "mirror/object_array-inl.h"
 #include "thread_pool.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 class CheckWaitTask : public Task {
diff --git a/runtime/base/bit_vector_test.cc b/runtime/base/bit_vector_test.cc
index 2ff55cb..990d1db 100644
--- a/runtime/base/bit_vector_test.cc
+++ b/runtime/base/bit_vector_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "bit_vector.h"
 #include "gtest/gtest.h"
 
diff --git a/runtime/base/histogram_test.cc b/runtime/base/histogram_test.cc
index 966b97f..d72ae47 100644
--- a/runtime/base/histogram_test.cc
+++ b/runtime/base/histogram_test.cc
@@ -16,7 +16,7 @@
 
 #include "gtest/gtest.h"
 #include "histogram-inl.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 #include <sstream>
 
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 730a2c2..b6c6b9b 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -19,7 +19,7 @@
 #include "base/mutex.h"
 #include "runtime.h"
 #include "thread-inl.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 6944278..7800cfe 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -25,7 +25,7 @@
 #include <vector>
 #include "base/macros.h"
 #include "log_severity.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 #define CHECK(x) \
   if (UNLIKELY(!(x))) \
diff --git a/runtime/base/unix_file/random_access_file_test.h b/runtime/base/unix_file/random_access_file_test.h
index 8a6605e..67e8c22 100644
--- a/runtime/base/unix_file/random_access_file_test.h
+++ b/runtime/base/unix_file/random_access_file_test.h
@@ -22,7 +22,7 @@
 #include <string>
 
 #include "common_runtime_test.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace unix_file {
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 9034560..6b98da9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -62,7 +62,7 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utils.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
@@ -1325,8 +1325,8 @@
   // Wait for the class if it has not already been linked.
   if (!klass->IsResolved() && !klass->IsErroneous()) {
     StackHandleScope<1> hs(self);
-    Handle<mirror::Class> h_class(hs.NewHandle(klass));
-    ObjectLock<mirror::Class> lock(self, &h_class);
+    HandleWrapper<mirror::Class> h_class(hs.NewHandleWrapper(&klass));
+    ObjectLock<mirror::Class> lock(self, h_class);
     // Check for circular dependencies between classes.
     if (!h_class->IsResolved() && h_class->GetClinitThreadId() == self->GetTid()) {
       ThrowClassCircularityError(h_class.Get());
@@ -1337,7 +1337,6 @@
     while (!h_class->IsResolved() && !h_class->IsErroneous()) {
       lock.WaitIgnoringInterrupts();
     }
-    klass = h_class.Get();
   }
   if (klass->IsErroneous()) {
     ThrowEarlierClassFailure(klass);
@@ -1471,7 +1470,7 @@
     klass->SetStatus(mirror::Class::kStatusError, self);
     return NULL;
   }
-  ObjectLock<mirror::Class> lock(self, &klass);
+  ObjectLock<mirror::Class> lock(self, klass);
   klass->SetClinitThreadId(self->GetTid());
   // Add the newly loaded class to the loaded classes table.
   mirror::Class* existing = InsertClass(descriptor, klass.Get(), Hash(descriptor));
@@ -2182,7 +2181,7 @@
   Thread* self = Thread::Current();
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> h_class(hs.NewHandle(primitive_class));
-  ObjectLock<mirror::Class> lock(self, &h_class);
+  ObjectLock<mirror::Class> lock(self, h_class);
   primitive_class->SetAccessFlags(kAccPublic | kAccFinal | kAccAbstract);
   primitive_class->SetPrimitiveType(type);
   primitive_class->SetStatus(mirror::Class::kStatusInitialized, self);
@@ -2279,7 +2278,7 @@
     }
     new_class->SetComponentType(component_type.Get());
   }
-  ObjectLock<mirror::Class> lock(self, &new_class);  // Must hold lock on object when initializing.
+  ObjectLock<mirror::Class> lock(self, new_class);  // Must hold lock on object when initializing.
   DCHECK(new_class->GetComponentType() != NULL);
   mirror::Class* java_lang_Object = GetClassRoot(kJavaLangObject);
   new_class->SetSuperClass(java_lang_Object);
@@ -2554,7 +2553,7 @@
 void ClassLinker::VerifyClass(const Handle<mirror::Class>& klass) {
   // TODO: assert that the monitor on the Class is held
   Thread* self = Thread::Current();
-  ObjectLock<mirror::Class> lock(self, &klass);
+  ObjectLock<mirror::Class> lock(self, klass);
 
   // Don't attempt to re-verify if already sufficiently verified.
   if (klass->IsVerified() ||
@@ -2589,7 +2588,7 @@
   Handle<mirror::Class> super(hs.NewHandle(klass->GetSuperClass()));
   if (super.Get() != NULL) {
     // Acquire lock to prevent races on verifying the super class.
-    ObjectLock<mirror::Class> lock(self, &super);
+    ObjectLock<mirror::Class> lock(self, super);
 
     if (!super->IsVerified() && !super->IsErroneous()) {
       VerifyClass(super);
@@ -2903,7 +2902,7 @@
   self->AssertNoPendingException();
 
   {
-    ObjectLock<mirror::Class> lock(self, &klass);  // Must hold lock on object when resolved.
+    ObjectLock<mirror::Class> lock(self, klass);  // Must hold lock on object when resolved.
     // Link the fields and virtual methods, creating vtable and iftables
     Handle<mirror::ObjectArray<mirror::Class> > h_interfaces(
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces)));
@@ -3121,7 +3120,7 @@
   Thread* self = Thread::Current();
   uint64_t t0;
   {
-    ObjectLock<mirror::Class> lock(self, &klass);
+    ObjectLock<mirror::Class> lock(self, klass);
 
     // Re-check under the lock in case another thread initialized ahead of us.
     if (klass->IsInitialized()) {
@@ -3198,7 +3197,7 @@
             << " that has unexpected status " << handle_scope_super->GetStatus()
             << "\nPending exception:\n"
             << (self->GetException(NULL) != NULL ? self->GetException(NULL)->Dump() : "");
-        ObjectLock<mirror::Class> lock(self, &klass);
+        ObjectLock<mirror::Class> lock(self, klass);
         // Initialization failed because the super-class is erroneous.
         klass->SetStatus(mirror::Class::kStatusError, self);
         return false;
@@ -3242,7 +3241,7 @@
 
   bool success = true;
   {
-    ObjectLock<mirror::Class> lock(self, &klass);
+    ObjectLock<mirror::Class> lock(self, klass);
 
     if (self->IsExceptionPending()) {
       WrapExceptionInInitializer();
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 0db08aa..9970dd5 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -18,7 +18,7 @@
 
 #include <string>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 79d3690..d7a1667 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -47,7 +47,7 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "utils.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 #include "well_known_classes.h"
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index edf54be..f6b4891 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2809,6 +2809,7 @@
     }
     case DeoptimizationRequest::kFullUndeoptimization: {
       DCHECK(req.method == nullptr);
+      DCHECK_GT(full_deoptimization_event_count_, 0U);
       --full_deoptimization_event_count_;
       if (full_deoptimization_event_count_ == 0) {
         VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index f3d4621..26b7d07 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -39,7 +39,7 @@
 #include "ScopedFd.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utf-inl.h"
 #include "utils.h"
 #include "well_known_classes.h"
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index cfa2555..0146f31 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -28,7 +28,7 @@
 #include "mem_map.h"
 #include "modifiers.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 9b6859a..86c282e 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -16,7 +16,7 @@
 
 #include "dex_file.h"
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "common_runtime_test.h"
 
 namespace art {
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 528e112..d179c8b 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -22,7 +22,7 @@
 #include "dex_file-inl.h"
 #include "leb128.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utf-inl.h"
 #include "utils.h"
 
diff --git a/runtime/dex_instruction_visitor_test.cc b/runtime/dex_instruction_visitor_test.cc
index 8f42b0c..99ad3ed 100644
--- a/runtime/dex_instruction_visitor_test.cc
+++ b/runtime/dex_instruction_visitor_test.cc
@@ -18,7 +18,7 @@
 
 #include <iostream>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "gtest/gtest.h"
 
 namespace art {
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index d2a044e..138147b 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -25,7 +25,7 @@
 #include "elf_utils.h"
 #include "mem_map.h"
 #include "os.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 91a0176..37ad9e5 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -27,7 +27,7 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "vmap_table.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index c79b586..7d8b584 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -23,7 +23,7 @@
 #include "atomic.h"
 #include "base/logging.h"
 #include "base/macros.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "mem_map.h"
 #include "utils.h"
 
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 8d5dc07..17e62a6 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -20,7 +20,7 @@
 #include "base/mutex.h"
 #include "globals.h"
 #include "mem_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 7cddaf4..ef5653a 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -30,7 +30,7 @@
 #include "mirror/object_array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 using ::art::mirror::Object;
 
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index bbbd1ed..1def334 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -30,7 +30,7 @@
 #include "mirror/object_array-inl.h"
 #include "space_bitmap-inl.h"
 #include "thread.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 646fce6..a439462 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -28,7 +28,7 @@
 #include "mirror/object_array-inl.h"
 #include "object_utils.h"
 #include "space_bitmap-inl.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index a805809..1ccebf5 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -22,7 +22,7 @@
 #include "globals.h"
 #include "mem_map.h"
 #include "object_callbacks.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 #include <limits.h>
 #include <set>
diff --git a/runtime/gc/accounting/space_bitmap_test.cc b/runtime/gc/accounting/space_bitmap_test.cc
index 972f94d..71db44b 100644
--- a/runtime/gc/accounting/space_bitmap_test.cc
+++ b/runtime/gc/accounting/space_bitmap_test.cc
@@ -21,7 +21,7 @@
 #include "common_runtime_test.h"
 #include "globals.h"
 #include "space_bitmap-inl.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 8557f1b..9ea4306 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -28,7 +28,7 @@
 #include "base/logging.h"
 #include "globals.h"
 #include "mem_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utils.h"
 
 // Ensure we have an unordered_set until we have worked out C++ library issues.
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index cfb0b5e..fd79bf6 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -26,7 +26,7 @@
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 4b1ecc4..dacb5ae 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -25,7 +25,7 @@
 #include "immune_region.h"
 #include "object_callbacks.h"
 #include "offsets.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4642a98..b4c2d14 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -64,7 +64,7 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "thread_list.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "well_known_classes.h"
 
 namespace art {
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 554fbbe..6c851af 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -20,7 +20,7 @@
 #include "base/logging.h"
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "image.h"
 #include "os.h"
 #include "space-inl.h"
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index dcf5357..343bc29 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -19,7 +19,7 @@
 
 #include <string>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/space_bitmap.h"
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index ce101e4..407d362 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -23,7 +23,7 @@
 
 #include "common_runtime_test.h"
 #include "globals.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "mirror/array-inl.h"
 #include "mirror/object-inl.h"
 
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index 27c1bdc..f2e059d 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -144,19 +144,18 @@
 // A wrapper which wraps around Object** and restores the pointer in the destructor.
 // TODO: Add more functionality.
 template<class T>
-class HandleWrapper {
+class HandleWrapper : public Handle<T> {
  public:
   HandleWrapper(T** obj, const Handle<T>& handle)
-     : obj_(obj), handle_(handle) {
+     : Handle<T>(handle), obj_(obj) {
   }
 
   ~HandleWrapper() {
-    *obj_ = handle_.Get();
+    *obj_ = Handle<T>::Get();
   }
 
  private:
   T** obj_;
-  Handle<T> handle_;
 };
 
 // Scoped handle storage of a fixed size that is usually stack allocated.
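
The point of deriving HandleWrapper from Handle is that a wrapper can now be passed by value wherever a Handle is expected (for example, the reworked ObjectLock further below), while its destructor still writes the possibly GC-moved pointer back to the caller's variable. A self-contained toy model -- Handle and Assign here are simplified stand-ins for the real handle_scope.h types:

    #include <cassert>

    template <class T>
    class Handle {
     public:
      explicit Handle(T* obj) : obj_(obj) {}
      T* Get() const { return obj_; }
      void Assign(T* obj) { obj_ = obj; }  // Stands in for the GC moving the object.
     private:
      T* obj_;
    };

    template <class T>
    class HandleWrapper : public Handle<T> {
     public:
      HandleWrapper(T** obj, const Handle<T>& handle) : Handle<T>(handle), obj_(obj) {}
      ~HandleWrapper() { *obj_ = Handle<T>::Get(); }
     private:
      T** obj_;
    };

    int main() {
      int a = 1, b = 2;
      int* raw = &a;
      {
        HandleWrapper<int> w(&raw, Handle<int>(raw));
        w.Assign(&b);  // Simulate the object being moved while the handle is live.
      }
      assert(raw == &b);  // The destructor restored the caller's pointer.
      return 0;
    }
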
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index dfc82dd..2a8cc63 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -22,7 +22,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "thread.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "utf.h"
 
 namespace art {
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 74f386c..5b7dee1d 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -257,6 +257,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -273,6 +276,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -290,6 +296,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -306,6 +315,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -340,6 +352,9 @@
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                        shadow_frame.GetMethod(), dex_pc,
                                        result);
+    } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+      instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                       shadow_frame.GetMethod(), dex_pc);
     }
     return result;
   }
@@ -2389,6 +2404,9 @@
 // Create alternative instruction handlers dedicated to instrumentation.
 // Return instructions must not call Instrumentation::DexPcMovedEvent since they already call
 // Instrumentation::MethodExited. This is to avoid posting debugger events twice for this location.
+// Note: we do not use the kReturn instruction flag here (to test whether the instruction is a
+// return), because the compiler does not seem to fold "(Instruction::FlagsOf(Instruction::code)
+// & kReturn) != 0" into a constant condition, so that test would not be free.
 #define INSTRUMENTATION_INSTRUCTION_HANDLER(o, code, n, f, r, i, a, v)                            \
   alt_op_##code: {                                                                                \
     if (Instruction::code != Instruction::RETURN_VOID &&                                          \
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 97c216d..859cfc4 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -184,6 +184,9 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
@@ -197,6 +200,9 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
@@ -211,6 +217,9 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
@@ -224,6 +233,9 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
@@ -255,6 +267,9 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+        } else if (UNLIKELY(instrumentation->HasDexPcListeners())) {
+          instrumentation->DexPcMovedEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
+                                           shadow_frame.GetMethod(), dex_pc);
         }
         return result;
       }
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 4843c2b..00be016 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -32,7 +32,7 @@
 #include "jdwp/jdwp_priv.h"
 #include "runtime.h"
 #include "thread-inl.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 21dab8d..3afb149 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -49,7 +49,7 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "utf.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "well_known_classes.h"
 
 namespace art {
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 98b0bbf..5225919 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -19,7 +19,7 @@
 #include <inttypes.h>
 #include <backtrace/BacktraceMap.h>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "base/stringprintf.h"
 #include "ScopedFd.h"
 #include "utils.h"
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index 2b59cd9..b26f563 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -16,7 +16,7 @@
 
 #include "mem_map.h"
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "gtest/gtest.h"
 
 namespace art {
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 537fe85..e0fd6a2 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -36,7 +36,7 @@
 #include "object_array-inl.h"
 #include "handle_scope-inl.h"
 #include "string-inl.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 64edba8..53e4a6f 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -625,8 +625,13 @@
   // Allocate and acquire a new monitor.
   UniquePtr<Monitor> m(new Monitor(self, owner, obj, hash_code));
   if (m->Install(self)) {
-    VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
-                    << " created monitor " << m.get() << " for object " << obj;
+    if (owner != nullptr) {
+      VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
+          << " created monitor " << m.get() << " for object " << obj;
+    } else {
+      VLOG(monitor) << "monitor: Inflate with hashcode " << hash_code
+          << " created monitor " << m.get() << " for object " << obj;
+    }
     Runtime::Current()->GetMonitorList()->Add(m.release());
     CHECK_EQ(obj->GetLockWord(true).GetState(), LockWord::kFatLocked);
   }
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 52abaab..5353592 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -15,8 +15,8 @@
  */
 
 #include <algorithm>
-#include <fcntl.h>
 #include <set>
+#include <fcntl.h>
 #include <unistd.h>
 
 #include "base/logging.h"
@@ -37,7 +37,7 @@
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
-#include "toStringArray.h"
+#include "well_known_classes.h"
 #include "zip_archive.h"
 
 #ifdef HAVE_ANDROID_OS
@@ -196,19 +196,24 @@
 }
 
 static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jlong cookie) {
-  const DexFile* dex_file;
-  dex_file = toDexFile(cookie, env);
+  jobjectArray result = nullptr;
+  const DexFile* dex_file = toDexFile(cookie, env);
-  if (dex_file == nullptr) {
+  if (dex_file != nullptr) {
-    return nullptr;
+    result = env->NewObjectArray(dex_file->NumClassDefs(), WellKnownClasses::java_lang_String,
+                                 nullptr);
+    if (result != nullptr) {
+      for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+        const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+        const char* descriptor = dex_file->GetClassDescriptor(class_def);
+        ScopedLocalRef<jstring> jdescriptor(env, env->NewStringUTF(descriptor));
+        if (jdescriptor.get() == nullptr) {
+          return nullptr;
+        }
+        env->SetObjectArrayElement(result, i, jdescriptor.get());
+      }
+    }
   }
-
-  std::vector<std::string> class_names;
-  for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) {
-    const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-    const char* descriptor = dex_file->GetClassDescriptor(class_def);
-    class_names.push_back(DescriptorToDot(descriptor));
-  }
-  return toStringArray(env, class_names);
+  return result;
 }
 
 // Copy a profile file
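
The rewritten getClassNameList (and getVmFeatureList below) follow the same JNI pattern: build the jobjectArray directly and release each element's local reference via ScopedLocalRef, so long loops cannot overflow the local reference table and no intermediate std::vector<std::string> is needed. A generic form of that pattern -- ToStringArray and its parameters are hypothetical, but the JNI calls are standard:

    #include <jni.h>
    #include "ScopedLocalRef.h"

    static jobjectArray ToStringArray(JNIEnv* env, jclass string_class,
                                      const char* const* items, jsize count) {
      jobjectArray result = env->NewObjectArray(count, string_class, nullptr);
      if (result == nullptr) {
        return nullptr;  // OutOfMemoryError is already pending.
      }
      for (jsize i = 0; i < count; ++i) {
        ScopedLocalRef<jstring> s(env, env->NewStringUTF(items[i]));
        if (s.get() == nullptr) {
          return nullptr;  // OutOfMemoryError is already pending.
        }
        env->SetObjectArrayElement(result, i, s.get());
      }
      return result;
    }
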
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 0b58af4..ceff206 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -28,21 +28,35 @@
 #include "hprof/hprof.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
+#include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "scoped_fast_native_object_access.h"
-#include "toStringArray.h"
 #include "trace.h"
+#include "well_known_classes.h"
 
 namespace art {
 
 static jobjectArray VMDebug_getVmFeatureList(JNIEnv* env, jclass) {
-  std::vector<std::string> features;
-  features.push_back("method-trace-profiling");
-  features.push_back("method-trace-profiling-streaming");
-  features.push_back("method-sample-profiling");
-  features.push_back("hprof-heap-dump");
-  features.push_back("hprof-heap-dump-streaming");
-  return toStringArray(env, features);
+  static const char* features[] = {
+    "method-trace-profiling",
+    "method-trace-profiling-streaming",
+    "method-sample-profiling",
+    "hprof-heap-dump",
+    "hprof-heap-dump-streaming",
+  };
+  jobjectArray result = env->NewObjectArray(arraysize(features),
+                                            WellKnownClasses::java_lang_String,
+                                            nullptr);
+  if (result != nullptr) {
+    for (size_t i = 0; i < arraysize(features); ++i) {
+      ScopedLocalRef<jstring> jfeature(env, env->NewStringUTF(features[i]));
+      if (jfeature.get() == nullptr) {
+        return nullptr;
+      }
+      env->SetObjectArrayElement(result, i, jfeature.get());
+    }
+  }
+  return result;
 }
 
 static void VMDebug_startAllocCounting(JNIEnv*, jclass) {
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 0dd6ca1..b1e8c09 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -38,33 +38,31 @@
 template <typename T>
 class ObjectLock {
  public:
-  explicit ObjectLock(Thread* self, const Handle<T>* object)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+  ObjectLock(Thread* self, Handle<T> object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : self_(self), obj_(object) {
-    CHECK(object != nullptr);
-    CHECK(object->Get() != nullptr);
-    obj_->Get()->MonitorEnter(self_);
+    CHECK(object.Get() != nullptr);
+    obj_->MonitorEnter(self_);
   }
 
   ~ObjectLock() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->Get()->MonitorExit(self_);
+    obj_->MonitorExit(self_);
   }
 
   void WaitIgnoringInterrupts() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Monitor::Wait(self_, obj_->Get(), 0, 0, false, kWaiting);
+    Monitor::Wait(self_, obj_.Get(), 0, 0, false, kWaiting);
   }
 
   void Notify() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->Get()->Notify(self_);
+    obj_->Notify(self_);
   }
 
   void NotifyAll() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    obj_->Get()->NotifyAll(self_);
+    obj_->NotifyAll(self_);
   }
 
  private:
   Thread* const self_;
-  const Handle<T>* const obj_;
+  const Handle<T> obj_;
   DISALLOW_COPY_AND_ASSIGN(ObjectLock);
 };
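
With the by-value signature, call sites pass the Handle (or a HandleWrapper, which now is-a Handle) directly. A sketch matching the class_linker.cc call sites above; klass and the surrounding function are assumed:

    Thread* self = Thread::Current();
    StackHandleScope<1> hs(self);
    Handle<mirror::Class> h_class(hs.NewHandle(klass));
    ObjectLock<mirror::Class> lock(self, h_class);  // No '&' needed any more.
    lock.WaitIgnoringInterrupts();
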
 
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index 7ce17e0..d9a5813 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -23,7 +23,7 @@
 
 #include "base/logging.h"
 #include "base/unix_file/fd_file.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index 7f293cd..39f7638 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -16,7 +16,7 @@
 
 #include "parsed_options.h"
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "common_runtime_test.h"
 
 namespace art {
diff --git a/runtime/profiler.h b/runtime/profiler.h
index 31fdc79..bcd7c29 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -29,7 +29,7 @@
 #include "instrumentation.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 78a93fd..d183cba 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -72,7 +72,7 @@
 #include "trace.h"
 #include "transaction.h"
 #include "profiler.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 085a8e7..488961e 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1105,7 +1105,7 @@
     if (lock != nullptr) {
       StackHandleScope<1> hs(self);
       Handle<mirror::Object> h_obj(hs.NewHandle(lock));
-      ObjectLock<mirror::Object> locker(self, &h_obj);
+      ObjectLock<mirror::Object> locker(self, h_obj);
       locker.NotifyAll();
     }
   }
diff --git a/runtime/thread.h b/runtime/thread.h
index 83f7b8e..be7634f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -39,7 +39,7 @@
 #include "stack.h"
 #include "thread_state.h"
 #include "throw_location.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/trace.h b/runtime/trace.h
index bf4995a..ef6c642 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -27,7 +27,7 @@
 #include "instrumentation.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index c332bdf..f26b598 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -25,7 +25,7 @@
 
 #include <unistd.h>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index cea2403..14200f7 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -33,7 +33,7 @@
 #include "reg_type_cache-inl.h"
 #include "register_line.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 9dca7f5..2bcf3e0 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -18,7 +18,7 @@
 
 #include <stdio.h>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 8b2dadb..f9f3e31 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -22,7 +22,7 @@
 #include "dex_instruction.h"
 #include "reg_type.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 546eb40..fdc6e3f 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -40,6 +40,7 @@
 jclass WellKnownClasses::java_lang_reflect_Proxy;
 jclass WellKnownClasses::java_lang_RuntimeException;
 jclass WellKnownClasses::java_lang_StackOverflowError;
+jclass WellKnownClasses::java_lang_String;
 jclass WellKnownClasses::java_lang_System;
 jclass WellKnownClasses::java_lang_Thread;
 jclass WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler;
@@ -142,6 +143,7 @@
   java_lang_reflect_Proxy = CacheClass(env, "java/lang/reflect/Proxy");
   java_lang_RuntimeException = CacheClass(env, "java/lang/RuntimeException");
   java_lang_StackOverflowError = CacheClass(env, "java/lang/StackOverflowError");
+  java_lang_String = CacheClass(env, "java/lang/String");
   java_lang_System = CacheClass(env, "java/lang/System");
   java_lang_Thread = CacheClass(env, "java/lang/Thread");
   java_lang_Thread$UncaughtExceptionHandler = CacheClass(env, "java/lang/Thread$UncaughtExceptionHandler");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index bc928d0..f6c2930 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -54,6 +54,7 @@
   static jclass java_lang_reflect_Proxy;
   static jclass java_lang_RuntimeException;
   static jclass java_lang_StackOverflowError;
+  static jclass java_lang_String;
   static jclass java_lang_System;
   static jclass java_lang_Thread;
   static jclass java_lang_ThreadGroup;
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index ddac7d4..13b4219 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -26,7 +26,7 @@
 
 #include "base/stringprintf.h"
 #include "base/unix_file/fd_file.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index 3ef0e6b..edaa88b 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -27,7 +27,7 @@
 #include "mem_map.h"
 #include "os.h"
 #include "safe_map.h"
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 
 namespace art {
 
diff --git a/runtime/zip_archive_test.cc b/runtime/zip_archive_test.cc
index c43fee5..d0624cf 100644
--- a/runtime/zip_archive_test.cc
+++ b/runtime/zip_archive_test.cc
@@ -21,7 +21,7 @@
 #include <sys/types.h>
 #include <zlib.h>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "common_runtime_test.h"
 #include "os.h"
 
diff --git a/test/ReferenceMap/stack_walk_refmap_jni.cc b/test/ReferenceMap/stack_walk_refmap_jni.cc
index 180db4c..d8a0eef 100644
--- a/test/ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/ReferenceMap/stack_walk_refmap_jni.cc
@@ -16,7 +16,7 @@
 
 #include <stdio.h>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
 #include "gc_map.h"
diff --git a/test/StackWalk/stack_walk_jni.cc b/test/StackWalk/stack_walk_jni.cc
index 528586e..d230ddd 100644
--- a/test/StackWalk/stack_walk_jni.cc
+++ b/test/StackWalk/stack_walk_jni.cc
@@ -16,7 +16,7 @@
 
 #include <stdio.h>
 
-#include "UniquePtr.h"
+#include "UniquePtrCompat.h"
 #include "class_linker.h"
 #include "gc_map.h"
 #include "mirror/art_method.h"