Merge "Import Dart's parallel move resolver."
diff --git a/Android.mk b/Android.mk
index 3324458..312507f 100644
--- a/Android.mk
+++ b/Android.mk
@@ -401,14 +401,14 @@
 .PHONY: dump-oat-core-target
 ifeq ($(ART_BUILD_TARGET),true)
 dump-oat-core-target: $(TARGET_CORE_IMG_OUT) $(OATDUMP)
-	$(OATDUMP) --image=$(TARGET_CORE_IMG_OUT) --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt
+	$(OATDUMP) --image=$(TARGET_CORE_IMG_LOCATION) --output=$(ART_DUMP_OAT_PATH)/core.target.oatdump.txt --instruction-set=$(TARGET_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/core.target.oatdump.txt
 endif
 
 .PHONY: dump-oat-boot
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(OATDUMP)
-	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt
+dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) $(OATDUMP)
+	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE_LOCATION) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt --instruction-set=$(TARGET_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.oatdump.txt
 endif
 
diff --git a/build/Android.common.mk b/build/Android.common.mk
index ae54efb..83c536f 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -402,5 +402,6 @@
 endif
 
 HOST_CORE_IMG_LOCATION := $(HOST_OUT_JAVA_LIBRARIES)/core.art
+TARGET_CORE_IMG_LOCATION := $(ART_TEST_OUT)/core.art
 
 endif # ANDROID_COMMON_MK
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 3c33975..a186e85 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -99,8 +99,8 @@
     LOCAL_MULTILIB := $$(art_multilib)
   endif
 
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    include external/libcxx/libcxx.mk
     include $(BUILD_EXECUTABLE)
     ART_TARGET_EXECUTABLES := $(ART_TARGET_EXECUTABLES) $(TARGET_OUT_EXECUTABLES)/$$(LOCAL_MODULE)
   else # host
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index c5457c2..20e6aad 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -183,6 +183,7 @@
   endif
 
   LOCAL_CFLAGS := $(ART_TEST_CFLAGS)
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
@@ -192,7 +193,6 @@
     LOCAL_MODULE_PATH_32 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $(ART_NATIVETEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
-    include external/libcxx/libcxx.mk
     include $(BUILD_EXECUTABLE)
     
     ART_TARGET_GTEST_EXECUTABLES$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_NATIVETEST_OUT)/$(TARGET_ARCH)/$$(LOCAL_MODULE)
@@ -217,7 +217,7 @@
     LOCAL_STATIC_LIBRARIES += libcutils libvixl
     ifneq ($(WITHOUT_HOST_CLANG),true)
         # GCC-compiled host tests fail with this linked in, presumably due to destructors that run.
-        LOCAL_STATIC_LIBRARIES += libgtest_host
+        LOCAL_STATIC_LIBRARIES += libgtest_libc++_host
     endif
     LOCAL_LDLIBS += -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
diff --git a/build/Android.libarttest.mk b/build/Android.libarttest.mk
index 6965326..9e5f3d6 100644
--- a/build/Android.libarttest.mk
+++ b/build/Android.libarttest.mk
@@ -46,6 +46,7 @@
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
   LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/build/Android.libarttest.mk
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS := $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
@@ -56,13 +57,12 @@
     LOCAL_MODULE_PATH_32 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $(ART_TEST_OUT)/$(ART_TARGET_ARCH_64)
     LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH)
-    include external/libcxx/libcxx.mk
     include $(BUILD_SHARED_LIBRARY)
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
     LOCAL_STATIC_LIBRARIES := libcutils
-    LOCAL_LDLIBS := -ldl -lpthread
+    LOCAL_LDLIBS += -ldl -lpthread
     ifeq ($(HOST_OS),linux)
       LOCAL_LDLIBS += -lrt
     endif
diff --git a/compiler/Android.mk b/compiler/Android.mk
index d9417fc..274678a 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -59,8 +59,8 @@
 	dex/mir_field_info.cc \
 	dex/mir_method_info.cc \
 	dex/mir_optimization.cc \
-	dex/pass_driver.cc \
 	dex/bb_optimizations.cc \
+	dex/pass_driver_me.cc \
 	dex/bit_vector_block_iterator.cc \
 	dex/frontend.cc \
 	dex/mir_graph.cc \
@@ -196,8 +196,8 @@
   LOCAL_GENERATED_SOURCES += $$(ENUM_OPERATOR_OUT_GEN)
 
   LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS)
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    include external/libcxx/libcxx.mk
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS)
   else # host
@@ -249,7 +249,7 @@
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
 
   ifeq ($$(art_target_or_host),host)
-    LOCAL_LDLIBS := -ldl -lpthread
+    LOCAL_LDLIBS += -ldl -lpthread
   endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index abfa7a7..1852f80 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -23,7 +23,13 @@
 /*
  * Code Layout pass implementation start.
  */
-bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool CodeLayout::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->LayoutBlocks(bb);
   // No need to repeat, so just return false.
   return false;
@@ -32,13 +38,22 @@
 /*
  * SSATransformation pass implementation start.
  */
-bool SSATransformation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool SSATransformation::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->InsertPhiNodeOperands(bb);
   // No need to repeat, so just return false.
   return false;
 }
 
-void SSATransformation::End(CompilationUnit* cUnit) const {
+void SSATransformation::End(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
   // Verify the dataflow information after the pass.
   if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) {
     cUnit->mir_graph->VerifyDataflow();
@@ -48,7 +63,13 @@
 /*
  * ConstantPropagation pass implementation start
  */
-bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool ConstantPropagation::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->DoConstantPropagation(bb);
   // No need to repeat, so just return false.
   return false;
@@ -57,7 +78,10 @@
 /*
  * MethodUseCount pass implementation start.
  */
-bool MethodUseCount::Gate(const CompilationUnit* cUnit) const {
+bool MethodUseCount::Gate(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
   // First initialize the data.
   cUnit->mir_graph->InitializeMethodUses();
 
@@ -67,7 +91,13 @@
   return res;
 }
 
-bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool MethodUseCount::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->CountUses(bb);
   // No need to repeat, so just return false.
   return false;
@@ -76,7 +106,13 @@
 /*
  * BasicBlock Combine pass implementation start.
  */
-bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+bool BBCombine::Worker(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+  CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+  DCHECK(cUnit != nullptr);
+  BasicBlock* bb = pass_me_data_holder->bb;
+  DCHECK(bb != nullptr);
   cUnit->mir_graph->CombineBlocks(bb);
 
   // No need to repeat, so just return false.
@@ -86,7 +122,10 @@
 /*
  * BasicBlock Optimization pass implementation start.
  */
-void BBOptimizations::Start(CompilationUnit* cUnit) const {
+void BBOptimizations::Start(const PassDataHolder* data) const {
+  DCHECK(data != nullptr);
+  CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  DCHECK(cUnit != nullptr);
   /*
    * This pass has a different ordering depending on the suppress-exceptions setting,
    * so do the pass here for now:
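
Every hook in the new interface receives a const PassDataHolder* and unpacks what it needs from a PassMEDataHolder, which is what lets the base Pass class stay agnostic of the middle end. A minimal sketch of a new pass written against the updated interface (the pass name is hypothetical; CountUses stands in for any MIRGraph call):

    class HypotheticalUseCounter : public PassME {
     public:
      HypotheticalUseCounter() : PassME("HypotheticalUseCounter") {
      }

      bool Worker(const PassDataHolder* data) const {
        DCHECK(data != nullptr);
        const PassMEDataHolder* holder = down_cast<const PassMEDataHolder*>(data);
        CompilationUnit* c_unit = holder->c_unit;
        DCHECK(c_unit != nullptr);
        BasicBlock* bb = holder->bb;
        DCHECK(bb != nullptr);
        c_unit->mir_graph->CountUses(bb);
        // Not a repeating pass, so report no change.
        return false;
      }
    };
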
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 6d500a5..43dcdf4 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
 
 #include "compiler_internals.h"
-#include "pass.h"
+#include "pass_me.h"
 
 namespace art {
 
@@ -26,16 +26,22 @@
  * @class CacheFieldLoweringInfo
  * @brief Cache the lowering info for fields used by IGET/IPUT/SGET/SPUT insns.
  */
-class CacheFieldLoweringInfo : public Pass {
+class CacheFieldLoweringInfo : public PassME {
  public:
-  CacheFieldLoweringInfo() : Pass("CacheFieldLoweringInfo", kNoNodes) {
+  CacheFieldLoweringInfo() : PassME("CacheFieldLoweringInfo", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->DoCacheFieldLoweringInfo();
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->HasFieldAccess();
   }
 };
@@ -44,16 +50,22 @@
  * @class CacheMethodLoweringInfo
  * @brief Cache the lowering info for methods called by INVOKEs.
  */
-class CacheMethodLoweringInfo : public Pass {
+class CacheMethodLoweringInfo : public PassME {
  public:
-  CacheMethodLoweringInfo() : Pass("CacheMethodLoweringInfo", kNoNodes) {
+  CacheMethodLoweringInfo() : PassME("CacheMethodLoweringInfo", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->DoCacheMethodLoweringInfo();
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->HasInvokes();
   }
 };
@@ -62,26 +74,41 @@
  * @class CallInlining
  * @brief Perform method inlining pass.
  */
-class CallInlining : public Pass {
+class CallInlining : public PassME {
  public:
-  CallInlining() : Pass("CallInlining") {
+  CallInlining() : PassME("CallInlining") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->InlineCallsGate();
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InlineCallsStart();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     cUnit->mir_graph->InlineCalls(bb);
     // No need to repeat, so just return false.
     return false;
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InlineCallsEnd();
   }
 };
@@ -90,48 +117,57 @@
  * @class CodeLayout
  * @brief Perform the code layout pass.
  */
-class CodeLayout : public Pass {
+class CodeLayout : public PassME {
  public:
-  CodeLayout() : Pass("CodeLayout", "2_post_layout_cfg") {
+  CodeLayout() : PassME("CodeLayout", "2_post_layout_cfg") {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->VerifyDataflow();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 };
 
 /**
  * @class SSATransformation
  * @brief Perform an SSA representation pass on the CompilationUnit.
  */
-class SSATransformation : public Pass {
+class SSATransformation : public PassME {
  public:
-  SSATransformation() : Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
+  SSATransformation() : PassME("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InitializeSSATransformation();
   }
 
-  void End(CompilationUnit* cUnit) const;
+  void End(const PassDataHolder* data) const;
 };
 
 /**
  * @class ConstantPropagation
  * @brief Perform a constant propagation pass.
  */
-class ConstantPropagation : public Pass {
+class ConstantPropagation : public PassME {
  public:
-  ConstantPropagation() : Pass("ConstantPropagation") {
+  ConstantPropagation() : PassME("ConstantPropagation") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InitializeConstantPropagation();
   }
 };
@@ -140,12 +176,15 @@
  * @class InitRegLocations
  * @brief Initialize Register Locations.
  */
-class InitRegLocations : public Pass {
+class InitRegLocations : public PassME {
  public:
-  InitRegLocations() : Pass("InitRegLocation", kNoNodes) {
+  InitRegLocations() : PassME("InitRegLocation", kNoNodes) {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InitRegLocations();
   }
 };
@@ -154,53 +193,77 @@
  * @class MethodUseCount
  * @brief Count the register uses of the method
  */
-class MethodUseCount : public Pass {
+class MethodUseCount : public PassME {
  public:
-  MethodUseCount() : Pass("UseCount") {
+  MethodUseCount() : PassME("UseCount") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 
-  bool Gate(const CompilationUnit* cUnit) const;
+  bool Gate(const PassDataHolder* data) const;
 };
 
 /**
  * @class NullCheckEliminationAndTypeInference
  * @brief Null check elimination and type inference.
  */
-class NullCheckEliminationAndTypeInference : public Pass {
+class NullCheckEliminationAndTypeInference : public PassME {
  public:
   NullCheckEliminationAndTypeInference()
-    : Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
+    : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
   }
 
-  void Start(CompilationUnit* cUnit) const {
+  void Start(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateNullChecksAndInferTypesStart();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb);
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateNullChecksAndInferTypesEnd();
   }
 };
 
-class ClassInitCheckElimination : public Pass {
+class ClassInitCheckElimination : public PassME {
  public:
-  ClassInitCheckElimination() : Pass("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
+  ClassInitCheckElimination() : PassME("ClInitCheckElimination", kRepeatingPreOrderDFSTraversal) {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return cUnit->mir_graph->EliminateClassInitChecksGate();
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
+  bool Worker(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data);
+    CompilationUnit* cUnit = pass_me_data_holder->c_unit;
+    DCHECK(cUnit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
     return cUnit->mir_graph->EliminateClassInitChecks(bb);
   }
 
-  void End(CompilationUnit* cUnit) const {
+  void End(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateClassInitChecksEnd();
   }
 };
@@ -209,32 +272,38 @@
  * @class NullCheckEliminationAndTypeInference
  * @brief Null check elimination and type inference.
  */
-class BBCombine : public Pass {
+class BBCombine : public PassME {
  public:
-  BBCombine() : Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
+  BBCombine() : PassME("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return ((cUnit->disable_opt & (1 << kSuppressExceptionEdges)) != 0);
   }
 
-  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
+  bool Worker(const PassDataHolder* data) const;
 };
 
 /**
  * @class BasicBlock Optimizations
  * @brief Any simple BasicBlock optimization can be put here.
  */
-class BBOptimizations : public Pass {
+class BBOptimizations : public PassME {
  public:
-  BBOptimizations() : Pass("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
+  BBOptimizations() : PassME("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
   }
 
-  bool Gate(const CompilationUnit* cUnit) const {
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(cUnit != nullptr);
     return ((cUnit->disable_opt & (1 << kBBOpt)) == 0);
   }
 
-  void Start(CompilationUnit* cUnit) const;
+  void Start(const PassDataHolder* data) const;
 };
 
 }  // namespace art
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index b45d6a4..62973af 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -326,6 +326,81 @@
       GrowableArray<BasicBlock*>::Iterator all_nodes_iterator_;    /**< @brief The list of all the nodes */
   };
 
+  /**
+   * @class TopologicalSortIterator
+   * @brief Used to perform a Topological Sort Iteration of a MIRGraph.
+   */
+  class TopologicalSortIterator : public DataflowIterator {
+    public:
+      /**
+       * @brief The constructor, using all of the reachable blocks of the MIRGraph.
+       * @param mir_graph The MIRGraph considered.
+       */
+      explicit TopologicalSortIterator(MIRGraph* mir_graph)
+          : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
+            mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+        // Compute the topological order first if it has not been computed yet, so that
+        // the block list read below is valid.
+        if (mir_graph->GetTopologicalSortOrder() == nullptr) {
+          mir_graph->ComputeTopologicalSortOrder();
+        }
+
+        // Extra setup for TopologicalSortIterator.
+        idx_ = start_idx_;
+        block_id_list_ = mir_graph->GetTopologicalSortOrder();
+      }
+
+      /**
+       * @brief Get the next BasicBlock depending on iteration order.
+       * @param had_change whether the user of the iteration changed the previous BasicBlock.
+       * @return the next BasicBlock following the iteration order, nullptr if finished.
+       */
+      virtual BasicBlock* Next(bool had_change = false) {
+        // Update changed: if had_change is true, we remember it for the whole iteration.
+        changed_ |= had_change;
+
+        return ForwardSingleNext();
+      }
+  };
+
+  /**
+   * @class RepeatingTopologicalSortIterator
+   * @brief Used to perform a Topological Sort Iteration of a MIRGraph.
+   * @details If there is a change during an iteration, the iteration starts over at the end of the
+   *          iteration.
+   */
+  class RepeatingTopologicalSortIterator : public DataflowIterator {
+    public:
+     /**
+      * @brief The constructor, using all of the reachable blocks of the MIRGraph.
+      * @param mir_graph The MIRGraph considered.
+      */
+     explicit RepeatingTopologicalSortIterator(MIRGraph* mir_graph)
+         : DataflowIterator(mir_graph, 0, mir_graph->GetTopologicalSortOrder() != nullptr ?
+           mir_graph->GetTopologicalSortOrder()->Size() : 0) {
+       // Compute the topological order first if it has not been computed yet, so that
+       // the block list read below is valid.
+       if (mir_graph->GetTopologicalSortOrder() == nullptr) {
+         mir_graph->ComputeTopologicalSortOrder();
+       }
+
+       // Extra setup for RepeatingTopologicalSortIterator.
+       idx_ = start_idx_;
+       block_id_list_ = mir_graph->GetTopologicalSortOrder();
+     }
+
+     /**
+      * @brief Get the next BasicBlock depending on iteration order.
+      * @param had_change whether the user of the iteration changed the previous BasicBlock.
+      * @return the next BasicBlock following the iteration order, nullptr if finished.
+      */
+     virtual BasicBlock* Next(bool had_change = false) {
+       // Update changed: if had_change is true, we remember it for the whole iteration.
+       changed_ |= had_change;
+
+       return ForwardRepeatNext();
+     }
+  };
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_
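
Both new iterators follow the existing DataflowIterator protocol: construct one over a MIRGraph, then pull blocks with Next(had_change) until it returns nullptr. A usage sketch under that assumption (the processing body is a placeholder):

    void VisitInTopologicalOrder(MIRGraph* mir_graph) {
      TopologicalSortIterator iterator(mir_graph);
      bool change = false;
      for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
        // Process bb here; report whether it changed so that the repeating
        // variant knows to run another full iteration.
        change = false;
      }
    }
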
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 77b5057..63e3831 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -21,7 +21,7 @@
 #include "dataflow_iterator-inl.h"
 #include "leb128.h"
 #include "mirror/object.h"
-#include "pass_driver.h"
+#include "pass_driver_me.h"
 #include "runtime.h"
 #include "base/logging.h"
 #include "base/timing_logger.h"
@@ -136,22 +136,22 @@
 // TODO: Remove this when we are able to compile everything.
 int arm64_support_list[] = {
     Instruction::NOP,
-    // Instruction::MOVE,
-    // Instruction::MOVE_FROM16,
-    // Instruction::MOVE_16,
-    // Instruction::MOVE_WIDE,
-    // Instruction::MOVE_WIDE_FROM16,
-    // Instruction::MOVE_WIDE_16,
-    // Instruction::MOVE_OBJECT,
-    // Instruction::MOVE_OBJECT_FROM16,
-    // Instruction::MOVE_OBJECT_16,
+    Instruction::MOVE,
+    Instruction::MOVE_FROM16,
+    Instruction::MOVE_16,
+    Instruction::MOVE_WIDE,
+    Instruction::MOVE_WIDE_FROM16,
+    Instruction::MOVE_WIDE_16,
+    Instruction::MOVE_OBJECT,
+    Instruction::MOVE_OBJECT_FROM16,
+    Instruction::MOVE_OBJECT_16,
     // Instruction::MOVE_RESULT,
     // Instruction::MOVE_RESULT_WIDE,
     // Instruction::MOVE_RESULT_OBJECT,
     Instruction::MOVE_EXCEPTION,
     Instruction::RETURN_VOID,
-    // Instruction::RETURN,
-    // Instruction::RETURN_WIDE,
+    Instruction::RETURN,
+    Instruction::RETURN_WIDE,
     // Instruction::RETURN_OBJECT,
     // Instruction::CONST_4,
     // Instruction::CONST_16,
@@ -184,7 +184,7 @@
     // Instruction::CMPG_FLOAT,
     // Instruction::CMPL_DOUBLE,
     // Instruction::CMPG_DOUBLE,
-    // Instruction::CMP_LONG,
+    Instruction::CMP_LONG,
     // Instruction::IF_EQ,
     // Instruction::IF_NE,
     // Instruction::IF_LT,
@@ -258,16 +258,16 @@
     // Instruction::INVOKE_INTERFACE_RANGE,
     // Instruction::UNUSED_79,
     // Instruction::UNUSED_7A,
-    // Instruction::NEG_INT,
-    // Instruction::NOT_INT,
-    // Instruction::NEG_LONG,
-    // Instruction::NOT_LONG,
+    Instruction::NEG_INT,
+    Instruction::NOT_INT,
+    Instruction::NEG_LONG,
+    Instruction::NOT_LONG,
     // Instruction::NEG_FLOAT,
     // Instruction::NEG_DOUBLE,
-    // Instruction::INT_TO_LONG,
+    Instruction::INT_TO_LONG,
     // Instruction::INT_TO_FLOAT,
     // Instruction::INT_TO_DOUBLE,
-    // Instruction::LONG_TO_INT,
+    Instruction::LONG_TO_INT,
     // Instruction::LONG_TO_FLOAT,
     // Instruction::LONG_TO_DOUBLE,
     // Instruction::FLOAT_TO_INT,
@@ -276,31 +276,31 @@
     // Instruction::DOUBLE_TO_INT,
     // Instruction::DOUBLE_TO_LONG,
     // Instruction::DOUBLE_TO_FLOAT,
-    // Instruction::INT_TO_BYTE,
-    // Instruction::INT_TO_CHAR,
-    // Instruction::INT_TO_SHORT,
-    // Instruction::ADD_INT,
-    // Instruction::SUB_INT,
-    // Instruction::MUL_INT,
-    // Instruction::DIV_INT,
-    // Instruction::REM_INT,
-    // Instruction::AND_INT,
-    // Instruction::OR_INT,
-    // Instruction::XOR_INT,
-    // Instruction::SHL_INT,
-    // Instruction::SHR_INT,
-    // Instruction::USHR_INT,
-    // Instruction::ADD_LONG,
-    // Instruction::SUB_LONG,
-    // Instruction::MUL_LONG,
-    // Instruction::DIV_LONG,
-    // Instruction::REM_LONG,
-    // Instruction::AND_LONG,
-    // Instruction::OR_LONG,
-    // Instruction::XOR_LONG,
-    // Instruction::SHL_LONG,
-    // Instruction::SHR_LONG,
-    // Instruction::USHR_LONG,
+    Instruction::INT_TO_BYTE,
+    Instruction::INT_TO_CHAR,
+    Instruction::INT_TO_SHORT,
+    Instruction::ADD_INT,
+    Instruction::SUB_INT,
+    Instruction::MUL_INT,
+    Instruction::DIV_INT,
+    Instruction::REM_INT,
+    Instruction::AND_INT,
+    Instruction::OR_INT,
+    Instruction::XOR_INT,
+    Instruction::SHL_INT,
+    Instruction::SHR_INT,
+    Instruction::USHR_INT,
+    Instruction::ADD_LONG,
+    Instruction::SUB_LONG,
+    Instruction::MUL_LONG,
+    Instruction::DIV_LONG,
+    Instruction::REM_LONG,
+    Instruction::AND_LONG,
+    Instruction::OR_LONG,
+    Instruction::XOR_LONG,
+    Instruction::SHL_LONG,
+    Instruction::SHR_LONG,
+    Instruction::USHR_LONG,
     // Instruction::ADD_FLOAT,
     // Instruction::SUB_FLOAT,
     // Instruction::MUL_FLOAT,
@@ -311,28 +311,28 @@
     // Instruction::MUL_DOUBLE,
     // Instruction::DIV_DOUBLE,
     // Instruction::REM_DOUBLE,
-    // Instruction::ADD_INT_2ADDR,
-    // Instruction::SUB_INT_2ADDR,
-    // Instruction::MUL_INT_2ADDR,
-    // Instruction::DIV_INT_2ADDR,
-    // Instruction::REM_INT_2ADDR,
-    // Instruction::AND_INT_2ADDR,
-    // Instruction::OR_INT_2ADDR,
-    // Instruction::XOR_INT_2ADDR,
-    // Instruction::SHL_INT_2ADDR,
-    // Instruction::SHR_INT_2ADDR,
-    // Instruction::USHR_INT_2ADDR,
-    // Instruction::ADD_LONG_2ADDR,
-    // Instruction::SUB_LONG_2ADDR,
-    // Instruction::MUL_LONG_2ADDR,
-    // Instruction::DIV_LONG_2ADDR,
-    // Instruction::REM_LONG_2ADDR,
-    // Instruction::AND_LONG_2ADDR,
-    // Instruction::OR_LONG_2ADDR,
-    // Instruction::XOR_LONG_2ADDR,
-    // Instruction::SHL_LONG_2ADDR,
-    // Instruction::SHR_LONG_2ADDR,
-    // Instruction::USHR_LONG_2ADDR,
+    Instruction::ADD_INT_2ADDR,
+    Instruction::SUB_INT_2ADDR,
+    Instruction::MUL_INT_2ADDR,
+    Instruction::DIV_INT_2ADDR,
+    Instruction::REM_INT_2ADDR,
+    Instruction::AND_INT_2ADDR,
+    Instruction::OR_INT_2ADDR,
+    Instruction::XOR_INT_2ADDR,
+    Instruction::SHL_INT_2ADDR,
+    Instruction::SHR_INT_2ADDR,
+    Instruction::USHR_INT_2ADDR,
+    Instruction::ADD_LONG_2ADDR,
+    Instruction::SUB_LONG_2ADDR,
+    Instruction::MUL_LONG_2ADDR,
+    Instruction::DIV_LONG_2ADDR,
+    Instruction::REM_LONG_2ADDR,
+    Instruction::AND_LONG_2ADDR,
+    Instruction::OR_LONG_2ADDR,
+    Instruction::XOR_LONG_2ADDR,
+    Instruction::SHL_LONG_2ADDR,
+    Instruction::SHR_LONG_2ADDR,
+    Instruction::USHR_LONG_2ADDR,
     // Instruction::ADD_FLOAT_2ADDR,
     // Instruction::SUB_FLOAT_2ADDR,
     // Instruction::MUL_FLOAT_2ADDR,
@@ -343,25 +343,25 @@
     // Instruction::MUL_DOUBLE_2ADDR,
     // Instruction::DIV_DOUBLE_2ADDR,
     // Instruction::REM_DOUBLE_2ADDR,
-    // Instruction::ADD_INT_LIT16,
-    // Instruction::RSUB_INT,
-    // Instruction::MUL_INT_LIT16,
-    // Instruction::DIV_INT_LIT16,
-    // Instruction::REM_INT_LIT16,
-    // Instruction::AND_INT_LIT16,
-    // Instruction::OR_INT_LIT16,
-    // Instruction::XOR_INT_LIT16,
+    Instruction::ADD_INT_LIT16,
+    Instruction::RSUB_INT,
+    Instruction::MUL_INT_LIT16,
+    Instruction::DIV_INT_LIT16,
+    Instruction::REM_INT_LIT16,
+    Instruction::AND_INT_LIT16,
+    Instruction::OR_INT_LIT16,
+    Instruction::XOR_INT_LIT16,
     Instruction::ADD_INT_LIT8,
-    // Instruction::RSUB_INT_LIT8,
-    // Instruction::MUL_INT_LIT8,
-    // Instruction::DIV_INT_LIT8,
-    // Instruction::REM_INT_LIT8,
-    // Instruction::AND_INT_LIT8,
-    // Instruction::OR_INT_LIT8,
-    // Instruction::XOR_INT_LIT8,
-    // Instruction::SHL_INT_LIT8,
-    // Instruction::SHR_INT_LIT8,
-    // Instruction::USHR_INT_LIT8,
+    Instruction::RSUB_INT_LIT8,
+    Instruction::MUL_INT_LIT8,
+    Instruction::DIV_INT_LIT8,
+    Instruction::REM_INT_LIT8,
+    Instruction::AND_INT_LIT8,
+    Instruction::OR_INT_LIT8,
+    Instruction::XOR_INT_LIT8,
+    Instruction::SHL_INT_LIT8,
+    Instruction::SHR_INT_LIT8,
+    Instruction::USHR_INT_LIT8,
     // Instruction::IGET_QUICK,
     // Instruction::IGET_WIDE_QUICK,
     // Instruction::IGET_OBJECT_QUICK,
@@ -403,7 +403,7 @@
     // kMirOpNop,
     // kMirOpNullCheck,
     // kMirOpRangeCheck,
-    // kMirOpDivZeroCheck,
+    kMirOpDivZeroCheck,
     kMirOpCheck,
     // kMirOpCheckPart2,
     // kMirOpSelect,
@@ -699,7 +699,7 @@
 // V : void
 // (ARM64) The current calling convention only supports 32-bit softfp,
 //         which has problems with long, float, and double
-constexpr char arm64_supported_types[] = "ZBSCILV";
+constexpr char arm64_supported_types[] = "ZBSCILVJ";
 // (x84_64) We still have troubles with compiling longs/doubles/floats
 constexpr char x86_64_supported_types[] = "ZBSCILV";
 
@@ -924,7 +924,7 @@
   }
 
   /* Create the pass driver and launch it */
-  PassDriver pass_driver(&cu);
+  PassDriverME pass_driver(&cu);
   pass_driver.Launch();
 
   if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
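
The arm64_support_list and arm64_supported_types tables earlier in this file's diff act as allowlists while the ARM64 backend is brought up: elsewhere in frontend.cc (not shown in these hunks), a method is only handed to the backend if every opcode and every shorty type appears in them, so uncommenting an entry is how an instruction gets enabled. A sketch of that kind of membership test (the helper name is hypothetical):

    static bool OpcodeIsSupported(int opcode, const int* support_list, size_t size) {
      for (size_t i = 0; i < size; ++i) {
        if (support_list[i] == opcode) {
          return true;
        }
      }
      // Not in the allowlist: the method falls back to another compiler or the interpreter.
      return false;
    }
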
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 4ba6677..c34a9f5 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -17,6 +17,7 @@
 #include "mir_graph.h"
 
 #include <inttypes.h>
+#include <queue>
 
 #include "base/stl_util.h"
 #include "compiler_internals.h"
@@ -76,6 +77,7 @@
       dfs_order_(NULL),
       dfs_post_order_(NULL),
       dom_post_order_traversal_(NULL),
+      topological_order_(nullptr),
       i_dom_list_(NULL),
       def_block_matrix_(NULL),
       temp_dalvik_register_v_(NULL),
@@ -1337,6 +1339,104 @@
   DoDFSPreOrderSSARename(GetEntryBlock());
 }
 
+void MIRGraph::ComputeTopologicalSortOrder() {
+  std::queue<BasicBlock *> q;
+  std::map<int, int> visited_cnt_values;
+
+  // Clear the nodes.
+  ClearAllVisitedFlags();
+
+  // Create the topological order if need be.
+  if (topological_order_ == nullptr) {
+    topological_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, 0);
+  }
+  topological_order_->Reset();
+
+  // Set up the visited counter map for all BBs; the default value for each counter is zero.
+  // Also fill the initial queue.
+  GrowableArray<BasicBlock*>::Iterator iterator(&block_list_);
+
+  while (true) {
+    BasicBlock* bb = iterator.Next();
+
+    if (bb == nullptr) {
+      break;
+    }
+
+    if (bb->hidden == true) {
+      continue;
+    }
+
+    visited_cnt_values[bb->id] = bb->predecessors->Size();
+
+    GrowableArray<BasicBlockId>::Iterator pred_iterator(bb->predecessors);
+    // To process loops we should not wait for dominators.
+    while (true) {
+      BasicBlock* pred_bb = GetBasicBlock(pred_iterator.Next());
+
+      if (pred_bb == nullptr) {
+        break;
+      }
+
+      if (pred_bb->dominators == nullptr || pred_bb->hidden == true) {
+        continue;
+      }
+
+      // Skip the backward branch.
+      if (pred_bb->dominators->IsBitSet(bb->id) != 0) {
+        visited_cnt_values[bb->id]--;
+      }
+    }
+
+    // Add entry block to queue.
+    if (visited_cnt_values[bb->id] == 0) {
+      q.push(bb);
+    }
+  }
+
+  while (q.size() > 0) {
+    // Get top.
+    BasicBlock *bb = q.front();
+    q.pop();
+
+    DCHECK_EQ(bb->hidden, false);
+
+    if (bb->IsExceptionBlock() == true) {
+      continue;
+    }
+
+    // We've visited all the predecessors. So, we can visit bb.
+    if (bb->visited == false) {
+      bb->visited = true;
+
+      // Now add the basic block.
+      topological_order_->Insert(bb->id);
+
+      // Reduce the visited count for all the successors and add the ones whose count
+      // reaches zero into the queue.
+      ChildBlockIterator succIter(bb, this);
+      BasicBlock *successor = succIter.Next();
+      while (successor != nullptr) {
+        // One more predecessor was visited.
+        visited_cnt_values[successor->id]--;
+
+        if (visited_cnt_values[successor->id] <= 0 &&
+            successor->visited == false && successor->hidden == false) {
+          q.push(successor);
+        }
+
+        // Take next successor.
+        successor = succIter.Next();
+      }
+    }
+  }
+}
+
+bool BasicBlock::IsExceptionBlock() const {
+  return block_type == kExceptionHandling;
+}
+
 ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
     : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
       visited_taken_(false), have_successors_(false) {
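
ComputeTopologicalSortOrder above is a variant of Kahn's algorithm: every block starts with a pending count equal to its forward (non-back-edge) predecessors, blocks whose count reaches zero enter the queue, and popping a block decrements the counts of its successors. The same technique in a self-contained form, using a plain adjacency list instead of the MIRGraph types:

    #include <cstddef>
    #include <queue>
    #include <vector>

    // Topologically sorts a DAG given as an adjacency list of successor indices.
    std::vector<size_t> TopologicalSort(const std::vector<std::vector<size_t>>& adj) {
      std::vector<size_t> pending(adj.size(), 0);
      for (const std::vector<size_t>& successors : adj) {
        for (size_t succ : successors) {
          pending[succ]++;  // One more unprocessed predecessor for succ.
        }
      }
      std::queue<size_t> q;
      for (size_t v = 0; v < adj.size(); ++v) {
        if (pending[v] == 0) {
          q.push(v);  // No predecessors: ready to be emitted.
        }
      }
      std::vector<size_t> order;
      while (!q.empty()) {
        size_t v = q.front();
        q.pop();
        order.push_back(v);
        for (size_t succ : adj[v]) {
          if (--pending[succ] == 0) {
            q.push(succ);
          }
        }
      }
      return order;
    }
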
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 0bb8265..3a00a43 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -354,11 +354,12 @@
    */
   MIR* GetNextUnconditionalMir(MIRGraph* mir_graph, MIR* current);
   bool RemoveMIR(MIR* mir);
+  bool IsExceptionBlock() const;
 };
 
 /*
  * The "blocks" field in "successor_block_list" points to an array of elements with the type
- * "SuccessorBlockInfo".  For catch blocks, key is type index for the exception.  For swtich
+ * "SuccessorBlockInfo".  For catch blocks, key is type index for the exception.  For switch
  * blocks, key is the case value.
  */
 struct SuccessorBlockInfo {
@@ -598,6 +599,10 @@
 
   void BasicBlockOptimization();
 
+  GrowableArray<BasicBlockId>* GetTopologicalSortOrder() {
+    return topological_order_;
+  }
+
   bool IsConst(int32_t s_reg) const {
     return is_constant_v_->IsBitSet(s_reg);
   }
@@ -865,6 +870,7 @@
   MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir);
   BasicBlock* NextDominatedBlock(BasicBlock* bb);
   bool LayoutBlocks(BasicBlock* bb);
+  void ComputeTopologicalSortOrder();
 
   bool InlineCallsGate();
   void InlineCallsStart();
@@ -1003,6 +1009,7 @@
   GrowableArray<BasicBlockId>* dfs_order_;
   GrowableArray<BasicBlockId>* dfs_post_order_;
   GrowableArray<BasicBlockId>* dom_post_order_traversal_;
+  GrowableArray<BasicBlockId>* topological_order_;
   int* i_dom_list_;
   ArenaBitVector** def_block_matrix_;    // num_dalvik_register x num_blocks.
   ArenaBitVector* temp_dalvik_register_v_;
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 749a235..1d4aef2 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -268,13 +268,22 @@
     DCHECK_EQ(ct_type, kCompilerTempVR);
 
     // The new non-special compiler temp must receive a unique v_reg with a negative value.
-    compiler_temp->v_reg = static_cast<int>(kVRegNonSpecialTempBaseReg) - num_non_special_compiler_temps_;
+    compiler_temp->v_reg = static_cast<int>(kVRegNonSpecialTempBaseReg) -
+        num_non_special_compiler_temps_;
     compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
     num_non_special_compiler_temps_++;
 
     if (wide) {
-      // Ensure that the two registers are consecutive. Since the virtual registers used for temps grow in a
-      // negative fashion, we need the smaller to refer to the low part. Thus, we redefine the v_reg and s_reg_low.
+      // Create a new CompilerTemp for the high part.
+      CompilerTemp *compiler_temp_high =
+          static_cast<CompilerTemp *>(arena_->Alloc(sizeof(CompilerTemp), kArenaAllocRegAlloc));
+      compiler_temp_high->v_reg = compiler_temp->v_reg;
+      compiler_temp_high->s_reg_low = compiler_temp->s_reg_low;
+      compiler_temps_.Insert(compiler_temp_high);
+
+      // Ensure that the two registers are consecutive. Since the virtual registers used for temps
+      // grow in a negative fashion, we need the smaller to refer to the low part. Thus, we
+      // redefine the v_reg and s_reg_low.
       compiler_temp->v_reg--;
       int ssa_reg_high = compiler_temp->s_reg_low;
       compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index 9457d5b..4ce040e 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -19,6 +19,7 @@
 
 #include <string>
 
+#include "base/macros.h"
 namespace art {
 
 // Forward declarations.
@@ -26,42 +27,18 @@
 struct CompilationUnit;
 class Pass;
 
-/**
- * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass.
- * @details Each enum should be a power of 2 to be correctly used.
- */
-enum OptimizationFlag {
-};
-
-enum DataFlowAnalysisMode {
-  kAllNodes = 0,                           /**< @brief All nodes. */
-  kPreOrderDFSTraversal,                   /**< @brief Depth-First-Search / Pre-Order. */
-  kRepeatingPreOrderDFSTraversal,          /**< @brief Depth-First-Search / Repeating Pre-Order. */
-  kReversePostOrderDFSTraversal,           /**< @brief Depth-First-Search / Reverse Post-Order. */
-  kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
-  kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
-  kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
-  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
+// Empty pass data class; it can be extended by any pass extending the base Pass class.
+class PassDataHolder {
 };
 
 /**
  * @class Pass
- * @brief Pass is the Pass structure for the optimizations.
- * @details The following structure has the different optimization passes that we are going to do.
+ * @brief Base Pass class; it can be extended to refine how the work call is performed.
  */
 class Pass {
  public:
-  explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes,
-                unsigned int flags = 0u, const char* dump = "")
-    : pass_name_(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
-  }
-
-  Pass(const char* name, DataFlowAnalysisMode type, const char* dump)
-    : pass_name_(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
-  }
-
-  Pass(const char* name, const char* dump)
-    : pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  explicit Pass(const char* name)
+    : pass_name_(name) {
   }
 
   virtual ~Pass() {
@@ -71,59 +48,42 @@
     return pass_name_;
   }
 
-  virtual DataFlowAnalysisMode GetTraversal() const {
-    return traversal_type_;
-  }
-
-  virtual bool GetFlag(OptimizationFlag flag) const {
-    return (flags_ & flag);
-  }
-
-  const char* GetDumpCFGFolder() const {
-    return dump_cfg_folder_;
-  }
-
   /**
    * @brief Gate for the pass: determines whether to execute the pass or not.
-   * @param c_unit the CompilationUnit.
-   * @return whether or not to execute the pass
+   * @param data the PassDataHolder.
+   * @return whether or not to execute the pass.
    */
-  virtual bool Gate(const CompilationUnit* c_unit) const {
+  virtual bool Gate(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
 
     // Base class says yes.
     return true;
   }
 
   /**
-   * @brief Start of the pass: called before the WalkBasicBlocks function
-   * @param c_unit the considered CompilationUnit.
+   * @brief Start of the pass: called before the Worker function.
    */
-  virtual void Start(CompilationUnit* c_unit) const {
+  virtual void Start(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
   }
 
   /**
-   * @brief End of the pass: called after the WalkBasicBlocks function
-   * @param c_unit the considered CompilationUnit.
+   * @brief End of the pass: called after the Worker function.
    */
-  virtual void End(CompilationUnit* c_unit) const {
+  virtual void End(const PassDataHolder* data) const {
     // Unused parameter.
-    UNUSED(c_unit);
+    UNUSED(data);
   }
 
   /**
-   * @brief Actually walk the BasicBlocks following a particular traversal type.
-   * @param c_unit the CompilationUnit.
-   * @param bb the BasicBlock.
+   * @brief Worker function: called for each element of the traversal.
+   * @param data the object containing data necessary for the pass.
    * @return whether or not there is a change when walking the BasicBlock.
    */
-  virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const {
-    // Unused parameters.
-    UNUSED(c_unit);
-    UNUSED(bb);
+  virtual bool Worker(const PassDataHolder* data) const {
+    // Unused parameter.
+    UNUSED(data);
 
     // BasicBlock did not change.
     return false;
@@ -133,15 +93,6 @@
   /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */
   const char* const pass_name_;
 
-  /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */
-  const DataFlowAnalysisMode traversal_type_;
-
-  /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */
-  const unsigned int flags_;
-
-  /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */
-  const char* const dump_cfg_folder_;
-
  private:
   // In order to make all passes non-copyable.
   DISALLOW_COPY_AND_ASSIGN(Pass);
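
With the traversal machinery gone, Pass is reduced to a name plus four optional hooks, and any client defines its own PassDataHolder subclass to carry state. A hedged sketch of a hypothetical, non-middle-end client of this interface (ToyDataHolder and ToyPass are illustrative names):

    // Hypothetical data holder: whatever the passes need travels through here.
    class ToyDataHolder : public PassDataHolder {
     public:
      int* visit_count;  // Owned by the driver; mutable through the const holder.
    };

    class ToyPass : public Pass {
     public:
      ToyPass() : Pass("ToyPass") {
      }

      bool Worker(const PassDataHolder* data) const {
        DCHECK(data != nullptr);
        int* visit_count = down_cast<const ToyDataHolder*>(data)->visit_count;
        DCHECK(visit_count != nullptr);
        (*visit_count)++;
        return false;  // No change to report.
      }
    };
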
diff --git a/compiler/dex/pass_driver.cc b/compiler/dex/pass_driver.cc
index 999ed2a..ca936cd 100644
--- a/compiler/dex/pass_driver.cc
+++ b/compiler/dex/pass_driver.cc
@@ -162,6 +162,12 @@
     case kAllNodes:
       DoWalkBasicBlocks<AllNodesIterator>(c_unit, curPass);
       break;
+    case kTopologicalSortTraversal:
+      DoWalkBasicBlocks<TopologicalSortIterator>(c_unit, curPass);
+      break;
+    case kRepeatingTopologicalSortTraversal:
+      DoWalkBasicBlocks<RepeatingTopologicalSortIterator>(c_unit, curPass);
+      break;
     case kNoNodes:
       break;
     default:
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
index 2b7196e..aa0d1ae 100644
--- a/compiler/dex/pass_driver.h
+++ b/compiler/dex/pass_driver.h
@@ -22,77 +22,169 @@
 #include "safe_map.h"
 
 // Forward Declarations.
-class CompilationUnit;
 class Pass;
-
+class PassDriver;
 namespace art {
+/**
+ * @brief Helper function to create a single instance of a given Pass that can be shared
+ * across threads.
+ */
+template <typename PassType>
+const Pass* GetPassInstance() {
+  static const PassType pass;
+  return &pass;
+}
+
+// Empty holder for the constructor.
+class PassDriverDataHolder {
+};
 
 /**
  * @class PassDriver
- * @brief PassDriver is the wrapper around all Pass instances in order to execute them from the Middle-End
+ * @brief PassDriver is the wrapper around all Pass instances in order to execute them.
  */
+template <typename PassDriverType>
 class PassDriver {
  public:
-  explicit PassDriver(CompilationUnit* cu, bool create_default_passes = true);
+  PassDriver() {
+    InitializePasses();
+  }
 
-  ~PassDriver();
+  virtual ~PassDriver() {
+  }
 
   /**
    * @brief Insert a Pass: can warn if multiple passes have the same name.
-   * @param new_pass the new Pass to insert in the map and list.
-   * @param warn_override warn if the name of the Pass is already used.
    */
-  void InsertPass(const Pass* new_pass);
+  void InsertPass(const Pass* new_pass) {
+    DCHECK(new_pass != nullptr);
+    DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0);
+
+    // It is an error to override an existing pass.
+    DCHECK(GetPass(new_pass->GetName()) == nullptr)
+        << "Pass name " << new_pass->GetName() << " already used.";
+
+    // Now add to the list.
+    pass_list_.push_back(new_pass);
+  }
 
   /**
    * @brief Run a pass using the name as key.
-   * @param c_unit the considered CompilationUnit.
-   * @param pass_name the Pass name.
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const char* pass_name);
+  virtual bool RunPass(const char* pass_name) {
+    // Paranoid: we need a valid pass name.
+    DCHECK(pass_name != nullptr && pass_name[0] != 0);
+
+    const Pass* cur_pass = GetPass(pass_name);
+
+    if (cur_pass != nullptr) {
+      return RunPass(cur_pass);
+    }
+
+    // Return false, we did not find the pass.
+    return false;
+  }
+
+  /**
+   * @brief Runs all the passes within the pass_list_.
+   */
+  void Launch() {
+    for (const Pass* cur_pass : pass_list_) {
+      RunPass(cur_pass);
+    }
+  }
+
+  /**
+   * @brief Searches for a particular pass.
+   * @param name the name of the pass to be searched for.
+   */
+  const Pass* GetPass(const char* name) const {
+    for (const Pass* cur_pass : pass_list_) {
+      if (strcmp(name, cur_pass->GetName()) == 0) {
+        return cur_pass;
+      }
+    }
+    return nullptr;
+  }
+
+  static void CreateDefaultPassList(const std::string& disable_passes) {
+    // Insert each pass from g_passes into g_default_pass_list.
+    PassDriverType::g_default_pass_list.clear();
+    PassDriverType::g_default_pass_list.reserve(PassDriver<PassDriverType>::g_passes_size);
+    for (uint16_t i = 0; i < PassDriver<PassDriverType>::g_passes_size; ++i) {
+      const Pass* pass = PassDriver<PassDriverType>::g_passes[i];
+      // Check if we should disable this pass.
+      if (disable_passes.find(pass->GetName()) != std::string::npos) {
+        LOG(INFO) << "Skipping " << pass->GetName();
+      } else {
+        PassDriver<PassDriverType>::g_default_pass_list.push_back(pass);
+      }
+    }
+  }
 
   /**
    * @brief Run a pass using the Pass itself.
    * @param time_split do we want a time split request(default: false)?
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split = false);
+  virtual bool RunPass(const Pass* pass, bool time_split = false) = 0;
 
-  void Launch();
+  /**
+   * @brief Print the pass names of all the passes available.
+   */
+  static void PrintPassNames() {
+    LOG(INFO) << "Loop Passes are:";
 
-  void HandlePassFlag(CompilationUnit* c_unit, const Pass* pass);
+    for (const Pass* cur_pass : PassDriver<PassDriverType>::g_default_pass_list) {
+      LOG(INFO) << "\t-" << cur_pass->GetName();
+    }
+  }
+
+ protected:
+  /**
+   * @brief Gets the list of passes currently scheduled to execute.
+   * @return pass_list_
+   */
+  std::vector<const Pass*>& GetPasses() {
+    return pass_list_;
+  }
+
+  virtual void InitializePasses() {
+    SetDefaultPasses();
+  }
+
+  void SetDefaultPasses() {
+    pass_list_ = PassDriver<PassDriverType>::g_default_pass_list;
+  }
 
   /**
    * @brief Apply a pass: perform start/work/end functions.
    */
-  void ApplyPass(CompilationUnit* c_unit, const Pass* pass);
-
-  /**
-   * @brief Dispatch a patch: walk the BasicBlocks depending on the traversal mode
-   */
-  void DispatchPass(CompilationUnit* c_unit, const Pass* pass);
-
-  static void PrintPassNames();
-  static void CreateDefaultPassList(const std::string& disable_passes);
-
-  const Pass* GetPass(const char* name) const;
-
-  const char* GetDumpCFGFolder() const {
-    return dump_cfg_folder_;
+  virtual void ApplyPass(PassDataHolder* data, const Pass* pass) {
+    pass->Start(data);
+    DispatchPass(pass);
+    pass->End(data);
   }
-
- protected:
-  void CreatePasses();
+  /**
+   * @brief Dispatch a pass.
+   * Gives the ability to add logic when running the pass.
+   */
+  virtual void DispatchPass(const Pass* pass) {
+    UNUSED(pass);
+  }
 
   /** @brief List of passes: provides the order to execute the passes. */
   std::vector<const Pass*> pass_list_;
 
-  /** @brief The CompilationUnit on which to execute the passes on. */
-  CompilationUnit* const cu_;
+  /** @brief The number of passes within g_passes.  */
+  static const uint16_t g_passes_size;
 
-  /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */
-  const char* dump_cfg_folder_;
+  /** @brief The initial list of passes available to the driver.  */
+  static const Pass* const g_passes[];
+
+  /** @brief The default pass list is used to initialize pass_list_. */
+  static std::vector<const Pass*> g_default_pass_list;
 };
 
 }  // namespace art
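
PassDriver is now a CRTP template: each concrete driver passes itself as the template argument, so the static g_passes, g_passes_size, and g_default_pass_list members resolve per driver type and different drivers can ship different pass lists. A sketch of what a second, hypothetical driver has to provide (ToyPassDriver and ToyPass are illustrative names):

    class ToyPassDriver : public PassDriver<ToyPassDriver> {
     public:
      bool RunPass(const Pass* pass, bool time_split = false) {
        UNUSED(time_split);
        PassDataHolder data;  // A real driver would fill a richer holder.
        ApplyPass(&data, pass);
        return true;
      }
    };

    // Each instantiation defines its own static pass tables.
    template<>
    const Pass* const PassDriver<ToyPassDriver>::g_passes[] = {
      GetPassInstance<ToyPass>(),  // Any Pass subclass works here.
    };
    template<>
    const uint16_t PassDriver<ToyPassDriver>::g_passes_size =
        arraysize(PassDriver<ToyPassDriver>::g_passes);
    template<>
    std::vector<const Pass*> PassDriver<ToyPassDriver>::g_default_pass_list(
        PassDriver<ToyPassDriver>::g_passes,
        PassDriver<ToyPassDriver>::g_passes + PassDriver<ToyPassDriver>::g_passes_size);
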
diff --git a/compiler/dex/pass_driver_me.cc b/compiler/dex/pass_driver_me.cc
new file mode 100644
index 0000000..d054500
--- /dev/null
+++ b/compiler/dex/pass_driver_me.cc
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/macros.h"
+#include "bb_optimizations.h"
+#include "compiler_internals.h"
+#include "dataflow_iterator.h"
+#include "dataflow_iterator-inl.h"
+#include "pass_driver_me.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, DataflowIterator* iterator) {
+  // Paranoid: Check the iterator before walking the BasicBlocks.
+  DCHECK(iterator != nullptr);
+  bool change = false;
+  for (BasicBlock* bb = iterator->Next(change); bb != nullptr; bb = iterator->Next(change)) {
+    data->bb = bb;
+    change = pass->Worker(data);
+  }
+}
+
+template <typename Iterator>
+inline void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) {
+  DCHECK(data != nullptr);
+  CompilationUnit* c_unit = data->c_unit;
+  DCHECK(c_unit != nullptr);
+  Iterator iterator(c_unit->mir_graph.get());
+  DoWalkBasicBlocks(data, pass, &iterator);
+}
+}  // anonymous namespace
+
+/*
+ * Create the pass list. These passes are immutable and are shared across the threads.
+ *
+ * The advantage is that there will be no race conditions here.
+ * The disadvantage is that the passes can't change their internal states depending on the
+ * CompilationUnit:
+ *   - This is not yet an issue: no current pass would require it.
+ */
+// The initial list of passes to be used by the PassDriverME.
+template<>
+const Pass* const PassDriver<PassDriverME>::g_passes[] = {
+  GetPassInstance<CacheFieldLoweringInfo>(),
+  GetPassInstance<CacheMethodLoweringInfo>(),
+  GetPassInstance<CallInlining>(),
+  GetPassInstance<CodeLayout>(),
+  GetPassInstance<SSATransformation>(),
+  GetPassInstance<ConstantPropagation>(),
+  GetPassInstance<InitRegLocations>(),
+  GetPassInstance<MethodUseCount>(),
+  GetPassInstance<NullCheckEliminationAndTypeInference>(),
+  GetPassInstance<ClassInitCheckElimination>(),
+  GetPassInstance<BBCombine>(),
+  GetPassInstance<BBOptimizations>(),
+};
+
+// The number of passes in the initial list (g_passes).
+template<>
+const uint16_t PassDriver<PassDriverME>::g_passes_size =
+    arraysize(PassDriver<PassDriverME>::g_passes);
+
+// The default pass list, used by PassDriverME to initialize pass_list_.
+template<>
+std::vector<const Pass*> PassDriver<PassDriverME>::g_default_pass_list(
+    PassDriver<PassDriverME>::g_passes,
+    PassDriver<PassDriverME>::g_passes + PassDriver<PassDriverME>::g_passes_size);
+
+PassDriverME::PassDriverME(CompilationUnit* cu)
+    : PassDriver(), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") {
+  pass_me_data_holder_.bb = nullptr;
+  pass_me_data_holder_.c_unit = cu;
+}
+
+PassDriverME::~PassDriverME() {
+}
+
+void PassDriverME::DispatchPass(const Pass* pass) {
+  VLOG(compiler) << "Dispatching " << pass->GetName();
+  const PassME* me_pass = down_cast<const PassME*>(pass);
+
+  DataFlowAnalysisMode mode = me_pass->GetTraversal();
+
+  switch (mode) {
+    case kPreOrderDFSTraversal:
+      DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingPreOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingPostOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kReversePostOrderDFSTraversal:
+      DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kRepeatingReversePostOrderDFSTraversal:
+      DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kPostOrderDOMTraversal:
+      DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kAllNodes:
+      DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass);
+      break;
+    case kNoNodes:
+      break;
+    default:
+      LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode;
+      break;
+  }
+}
+
+bool PassDriverME::RunPass(const Pass* pass, bool time_split) {
+  // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name.
+  DCHECK(pass != nullptr);
+  DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0);
+  CompilationUnit* c_unit = pass_me_data_holder_.c_unit;
+  DCHECK(c_unit != nullptr);
+
+  // Do we perform a time split?
+  if (time_split) {
+    c_unit->NewTimingSplit(pass->GetName());
+  }
+
+  // Check the pass gate first.
+  bool should_apply_pass = pass->Gate(&pass_me_data_holder_);
+  if (should_apply_pass) {
+    // Applying the pass: first start, doWork, and end calls.
+    ApplyPass(&pass_me_data_holder_, pass);
+
+    // Do we want to log it?
+    if ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0) {
+      // Do we have a pass folder?
+      const PassME* me_pass = down_cast<const PassME*>(pass);
+      const char* pass_folder = me_pass->GetDumpCFGFolder();
+      DCHECK(pass_folder != nullptr);
+
+      if (pass_folder[0] != '\0') {
+        // Create directory prefix.
+        std::string prefix = GetDumpCFGFolder();
+        prefix += pass_folder;
+        prefix += "/";
+
+        c_unit->mir_graph->DumpCFG(prefix.c_str(), false);
+      }
+    }
+  }
+
+  // If the pass gate passed, we can declare success.
+  return should_apply_pass;
+}
+
+const char* PassDriverME::GetDumpCFGFolder() const {
+  return dump_cfg_folder_;
+}
+
+
+}  // namespace art
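
Reviewer note: a minimal usage sketch for the new driver. It assumes the
PassDriver<> base class exposes the usual Launch() loop that invokes RunPass()
for every entry in pass_list_; the function name below is illustrative, not
part of this change.

    // Hypothetical call site for PassDriverME.
    void RunMEPasses(CompilationUnit* cu) {
      PassDriverME driver(cu);  // Binds cu into pass_me_data_holder_.
      driver.Launch();          // Gate() -> ApplyPass()/DispatchPass() per pass.
    }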
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
new file mode 100644
index 0000000..0142934
--- /dev/null
+++ b/compiler/dex/pass_driver_me.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_H_
+#define ART_COMPILER_DEX_PASS_DRIVER_ME_H_
+
+#include "bb_optimizations.h"
+#include "pass_driver.h"
+#include "pass_me.h"
+
+namespace art {
+
+class PassDriverME : public PassDriver<PassDriverME> {
+ public:
+  explicit PassDriverME(CompilationUnit* cu);
+  ~PassDriverME();
+  /**
+   * @brief Dispatch a pass: walk the BasicBlocks depending on the traversal mode.
+   */
+  void DispatchPass(const Pass* pass);
+  bool RunPass(const Pass* pass, bool time_split = false);
+  const char* GetDumpCFGFolder() const;
+ protected:
+  /** @brief The data holder that contains data needed for the PassDriverME. */
+  PassMEDataHolder pass_me_data_holder_;
+
+  /** @brief Base folder used when dumping CFGs. */
+  const char* dump_cfg_folder_;
+};
+
+}  // namespace art
+#endif  // ART_COMPILER_DEX_PASS_DRIVER_ME_H_
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
new file mode 100644
index 0000000..069fb45
--- /dev/null
+++ b/compiler/dex/pass_me.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_PASS_ME_H_
+#define ART_COMPILER_DEX_PASS_ME_H_
+
+#include <string>
+#include "pass.h"
+
+namespace art {
+
+// Forward declarations.
+struct BasicBlock;
+struct CompilationUnit;
+class Pass;
+
+/**
+ * @brief OptimizationFlag is an enumeration of per-pass directive flags.
+ * @details Each enumerator must be a power of 2 so that flags can be combined as a bitmask.
+ */
+enum OptimizationFlag {
+};
+
+// Data holder class.
+class PassMEDataHolder : public PassDataHolder {
+ public:
+  CompilationUnit* c_unit;
+  BasicBlock* bb;
+};
+
+enum DataFlowAnalysisMode {
+  kAllNodes = 0,                           /**< @brief All nodes. */
+  kPreOrderDFSTraversal,                   /**< @brief Depth-First-Search / Pre-Order. */
+  kRepeatingPreOrderDFSTraversal,          /**< @brief Depth-First-Search / Repeating Pre-Order. */
+  kReversePostOrderDFSTraversal,           /**< @brief Depth-First-Search / Reverse Post-Order. */
+  kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
+  kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
+  kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
+  kTopologicalSortTraversal,               /**< @brief Topological Order traversal. */
+  kRepeatingTopologicalSortTraversal,      /**< @brief Repeating Topological Order traversal. */
+  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
+};
+
+/**
+ * @class PassME
+ * @brief PassME is the middle-end (ME) specialization of Pass.
+ * @details It bundles the traversal mode, optimization flags, and CFG dump folder for one pass.
+ */
+class PassME : public Pass {
+ public:
+  explicit PassME(const char* name, DataFlowAnalysisMode type = kAllNodes,
+          unsigned int flags = 0u, const char* dump = "")
+    : Pass(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
+  }
+
+  PassME(const char* name, DataFlowAnalysisMode type, const char* dump)
+    : Pass(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
+  }
+
+  PassME(const char* name, const char* dump)
+    : Pass(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  }
+
+  ~PassME() {
+  }
+
+  virtual DataFlowAnalysisMode GetTraversal() const {
+    return traversal_type_;
+  }
+
+  const char* GetDumpCFGFolder() const {
+    return dump_cfg_folder_;
+  }
+
+  bool GetFlag(OptimizationFlag flag) const {
+    return (flags_ & flag) != 0;
+  }
+
+ protected:
+  /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */
+  const DataFlowAnalysisMode traversal_type_;
+
+  /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */
+  const unsigned int flags_;
+
+  /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */
+  const char* const dump_cfg_folder_;
+};
+}  // namespace art
+#endif  // ART_COMPILER_DEX_PASS_ME_H_
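
For reference, a sketch of a concrete middle-end pass built on this interface.
The Gate()/Worker() signatures are inferred from their call sites in
pass_driver_me.cc and are assumptions, not part of this change.

    // Hypothetical pass: walks blocks in pre-order and reports changes.
    class ExamplePass : public PassME {
     public:
      ExamplePass() : PassME("ExamplePass", kPreOrderDFSTraversal) {}

      bool Gate(const PassDataHolder* data) const {
        // Only run when a CompilationUnit is available.
        return down_cast<const PassMEDataHolder*>(data)->c_unit != nullptr;
      }

      bool Worker(const PassDataHolder* data) const {
        // Inspect PassMEDataHolder::bb here; return true if the block changed.
        return false;
      }
    };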
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 2d1c19e..f0a9ca4 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -120,6 +120,7 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
@@ -127,6 +128,8 @@
                     RegLocation rl_src2);
     void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                     RegLocation rl_src2);
+    void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2, bool is_div);
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
     void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 384a008..2556788 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -998,6 +998,15 @@
 #endif
 }
 
+void ArmMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
+  LOG(FATAL) << "Unexpected use of GenNotLong()";
+}
+
+void ArmMir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                               RegLocation rl_src2, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLong()";
+}
+
 void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index c3b23fd..6a6b0f6 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -298,6 +298,7 @@
   kA64Mov2rr,        // mov [00101010000] rm[20-16] [000000] [11111] rd[4-0].
   kA64Mvn2rr,        // mov [00101010001] rm[20-16] [000000] [11111] rd[4-0].
   kA64Mul3rrr,       // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0].
+  kA64Msub4rrrr,     // msub[s0011011000] rm[20-16] [1] ra[14-10] rn[9-5] rd[4-0].
   kA64Neg3rro,       // neg alias of "sub arg0, rzr, arg1, arg2".
   kA64Orr3Rrl,       // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
   kA64Orr4rrro,      // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 656f8fd..4a0c055 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -422,6 +422,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
                  "mul", "!0r, !1r, !2r", kFixupNone),
+    ENCODING_MAP(WIDE(kA64Msub4rrrr), SF_VARIANTS(0x1b008000),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 14, 10,
+                 kFmtRegR, 20, 16, IS_QUAD_OP | REG_DEF0_USE123,
+                 "msub", "!0r, !1r, !3r, !2r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0),
                  kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
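
The new entry is easier to check against the A64 definition of msub:
"msub Rd, Rn, Rm, Ra" computes Rd = Ra - Rn * Rm. Operands are passed to
NewLIR4 in encoding order (rd, rn, ra, rm) while the "!0r, !1r, !3r, !2r"
format string prints them in assembly order (rd, rn, rm, ra); the new
remainder code in int_arm64.cc relies on this.

    // Reference semantics of the instruction encoded above.
    int64_t Msub(int64_t rn, int64_t rm, int64_t ra) {
      return ra - rn * rm;
    }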
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index f7a0199..2e3ef86 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -94,8 +94,7 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  OpRegRegRegShift(kOpAdd, r_base.GetReg(), r_base.GetReg(), r_disp.GetReg(),
-                   ENCODE_NO_SHIFT, true);
+  OpRegRegRegShift(kOpAdd, r_base, r_base, r_disp, ENCODE_NO_SHIFT);
   NewLIR1(kA64Br1x, r_base.GetReg());
 
   // Loop exit label.
@@ -148,8 +147,7 @@
   tab_rec->anchor = switch_label;
 
   // Add displacement to base branch address and go!
-  OpRegRegRegShift(kOpAdd, branch_reg.GetReg(), branch_reg.GetReg(), disp_reg.GetReg(),
-                   ENCODE_NO_SHIFT, true);
+  OpRegRegRegShift(kOpAdd, branch_reg, branch_reg, disp_reg, ENCODE_NO_SHIFT);
   NewLIR1(kA64Br1x, branch_reg.GetReg());
 
   // branch_over target here
@@ -334,7 +332,7 @@
 
   if (!skip_overflow_check) {
     LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12);
-    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
+    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       /* Load stack limit */
       // TODO(Arm64): fix the line below:
@@ -348,7 +346,7 @@
       MarkPossibleStackOverflowException();
     }
   } else if (frame_size_ > 0) {
-    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_, /*is_wide*/true);
+    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
   }
 
   /* Need to spill any FP regs? */
@@ -391,7 +389,7 @@
     UnSpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
   }
 
-  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_, /*is_wide*/true);
+  OpRegImm64(kOpAdd, rs_rA64_SP, frame_size_);
   NewLIR0(kA64Ret);
 }
 
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 350e483..fddbfd7 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -93,6 +93,8 @@
     RegisterClass RegClassForFieldLoadStore(OpSize size, bool is_volatile) OVERRIDE;
 
     // Required for target - Dalvik-level generators.
+    void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                        RegLocation lr_shift);
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
@@ -120,6 +122,8 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
+    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
@@ -127,6 +131,8 @@
                     RegLocation rl_src2);
     void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                     RegLocation rl_src2);
+    void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2, bool is_div);
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
     void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
@@ -170,7 +176,7 @@
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
     void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
-    LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide);
+    LIR* OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value);
     LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
     LIR* OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2);
@@ -191,8 +197,8 @@
 
     LIR* LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size);
     LIR* StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src, OpSize size);
-    LIR* OpRegRegRegShift(OpKind op, int r_dest, int r_src1, int r_src2, int shift,
-                          bool is_wide = false);
+    LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
+                          int shift);
     LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
     static const ArmEncodingMap EncodingMap[kA64Last];
     int EncodeShift(int code, int amount);
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index b0f5904..38f110e 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -53,10 +53,36 @@
   rl_result = EvalLoc(rl_dest, kCoreReg, true);
 
   OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
-  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
-  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
+  NewLIR4(WIDE(kA64Csinc4rrrc), rl_result.reg.GetReg(), rxzr, rxzr, kArmCondEq);
+  NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_result.reg.GetReg(),
           rl_result.reg.GetReg(), kArmCondLe);
-  StoreValue(rl_dest, rl_result);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                  RegLocation rl_src1, RegLocation rl_shift) {
+  OpKind op = kOpBkpt;
+  switch (opcode) {
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      op = kOpLsl;
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      op = kOpAsr;
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      op = kOpLsr;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case: " << opcode;
+  }
+  rl_shift = LoadValueWide(rl_shift, kCoreReg);
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegReg(op, rl_result.reg, rl_src1.reg, rl_shift.reg);
+  StoreValueWide(rl_dest, rl_result);
 }
 
 void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
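
Note on the absent mask: the Dalvik spec only consumes the low six bits of the
shift amount for long shifts, and the A64 variable-shift instructions already
reduce the register operand modulo the datasize, so GenShiftOpLong() gets the
masking for free. Modeled in C++:

    // What the generated 64-bit variable shift computes.
    uint64_t ShlLong(uint64_t value, uint64_t amount) {
      return value << (amount & 63);  // Hardware applies the mod-64 implicitly.
    }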
@@ -69,7 +95,7 @@
     LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0);
     branch->target = taken;
   } else {
-    OpRegImm64(kOpCmp, rl_src1.reg, val, /*is_wide*/true);
+    OpRegImm64(kOpCmp, rl_src1.reg, val);
     OpCondBranch(ccode, taken);
   }
 }
@@ -219,7 +245,8 @@
   ArmConditionCode arm_cond = ArmConditionEncoding(cond);
   if (check_value == 0 && (arm_cond == kArmCondEq || arm_cond == kArmCondNe)) {
     ArmOpcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
-    branch = NewLIR2(opcode, reg.GetReg(), 0);
+    ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
+    branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
   } else {
     OpRegImm(kOpCmp, reg, check_value);
     branch = NewLIR2(kA64B2ct, arm_cond, 0);
@@ -354,19 +381,16 @@
   NewLIR4(kA64Smaddl4xwwx, r_lo.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
   switch (pattern) {
     case Divide3:
-      OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(),
-               rl_src.reg.GetReg(), EncodeShift(kA64Asr, 31));
+      OpRegRegRegShift(kOpSub, rl_result.reg, r_hi, rl_src.reg, EncodeShift(kA64Asr, 31));
       break;
     case Divide5:
       OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
-               EncodeShift(kA64Asr, magic_table[lit].shift));
+      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi,
+                       EncodeShift(kA64Asr, magic_table[lit].shift));
       break;
     case Divide7:
       OpRegReg(kOpAdd, r_hi, rl_src.reg);
       OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
-      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
-               EncodeShift(kA64Asr, magic_table[lit].shift));
+      OpRegRegRegShift(kOpRsub, rl_result.reg, r_lo, r_hi,
+                       EncodeShift(kA64Asr, magic_table[lit].shift));
       break;
     default:
       LOG(FATAL) << "Unexpected pattern: " << pattern;
@@ -405,25 +429,30 @@
   return rl_result;
 }
 
-RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
+RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
                                   bool is_div) {
+  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());
+
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_div) {
-    // Simple case, use sdiv instruction.
-    OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2);
+    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
   } else {
-    // Remainder case, use the following code:
-    // temp = reg1 / reg2      - integer division
-    // temp = temp * reg2
-    // dest = reg1 - temp
-
-    RegStorage temp = AllocTemp();
-    OpRegRegReg(kOpDiv, temp, reg1, reg2);
-    OpRegReg(kOpMul, temp, reg2);
-    OpRegRegReg(kOpSub, rl_result.reg, reg1, temp);
+    // temp = r_src1 / r_src2
+    // dest = r_src1 - temp * r_src2
+    RegStorage temp;
+    ArmOpcode wide;
+    if (rl_result.reg.Is64Bit()) {
+      temp = AllocTempWide();
+      wide = WIDE(0);
+    } else {
+      temp = AllocTemp();
+      wide = UNWIDE(0);
+    }
+    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
+    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
+            r_src1.GetReg(), r_src2.GetReg());
     FreeTemp(temp);
   }
-
   return rl_result;
 }
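
The rewritten remainder path is the standard two-instruction sequence; in plain
C++ terms:

    // is_div == false: remainder via sdiv + msub, no explicit mul/sub temps.
    int64_t Rem(int64_t dividend, int64_t divisor) {
      int64_t quot = dividend / divisor;  // sdiv temp, r_src1, r_src2
      return dividend - quot * divisor;   // msub result, temp, r_src2, r_src1
    }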
 
@@ -684,17 +713,14 @@
 void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                                RegLocation rl_result, int lit,
                                                int first_bit, int second_bit) {
-  OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
-                   EncodeShift(kA64Lsl, second_bit - first_bit));
+  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
+                   EncodeShift(kA64Lsl, second_bit - first_bit));
   if (first_bit != 0) {
     OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
   }
 }
 
 void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
-  DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
-  OpRegImm64(kOpCmp, reg, 0, /*is_wide*/true);
-  GenDivZeroCheck(kCondEq);
+  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
 }
 
 // Test suspend flag, return target of taken suspend branch
@@ -756,33 +782,51 @@
 #endif
 }
 
-void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage z_reg = AllocTemp();
-  LoadConstantNoClobber(z_reg, 0);
-  // Check for destructive overlap
-  if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
-    RegStorage t_reg = AllocTemp();
-    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
-    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
-    FreeTemp(t_reg);
-  } else {
-    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
-    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
-  }
-  FreeTemp(z_reg);
+void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+
+  rl_src = LoadValue(rl_src, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 31);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
+                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div) {
+  RegLocation rl_result;
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+  GenDivZeroCheck(rl_src2.reg);
+  rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
   StoreValueWide(rl_dest, rl_result);
 }
 
 void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
                              RegLocation rl_src2) {
   RegLocation rl_result;
+
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  OpRegRegRegShift(op, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg(),
-                   ENCODE_NO_SHIFT, /*is_wide*/ true);
+  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation rl_result;
+
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -865,8 +909,7 @@
     } else {
       // No special indexed operation, lea + load w/ displacement
       reg_ptr = AllocTemp();
-      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
-                       EncodeShift(kA64Lsl, scale));
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
       FreeTemp(rl_index.reg);
     }
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -971,8 +1014,7 @@
       rl_src = LoadValue(rl_src, reg_class);
     }
     if (!constant_index) {
-      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
-                       EncodeShift(kA64Lsl, scale));
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
     }
     if (needs_range_check) {
       if (constant_index) {
@@ -1004,167 +1046,84 @@
   }
 }
 
-
 void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
-  // TODO(Arm64): check this.
-  UNIMPLEMENTED(WARNING);
-
-  rl_src = LoadValueWide(rl_src, kCoreReg);
+  OpKind op = kOpBkpt;
   // Per spec, we only care about low 6 bits of shift amount.
   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
+  rl_src = LoadValueWide(rl_src, kCoreReg);
   if (shift_amount == 0) {
     StoreValueWide(rl_dest, rl_src);
     return;
   }
-  if (BadOverlap(rl_src, rl_dest)) {
-    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
-    return;
-  }
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   switch (opcode) {
     case Instruction::SHL_LONG:
     case Instruction::SHL_LONG_2ADDR:
-      if (shift_amount == 1) {
-        OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
-        OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
-      } else if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
-        LoadConstant(rl_result.reg.GetLow(), 0);
-      } else if (shift_amount > 31) {
-        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
-        LoadConstant(rl_result.reg.GetLow(), 0);
-      } else {
-        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetLowReg(),
-                         EncodeShift(kA64Lsr, 32 - shift_amount));
-        OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
-      }
+      op = kOpLsl;
       break;
     case Instruction::SHR_LONG:
     case Instruction::SHR_LONG_2ADDR:
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
-      } else if (shift_amount > 31) {
-        OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
-        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
-      } else {
-        RegStorage t_reg = AllocTemp();
-        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
-                         EncodeShift(kA64Lsl, 32 - shift_amount));
-        FreeTemp(t_reg);
-        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
-      }
+      op = kOpAsr;
       break;
     case Instruction::USHR_LONG:
     case Instruction::USHR_LONG_2ADDR:
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        LoadConstant(rl_result.reg.GetHigh(), 0);
-      } else if (shift_amount > 31) {
-        OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
-        LoadConstant(rl_result.reg.GetHigh(), 0);
-      } else {
-        RegStorage t_reg = AllocTemp();
-        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
-        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(), rl_src.reg.GetHighReg(),
-                         EncodeShift(kA64Lsl, 32 - shift_amount));
-        FreeTemp(t_reg);
-        OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
-      }
+      op = kOpLsr;
       break;
     default:
       LOG(FATAL) << "Unexpected case";
   }
+  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
   StoreValueWide(rl_dest, rl_result);
 }
 
 void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                      RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO(Arm64): implement this.
-  UNIMPLEMENTED(WARNING);
-
-  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
+  if ((opcode == Instruction::SUB_LONG) || (opcode == Instruction::SUB_LONG_2ADDR)) {
     if (!rl_src2.is_const) {
-      // Don't bother with special handling for subtract from immediate.
-      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
-      return;
+      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
     }
   } else {
-    // Normalize
+    // Commutativity: move the constant operand into rl_src2.
     if (!rl_src2.is_const) {
       DCHECK(rl_src1.is_const);
       std::swap(rl_src1, rl_src2);
     }
   }
-  if (BadOverlap(rl_src1, rl_dest)) {
-    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
-    return;
-  }
   DCHECK(rl_src2.is_const);
-  // TODO(Arm64): implement this.
-  //  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
-  int32_t mod_imm_lo = -1;  // ModifiedImmediate(val_lo);
-  int32_t mod_imm_hi = -1;  // ModifiedImmediate(val_hi);
 
-  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
+  OpKind op = kOpBkpt;
+  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+
   switch (opcode) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
+      op = kOpAdd;
+      break;
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
-      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
-        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
-        return;
-      }
-      break;
-    default:
-      break;
-  }
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
-  switch (opcode) {
-#if 0
-    case Instruction::ADD_LONG:
-    case Instruction::ADD_LONG_2ADDR:
-      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
-      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
-      break;
-    case Instruction::OR_LONG:
-    case Instruction::OR_LONG_2ADDR:
-      if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
-        OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
-      }
-      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
-        OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
-      }
-      break;
-    case Instruction::XOR_LONG:
-    case Instruction::XOR_LONG_2ADDR:
-      OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
-      OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
+      op = kOpSub;
       break;
     case Instruction::AND_LONG:
     case Instruction::AND_LONG_2ADDR:
-      if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
-        OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
-      }
-      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
-        OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
-      }
+      op = kOpAnd;
       break;
-    case Instruction::SUB_LONG_2ADDR:
-    case Instruction::SUB_LONG:
-      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
-      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      op = kOpOr;
       break;
-#endif
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      op = kOpXor;
+      break;
     default:
-      LOG(FATAL) << "Unexpected opcode " << opcode;
+      LOG(FATAL) << "Unexpected opcode";
   }
+
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  OpRegRegImm(op, rl_result.reg, rl_src1.reg, val);
   StoreValueWide(rl_dest, rl_result);
 }
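
A quick illustration of why SUB is special-cased above: add/and/or/xor are
commutative, so a constant rl_src1 can be swapped into rl_src2 and folded into
the immediate form, while subtraction is not, hence the early fallback to
GenArithOpLong() when rl_src2 is not constant.

    // Illustrative only: the algebra behind the swap.
    int64_t Foldable(int64_t x)    { return x + 5; }  // 5 + x == x + 5 -> OpRegRegImm
    int64_t NotFoldable(int64_t x) { return 5 - x; }  // 5 - x != x - 5 -> register path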
 
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 2b1c5e8..808060d 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -606,7 +606,7 @@
   GrowableArray<RegisterInfo*>::Iterator fp_it(&reg_pool_->sp_regs_);
   for (RegisterInfo* info = fp_it.Next(); info != nullptr; info = fp_it.Next()) {
     int fp_reg_num = info->GetReg().GetRegNum();
-    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | fp_reg_num);
+    RegStorage dp_reg = RegStorage::FloatSolo64(fp_reg_num);
     RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
     // Double precision register's master storage should refer to itself.
     DCHECK_EQ(dp_reg_info, dp_reg_info->Master());
@@ -616,6 +616,20 @@
     DCHECK_EQ(info->StorageMask(), 0x1U);
   }
 
+  // Alias 32bit W registers to corresponding 64bit X registers.
+  GrowableArray<RegisterInfo*>::Iterator w_it(&reg_pool_->core_regs_);
+  for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) {
+    int x_reg_num = info->GetReg().GetRegNum();
+    RegStorage x_reg = RegStorage::Solo64(x_reg_num);
+    RegisterInfo* x_reg_info = GetRegInfo(x_reg);
+    // 64bit X register's master storage should refer to itself.
+    DCHECK_EQ(x_reg_info, x_reg_info->Master());
+    // Redirect 32bit W master storage to 64bit X.
+    info->SetMaster(x_reg_info);
+    // The 32-bit W view should expose a single storage mask bit, initially the low half of X.
+    DCHECK_EQ(info->StorageMask(), 0x1U);
+  }
+
   // TODO: re-enable this when we can safely save r4 over the suspension code path.
   bool no_suspend = NO_SUSPEND;  // || !Runtime::Current()->ExplicitSuspendChecks();
   if (no_suspend) {
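
The point of the aliasing loop above: once wN and xN share a master, a clobber
through either view invalidates any value the allocator tracked in the other,
which is required now that 32-bit and 64-bit values occupy the same physical
registers. A minimal model of the invariant:

    // Both views name the same storage, so one liveness record covers both.
    struct RegCell { uint64_t bits; bool live; };
    inline uint32_t ReadW(const RegCell& c) { return static_cast<uint32_t>(c.bits); }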
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 39e9fad..eca0d2f 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -408,7 +408,7 @@
       DCHECK_EQ(shift, ENCODE_NO_SHIFT);
       return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
     default:
-      return OpRegRegRegShift(op, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
+      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
   }
 
   DCHECK(!IsPseudoLirOp(opcode));
@@ -445,8 +445,8 @@
   return NULL;
 }
 
-LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1,
-                                    int r_src2, int shift, bool is_wide) {
+LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
+                                    RegStorage r_src2, int shift) {
   ArmOpcode opcode = kA64Brk1d;
 
   switch (op) {
@@ -500,21 +500,24 @@
   // The instructions above belong to two kinds:
   // - 4-operands instructions, where the last operand is a shift/extend immediate,
   // - 3-operands instructions with no shift/extend.
-  ArmOpcode widened_opcode = (is_wide) ? WIDE(opcode) : opcode;
+  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
+  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
+  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
   if (EncodingMap[opcode].flags & IS_QUAD_OP) {
     DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-    return NewLIR4(widened_opcode, r_dest, r_src1, r_src2, shift);
+    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
   } else {
     DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
     DCHECK_EQ(shift, ENCODE_NO_SHIFT);
-    return NewLIR3(widened_opcode, r_dest, r_src1, r_src2);
+    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
   }
 }
 
 LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
-  return OpRegRegRegShift(op, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), ENCODE_NO_SHIFT);
+  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
 }
 
+// TODO: this should probably take an int64_t value.
 LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
   LIR* res;
   bool neg = (value < 0);
@@ -523,6 +526,7 @@
   ArmOpcode alt_opcode = kA64Brk1d;
   int32_t log_imm = -1;
   bool is_wide = r_dest.Is64Bit();
+  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
 
   switch (op) {
@@ -610,11 +614,11 @@
 }
 
 LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
-  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value), /*is_wide*/false);
+  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
 }
 
-LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value, bool is_wide) {
-  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
+LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
+  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
   ArmOpcode opcode = kA64Brk1d;
   ArmOpcode neg_opcode = kA64Brk1d;
   bool shift;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index de55a05..7e3c8ce 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1595,7 +1595,7 @@
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
       OpRegReg(op, rl_result.reg, rl_src1.reg);
     } else {
-      if (shift_op) {
+      if (shift_op && (cu_->instruction_set != kArm64)) {
         rl_src2 = LoadValue(rl_src2, kCoreReg);
         RegStorage t_reg = AllocTemp();
         OpRegRegImm(kOpAnd, t_reg, rl_src2.reg, 31);
@@ -1613,7 +1613,7 @@
     StoreValue(rl_dest, rl_result);
   } else {
     bool done = false;      // Set to true if we happen to find a way to use a real instruction.
-    if (cu_->instruction_set == kMips) {
+    if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) {
       rl_src1 = LoadValue(rl_src1, kCoreReg);
       rl_src2 = LoadValue(rl_src2, kCoreReg);
       if (check_zero) {
@@ -1889,7 +1889,7 @@
       }
 
       bool done = false;
-      if (cu_->instruction_set == kMips) {
+      if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) {
         rl_src = LoadValue(rl_src, kCoreReg);
         rl_result = GenDivRemLit(rl_dest, rl_src.reg, lit, is_div);
         done = true;
@@ -1952,6 +1952,10 @@
 
   switch (opcode) {
     case Instruction::NOT_LONG:
+      if (cu->instruction_set == kArm64) {
+        mir_to_lir->GenNotLong(rl_dest, rl_src2);
+        return;
+      }
       rl_src2 = mir_to_lir->LoadValueWide(rl_src2, kCoreReg);
       rl_result = mir_to_lir->EvalLoc(rl_dest, kCoreReg, true);
       // Check for destructive overlap
@@ -1998,6 +2002,10 @@
       break;
     case Instruction::DIV_LONG:
     case Instruction::DIV_LONG_2ADDR:
+      if (cu->instruction_set == kArm64) {
+        mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
+        return;
+      }
       call_out = true;
       check_zero = true;
       ret_reg = mir_to_lir->TargetReg(kRet0).GetReg();
@@ -2005,6 +2013,10 @@
       break;
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
+      if (cu->instruction_set == kArm64) {
+        mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
+        return;
+      }
       call_out = true;
       check_zero = true;
       func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pLmod);
@@ -2014,7 +2026,8 @@
       break;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
-      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) {
+      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 ||
+          cu->instruction_set == kArm64) {
         return mir_to_lir->GenAndLong(opcode, rl_dest, rl_src1, rl_src2);
       }
       first_op = kOpAnd;
@@ -2022,7 +2035,8 @@
       break;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
-      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) {
+      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 ||
+          cu->instruction_set == kArm64) {
         mir_to_lir->GenOrLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
@@ -2031,7 +2045,8 @@
       break;
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
-      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64) {
+      if (cu->instruction_set == kX86 || cu->instruction_set == kX86_64 ||
+          cu->instruction_set == kArm64) {
         mir_to_lir->GenXorLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 2b57b35..e462173 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -118,6 +118,7 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
@@ -125,6 +126,8 @@
                     RegLocation rl_src2);
     void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                     RegLocation rl_src2);
+    void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2, bool is_div);
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
     void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 55e93d7..beaf6bb 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -431,6 +431,15 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void MipsMir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
+  LOG(FATAL) << "Unexpected use of GenNotLong()";
+}
+
+void MipsMir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                                RegLocation rl_src2, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLong()";
+}
+
 void MipsMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 3584c33..4cebb7c 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -775,7 +775,7 @@
                              RegLocation rl_src2, LIR* taken, LIR* fall_through);
     void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src,
                                  LIR* taken, LIR* fall_through);
-    void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
+    virtual void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
     void GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest,
                          RegLocation rl_src);
     void GenNewArray(uint32_t type_idx, RegLocation rl_dest,
@@ -800,7 +800,7 @@
     void GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_src);
     void GenLong3Addr(OpKind first_op, OpKind second_op, RegLocation rl_dest,
                       RegLocation rl_src1, RegLocation rl_src2);
-    void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+    virtual void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                         RegLocation rl_src1, RegLocation rl_shift);
     void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src, int lit);
@@ -1170,6 +1170,7 @@
     virtual bool GenInlinedSqrt(CallInfo* info) = 0;
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
     virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0;
+    virtual void GenNotLong(RegLocation rl_dest, RegLocation rl_src) = 0;
     virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0;
     virtual void GenOrLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) = 0;
@@ -1177,6 +1178,8 @@
                             RegLocation rl_src2) = 0;
     virtual void GenXorLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
+    virtual void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2, bool is_div) = 0;
     virtual RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                   bool is_div) = 0;
     virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit,
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 2c51c1f..8c0f2bb 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -447,8 +447,11 @@
     reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
   }
   if (!reg.Valid() && (reg_class != kFPReg)) {
-    // TODO: add 64-bit core pool similar to above.
-    reg = FindLiveReg(reg_pool_->core_regs_, s_reg);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      reg = FindLiveReg(wide ? reg_pool_->core64_regs_ : reg_pool_->core_regs_, s_reg);
+    } else {
+      reg = FindLiveReg(reg_pool_->core_regs_, s_reg);
+    }
   }
   if (reg.Valid()) {
     if (wide && !reg.IsFloat() && !Is64BitInstructionSet(cu_->instruction_set)) {
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 3070edd..72cdbbd 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -118,6 +118,7 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                    RegLocation rl_src2);
@@ -125,6 +126,8 @@
                     RegLocation rl_src2);
     void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                     RegLocation rl_src2);
+    void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                       RegLocation rl_src2, bool is_div);
     // TODO: collapse reg_lo, reg_hi
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index a6ccc99..48bff6e 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1372,6 +1372,15 @@
   GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
 }
 
+void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
+  LOG(FATAL) << "Unexpected use of GenNotLong()";
+}
+
+void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
+                               RegLocation rl_src2, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLong()";
+}
+
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   RegLocation rl_result = ForceTempWide(rl_src);
@@ -2189,6 +2198,10 @@
           }
         }
         rl_rhs = LoadValue(rl_rhs, kCoreReg);
+        // rl_rhs and rl_dest may refer to the same VR (e.g. "add-int v0, v1, v0"):
+        // after the LoadValue above rl_dest is in a register, but rl_result has
+        // not been refreshed yet, so re-derive it here.
+        rl_result = UpdateLocTyped(rl_dest, kCoreReg);
         if (rl_result.location != kLocPhysReg) {
           // Okay, we can do this into memory.
           OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index 3387c50..df21343 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -312,7 +312,7 @@
       case k256BitSolo: return 32;
       case k512BitSolo: return 64;
       case k1024BitSolo: return 128;
-      default: LOG(FATAL) << "Unexpected shap";
+      default: LOG(FATAL) << "Unexpected shape";
     }
     return 0;
   }
diff --git a/compiler/output_stream.h b/compiler/output_stream.h
index 478a854..97ccc2c 100644
--- a/compiler/output_stream.h
+++ b/compiler/output_stream.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OUTPUT_STREAM_H_
 
 #include <stdint.h>
+#include <sys/types.h>
 
 #include <string>
 
diff --git a/dalvikvm/Android.mk b/dalvikvm/Android.mk
index 0ded2d8..03d32f0 100644
--- a/dalvikvm/Android.mk
+++ b/dalvikvm/Android.mk
@@ -51,6 +51,7 @@
 LOCAL_LDFLAGS := -ldl -lpthread
 LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.mk
 LOCAL_IS_HOST_MODULE := true
+include external/libcxx/libcxx.mk
 include $(BUILD_HOST_EXECUTABLE)
 ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 9914875..f0b5750 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -33,7 +33,7 @@
 #include "compiler.h"
 #include "compiler_callbacks.h"
 #include "dex_file-inl.h"
-#include "dex/pass_driver.h"
+#include "dex/pass_driver_me.h"
 #include "dex/verification_results.h"
 #include "driver/compiler_callbacks_impl.h"
 #include "driver/compiler_driver.h"
@@ -918,10 +918,10 @@
     } else if (option == "--no-profile-file") {
       // No profile
     } else if (option == "--print-pass-names") {
-      PassDriver::PrintPassNames();
+      PassDriverME::PrintPassNames();
     } else if (option.starts_with("--disable-passes=")) {
       std::string disable_passes = option.substr(strlen("--disable-passes=")).data();
-      PassDriver::CreateDefaultPassList(disable_passes);
+      PassDriverME::CreateDefaultPassList(disable_passes);
     } else {
       Usage("Unknown argument %s", option.data());
     }
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index dd4e9d5..814323c 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -87,8 +87,8 @@
 
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+  include external/libcxx/libcxx.mk
   ifeq ($$(art_target_or_host),target)
-    include external/libcxx/libcxx.mk
     LOCAL_SHARED_LIBRARIES += libcutils libvixl
     include $(BUILD_SHARED_LIBRARY)
   else # host
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index dcae502..d38f37f 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -76,6 +76,13 @@
           "      Example: --boot-image=/system/framework/boot.art\n"
           "\n");
   fprintf(stderr,
+          "  --instruction-set=(arm|arm64|mips|x86|x86_64): for locating the image file based on the image location\n"
+          "      set.\n"
+          "      Example: --instruction-set=x86\n"
+          "      Default: %s\n"
+          "\n",
+          GetInstructionSetString(kRuntimeISA));
+  fprintf(stderr,
           "  --output=<file> may be used to send the output to a file.\n"
           "      Example: --output=/tmp/oatdump.txt\n"
           "\n");
@@ -1461,8 +1468,9 @@
   }
 
   const char* oat_filename = NULL;
-  const char* image_filename = NULL;
-  const char* boot_image_filename = NULL;
+  const char* image_location = NULL;
+  const char* boot_image_location = NULL;
+  InstructionSet instruction_set = kRuntimeISA;
   std::string elf_filename_prefix;
   std::ostream* os = &std::cout;
   std::unique_ptr<std::ofstream> out;
@@ -1474,9 +1482,22 @@
     if (option.starts_with("--oat-file=")) {
       oat_filename = option.substr(strlen("--oat-file=")).data();
     } else if (option.starts_with("--image=")) {
-      image_filename = option.substr(strlen("--image=")).data();
+      image_location = option.substr(strlen("--image=")).data();
     } else if (option.starts_with("--boot-image=")) {
-      boot_image_filename = option.substr(strlen("--boot-image=")).data();
+      boot_image_location = option.substr(strlen("--boot-image=")).data();
+    } else if (option.starts_with("--instruction-set=")) {
+      StringPiece instruction_set_str = option.substr(strlen("--instruction-set="));
+      if (instruction_set_str == "arm") {
+        instruction_set = kThumb2;
+      } else if (instruction_set_str == "arm64") {
+        instruction_set = kArm64;
+      } else if (instruction_set_str == "mips") {
+        instruction_set = kMips;
+      } else if (instruction_set_str == "x86") {
+        instruction_set = kX86;
+      } else if (instruction_set_str == "x86_64") {
+        instruction_set = kX86_64;
+      }
+      // Unrecognized values fall through and keep the default (kRuntimeISA).
     } else if (option.starts_with("--dump:")) {
         if (option == "--dump:raw_mapping_table") {
           dump_raw_mapping_table = true;
@@ -1500,12 +1521,12 @@
     }
   }
 
-  if (image_filename == NULL && oat_filename == NULL) {
+  if (image_location == NULL && oat_filename == NULL) {
     fprintf(stderr, "Either --image or --oat must be specified\n");
     return EXIT_FAILURE;
   }
 
-  if (image_filename != NULL && oat_filename != NULL) {
+  if (image_location != NULL && oat_filename != NULL) {
     fprintf(stderr, "Either --image or --oat must be specified but not both\n");
     return EXIT_FAILURE;
   }
@@ -1533,16 +1554,19 @@
   NoopCompilerCallbacks callbacks;
   options.push_back(std::make_pair("compilercallbacks", &callbacks));
 
-  if (boot_image_filename != NULL) {
+  if (boot_image_location != NULL) {
     boot_image_option += "-Ximage:";
-    boot_image_option += boot_image_filename;
+    boot_image_option += boot_image_location;
     options.push_back(std::make_pair(boot_image_option.c_str(), reinterpret_cast<void*>(NULL)));
   }
-  if (image_filename != NULL) {
+  if (image_location != NULL) {
     image_option += "-Ximage:";
-    image_option += image_filename;
+    image_option += image_location;
     options.push_back(std::make_pair(image_option.c_str(), reinterpret_cast<void*>(NULL)));
   }
+  options.push_back(
+      std::make_pair("imageinstructionset",
+                     reinterpret_cast<const void*>(GetInstructionSetString(instruction_set))));
 
   if (!Runtime::Create(options, false)) {
     fprintf(stderr, "Failed to create runtime\n");
@@ -1558,7 +1582,7 @@
   CHECK(image_space != NULL);
   const ImageHeader& image_header = image_space->GetImageHeader();
   if (!image_header.IsValid()) {
-    fprintf(stderr, "Invalid image header %s\n", image_filename);
+    fprintf(stderr, "Invalid image header %s\n", image_location);
     return EXIT_FAILURE;
   }
   ImageDumper image_dumper(os, *image_space, image_header,
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 1521caa..c2507b1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -308,6 +308,12 @@
   LIBART_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
 endif
 
+ifeq ($(MALLOC_IMPL),jemalloc)
+  LIBART_CFLAGS += -DUSE_JEMALLOC
+else
+  LIBART_CFLAGS += -DUSE_DLMALLOC
+endif
+
 # $(1): target or host
 # $(2): ndebug or debug
 # $(3): true or false for LOCAL_CLANG
@@ -397,12 +403,8 @@
   endif
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES)
   LOCAL_SHARED_LIBRARIES += liblog libnativehelper
-  ifeq ($$(art_target_or_host),target)
-    include external/libcxx/libcxx.mk
-    LOCAL_SHARED_LIBRARIES += libbacktrace_libc++
-  else
-    LOCAL_SHARED_LIBRARIES += libbacktrace
-  endif
+  include external/libcxx/libcxx.mk
+  LOCAL_SHARED_LIBRARIES += libbacktrace_libc++
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libcutils libdl libselinux libutils
     LOCAL_STATIC_LIBRARIES := libziparchive libz
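Note: the MALLOC_IMPL check above turns the build-time allocator choice into a USE_JEMALLOC/USE_DLMALLOC define that runtime code can branch on (the call sites are in debugger.cc and heap.cc below). A sketch of the consuming side, with an illustrative helper name:

    // Hypothetical helper: report which native allocator libart was built with.
    const char* NativeAllocatorName() {
    #if defined(USE_JEMALLOC)
      return "jemalloc";
    #elif defined(USE_DLMALLOC)
      return "dlmalloc";
    #else
      return "unknown";
    #endif
    }
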
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 2a5c7d1..cb9f53b 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -84,12 +84,6 @@
 // Double-precision FP arithmetics.
 extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
 
-// Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
-extern "C" int64_t art_quick_mul_long(int64_t, int64_t);
-extern "C" uint64_t art_quick_shl_long(uint64_t, uint32_t);
-extern "C" uint64_t art_quick_shr_long(uint64_t, uint32_t);
-extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
-
 // Intrinsic entrypoints.
 extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
@@ -199,10 +193,10 @@
   qpoints->pF2l = NULL;
   qpoints->pLdiv = NULL;
   qpoints->pLmod = NULL;
-  qpoints->pLmul = art_quick_mul_long;
-  qpoints->pShlLong = art_quick_shl_long;
-  qpoints->pShrLong = art_quick_shr_long;
-  qpoints->pUshrLong = art_quick_ushr_long;
+  qpoints->pLmul = NULL;
+  qpoints->pShlLong = NULL;
+  qpoints->pShrLong = NULL;
+  qpoints->pUshrLong = NULL;
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index ac922dd..7f31fb6 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1611,10 +1611,6 @@
 UNIMPLEMENTED art_quick_instrumentation_entry
 UNIMPLEMENTED art_quick_instrumentation_exit
 UNIMPLEMENTED art_quick_deoptimize
-UNIMPLEMENTED art_quick_mul_long
-UNIMPLEMENTED art_quick_shl_long
-UNIMPLEMENTED art_quick_shr_long
-UNIMPLEMENTED art_quick_ushr_long
 UNIMPLEMENTED art_quick_indexof
 
    /*
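Note: both arm64 hunks retire the long multiply/shift entrypoints. The assembly stubs were only UNIMPLEMENTED placeholders, and on arm64 each operation is a single instruction, so the compiler can emit it inline instead of calling through qpoints. A sketch of the semantics the removed stubs stood for, assuming Dex's 6-bit shift-distance masking on 64-bit shifts:

    #include <cstdint>

    int64_t  MulLong(int64_t a, int64_t b)    { return a * b; }          // mul
    uint64_t ShlLong(uint64_t v, uint32_t n)  { return v << (n & 63); }  // lsl
    int64_t  ShrLong(int64_t v, uint32_t n)   { return v >> (n & 63); }  // asr (arithmetic)
    uint64_t UshrLong(uint64_t v, uint32_t n) { return v >> (n & 63); }  // lsr (logical)
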
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 26cd864..9f36927 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -40,10 +40,9 @@
 
 namespace art {
 
-static Mutex modify_ldt_lock("modify_ldt lock");
-
 void Thread::InitCpu() {
-  MutexLock mu(Thread::Current(), modify_ldt_lock);
+  // Take the ldt lock; Thread::Current isn't yet established.
+  MutexLock mu(nullptr, *Locks::modify_ldt_lock_);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
   const size_t limit = kPageSize;
@@ -138,7 +137,7 @@
 }
 
 void Thread::CleanupCpu() {
-  MutexLock mu(Thread::Current(), modify_ldt_lock);
+  MutexLock mu(this, *Locks::modify_ldt_lock_);
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
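Note: InitCpu now runs before Thread::Current() is established, so the global Locks::modify_ldt_lock_ is taken with a null self, while CleanupCpu can pass this directly. The Thread* argument to MutexLock exists only for lock-hierarchy bookkeeping, which is skipped when it is null; a minimal sketch of that tolerance:

    #include <pthread.h>

    // Sketch: the Thread* is bookkeeping only; the pthread mutex underneath
    // never needs it, so a null owner simply skips the held-locks tracking.
    class SketchMutex {
     public:
      void Lock(void* self) {
        pthread_mutex_lock(&mu_);
        if (self != nullptr) { /* record in self's held_mutexes here */ }
      }
      void Unlock(void* self) {
        if (self != nullptr) { /* clear from self's held_mutexes here */ }
        pthread_mutex_unlock(&mu_);
      }
     private:
      pthread_mutex_t mu_ = PTHREAD_MUTEX_INITIALIZER;
    };
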
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index adf4c66..6c415e7 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -132,9 +132,21 @@
   // TODO: tighten this check.
   if (kDebugLocking) {
     Runtime* runtime = Runtime::Current();
-    CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
-          level == kDefaultMutexLevel  || level == kRuntimeShutdownLock ||
-          level == kThreadListLock || level == kLoggingLock || level == kAbortLock);
+    CHECK(runtime == nullptr || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
+          // Used during thread creation to avoid races with runtime shutdown. Thread::Current not
+          // yet established.
+          level == kRuntimeShutdownLock ||
+          // Thread ids are allocated/released before threads are fully established.
+          level == kAllocatedThreadIdsLock ||
+          // Thread LDTs are initialized without Thread::Current established.
+          level == kModifyLdtLock ||
+          // Threads are unregistered while holding the thread list lock; at that point they
+          // no longer exist, so we expect an unlock with no self.
+          level == kThreadListLock ||
+          // Ignore logging which may or may not have set up thread data structures.
+          level == kLoggingLock ||
+          // Avoid recursive death.
+          level == kAbortLock);
   }
 }
 
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 6f7f2c1..705be40 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -30,10 +30,12 @@
 namespace art {
 
 Mutex* Locks::abort_lock_ = nullptr;
+Mutex* Locks::allocated_thread_ids_lock_ = nullptr;
 Mutex* Locks::breakpoint_lock_ = nullptr;
 ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr;
 ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr;
 Mutex* Locks::logging_lock_ = nullptr;
+Mutex* Locks::modify_ldt_lock_ = nullptr;
 ReaderWriterMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
@@ -814,7 +816,13 @@
 void Locks::Init() {
   if (logging_lock_ != nullptr) {
     // Already initialized.
+    if (kRuntimeISA == kX86) {
+      DCHECK(modify_ldt_lock_ != nullptr);
+    } else {
+      DCHECK(modify_ldt_lock_ == nullptr);
+    }
     DCHECK(abort_lock_ != nullptr);
+    DCHECK(allocated_thread_ids_lock_ != nullptr);
     DCHECK(breakpoint_lock_ != nullptr);
     DCHECK(classlinker_classes_lock_ != nullptr);
     DCHECK(heap_bitmap_lock_ != nullptr);
@@ -827,32 +835,76 @@
     DCHECK(unexpected_signal_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
   } else {
-    logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
-    abort_lock_ = new Mutex("abort lock", kAbortLock, true);
+    // Create global locks in level order from highest lock level to lowest.
+    LockLevel current_lock_level = kMutatorLock;
+    DCHECK(mutator_lock_ == nullptr);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level);
 
+    #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
+        DCHECK_LT(new_level, current_lock_level); \
+        current_lock_level = new_level;
+
+    UPDATE_CURRENT_LOCK_LEVEL(kHeapBitmapLock);
+    DCHECK(heap_bitmap_lock_ == nullptr);
+    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kRuntimeShutdownLock);
+    DCHECK(runtime_shutdown_lock_ == nullptr);
+    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kProfilerLock);
+    DCHECK(profiler_lock_ == nullptr);
+    profiler_lock_ = new Mutex("profiler lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kTraceLock);
+    DCHECK(trace_lock_ == nullptr);
+    trace_lock_ = new Mutex("trace lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kThreadListLock);
+    DCHECK(thread_list_lock_ == nullptr);
+    thread_list_lock_ = new Mutex("thread list lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kBreakpointLock);
     DCHECK(breakpoint_lock_ == nullptr);
-    breakpoint_lock_ = new Mutex("breakpoint lock", kBreakpointLock);
+    breakpoint_lock_ = new Mutex("breakpoint lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock);
     DCHECK(classlinker_classes_lock_ == nullptr);
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
-                                                      kClassLinkerClassesLock);
-    DCHECK(heap_bitmap_lock_ == nullptr);
-    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
-    DCHECK(mutator_lock_ == nullptr);
-    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
-    DCHECK(runtime_shutdown_lock_ == nullptr);
-    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock);
-    DCHECK(thread_list_lock_ == nullptr);
-    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
-    DCHECK(thread_suspend_count_lock_ == nullptr);
-    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
-    DCHECK(trace_lock_ == nullptr);
-    trace_lock_ = new Mutex("trace lock", kTraceLock);
-    DCHECK(profiler_lock_ == nullptr);
-    profiler_lock_ = new Mutex("profiler lock", kProfilerLock);
-    DCHECK(unexpected_signal_lock_ == nullptr);
-    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
+                                                      current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kAllocatedThreadIdsLock);
+    DCHECK(allocated_thread_ids_lock_ == nullptr);
+    allocated_thread_ids_lock_ = new Mutex("allocated thread ids lock", current_lock_level);
+
+    if (kRuntimeISA == kX86) {
+      UPDATE_CURRENT_LOCK_LEVEL(kModifyLdtLock);
+      DCHECK(modify_ldt_lock_ == nullptr);
+      modify_ldt_lock_ = new Mutex("modify_ldt lock", current_lock_level);
+    }
+
+    UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock);
     DCHECK(intern_table_lock_ == nullptr);
-    intern_table_lock_ = new Mutex("InternTable lock", kInternTableLock);
+    intern_table_lock_ = new Mutex("InternTable lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
+    DCHECK(abort_lock_ == nullptr);
+    abort_lock_ = new Mutex("abort lock", current_lock_level, true);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kThreadSuspendCountLock);
+    DCHECK(thread_suspend_count_lock_ == nullptr);
+    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kUnexpectedSignalLock);
+    DCHECK(unexpected_signal_lock_ == nullptr);
+    unexpected_signal_lock_ = new Mutex("unexpected signal lock", current_lock_level, true);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kLoggingLock);
+    DCHECK(logging_lock_ == nullptr);
+    logging_lock_ = new Mutex("logging lock", current_lock_level, true);
+
+    #undef UPDATE_CURRENT_LOCK_LEVEL
   }
 }
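Note: the rewritten Locks::Init() creates every global lock in strictly descending level order, so the source order of the function is itself the documented hierarchy, and the UPDATE_CURRENT_LOCK_LEVEL DCHECK catches any future insertion at the wrong level. A condensed sketch of the pattern:

    #include <cassert>

    enum Level { kLogging = 0, kAbort = 1, kInternTable = 2, kMutator = 3 };

    void InitSketch() {
      int current_level = kMutator;  // Highest level first.
    #define UPDATE_LEVEL(new_level)        \
      assert((new_level) < current_level); \
      current_level = (new_level);
      UPDATE_LEVEL(kInternTable);  // Each step must go strictly downward...
      UPDATE_LEVEL(kAbort);
      UPDATE_LEVEL(kLogging);      // ...ending at the lowest level.
    #undef UPDATE_LEVEL
    }
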
 
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index e13c8d5..522692e 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -74,6 +74,8 @@
   kPinTableLock,
   kLoadLibraryLock,
   kJdwpObjectRegistryLock,
+  kModifyLdtLock,
+  kAllocatedThreadIdsLock,
   kClassLinkerClassesLock,
   kBreakpointLock,
   kMonitorLock,
@@ -532,28 +534,34 @@
   // Guards shutdown of the runtime.
   static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
 
+  // Guards background profiler global state.
+  static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+
+  // Guards trace (i.e. traceview) requests.
+  static Mutex* trace_lock_ ACQUIRED_AFTER(profiler_lock_);
+
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(trace_lock_);
 
   // Guards breakpoints.
   static Mutex* breakpoint_lock_ ACQUIRED_AFTER(thread_list_lock_);
 
-  // Guards trace requests.
-  static Mutex* trace_lock_ ACQUIRED_AFTER(breakpoint_lock_);
-
-  // Guards profile objects.
-  static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_);
-
   // Guards lists of classes within the class linker.
-  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_);
+  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(breakpoint_lock_);
 
   // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
   // doesn't try to hold a higher level Mutex.
   #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_)
 
+  // Guards the allocation/deallocation of thread ids.
+  static Mutex* allocated_thread_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
+  // Guards modification of the LDT on x86.
+  static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_);
+
   // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
 
   // Have an exclusive aborting thread.
   static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
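Note: the ACQUIRED_AFTER edits keep the annotalysis declarations in line with the new runtime order (profiler and trace now sit above the thread list lock, and the two new locks slot in between classlinker_classes_lock_ and intern_table_lock_). A freestanding sketch of the attribute, assuming Clang's -Wthread-safety machinery, which parses acquired_after even though it does not yet enforce the ordering:

    #define CAPABILITY(x) __attribute__((capability(x)))
    #define ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__)))

    struct CAPABILITY("mutex") SketchMutex {};

    SketchMutex trace_lock;
    // Declares the ordering: if both locks are held, trace_lock was taken first.
    SketchMutex thread_list_lock ACQUIRED_AFTER(trace_lock);
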
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 7136c67..8c8a355 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4039,7 +4039,11 @@
   // Send a series of heap segment chunks.
   HeapChunkContext context((what == HPSG_WHAT_MERGED_OBJECTS), native);
   if (native) {
+#ifdef USE_DLMALLOC
     dlmalloc_inspect_all(HeapChunkContext::HeapChunkCallback, &context);
+#else
+    UNIMPLEMENTED(WARNING) << "Native heap inspection is only supported with dlmalloc";
+#endif
   } else {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ea1ccdd..fdc4367 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -893,10 +893,16 @@
   uint64_t gc_heap_end_ns = NanoTime();
   // We never move things in the native heap, so we can finish the GC at this point.
   FinishGC(self, collector::kGcTypeNone);
+  size_t native_reclaimed = 0;
+#if defined(USE_DLMALLOC)
   // Trim the native heap.
   dlmalloc_trim(0);
-  size_t native_reclaimed = 0;
   dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+#elif defined(USE_JEMALLOC)
+  // Jemalloc does its own internal trimming.
+#else
+  UNIMPLEMENTED(WARNING) << "Add trimming support";
+#endif
   uint64_t end_ns = NanoTime();
   VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
       << ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
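Note: trimming the native heap is allocator-specific. dlmalloc exposes dlmalloc_trim() plus an inspection hook used to madvise unused chunk pages back to the kernel, while jemalloc purges dirty pages on its own schedule, so its branch is deliberately empty. A sketch of the dispatch shape (the callback name is illustrative):

    #include <cstddef>

    size_t TrimNativeHeap() {
      size_t native_reclaimed = 0;
    #if defined(USE_DLMALLOC)
      // dlmalloc_trim(0);                                          // Shrink the heap top.
      // dlmalloc_inspect_all(MadviseCallback, &native_reclaimed);  // madvise() free chunks.
    #elif defined(USE_JEMALLOC)
      // Nothing to do: jemalloc releases dirty pages internally.
    #endif
      return native_reclaimed;
    }
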
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index f783edb..c53520d 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -111,7 +111,7 @@
   MutexLock mu(self, monitor_lock_);  // Uncontended mutex acquisition as monitor isn't yet public.
   CHECK(owner_ == nullptr || owner_ == self || owner_->IsSuspended());
   // Propagate the lock state.
-  LockWord lw(obj_->GetLockWord(false));
+  LockWord lw(GetObject()->GetLockWord(false));
   switch (lw.GetState()) {
     case LockWord::kThinLocked: {
       CHECK_EQ(owner_->GetThreadId(), lw.ThinLockOwner());
@@ -137,7 +137,7 @@
   }
   LockWord fat(this);
   // Publish the updated lock word, which may race with other threads.
-  bool success = obj_->CasLockWord(lw, fat);
+  bool success = GetObject()->CasLockWord(lw, fat);
   // Lock profiling.
   if (success && owner_ != nullptr && lock_profiling_threshold_ != 0) {
     locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_);
@@ -226,9 +226,9 @@
     // Do this before releasing the lock so that we don't get deflated.
     ++num_waiters_;
     monitor_lock_.Unlock(self);  // Let go of locks in order.
+    self->SetMonitorEnterObject(GetObject());
     {
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
-      self->SetMonitorEnterObject(obj_);
       MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
       if (owner_ != NULL) {  // Did the owner_ give the lock up?
         monitor_contenders_.Wait(self);  // Still contended so wait.
@@ -249,8 +249,8 @@
           }
         }
       }
-      self->SetMonitorEnterObject(nullptr);
     }
+    self->SetMonitorEnterObject(nullptr);
     monitor_lock_.Lock(self);  // Reacquire locks in order.
     --num_waiters_;
   }
@@ -363,7 +363,7 @@
     // We don't own this, so we're not allowed to unlock it.
     // The JNI spec says that we should throw IllegalMonitorStateException
     // in this case.
-    FailedUnlock(obj_, self, owner, this);
+    FailedUnlock(GetObject(), self, owner, this);
     return false;
   }
   return true;
@@ -895,7 +895,7 @@
     MutexLock mu(self, *thread->GetWaitMutex());
     Monitor* monitor = thread->GetWaitMonitor();
     if (monitor != nullptr) {
-      pretty_object = monitor->obj_;
+      pretty_object = monitor->GetObject();
     }
   } else if (state == kBlocked) {
     wait_message = "  - waiting to lock ";
@@ -1101,12 +1101,13 @@
   MutexLock mu(Thread::Current(), monitor_list_lock_);
   for (auto it = list_.begin(); it != list_.end(); ) {
     Monitor* m = *it;
-    mirror::Object* obj = m->GetObject();
+    // Disable the read barrier in GetObject() as this is called by the GC.
+    mirror::Object* obj = m->GetObject<kWithoutReadBarrier>();
     // The object of a monitor can be null if we have deflated it.
     mirror::Object* new_obj = obj != nullptr ? callback(obj, arg) : nullptr;
     if (new_obj == nullptr) {
       VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
-                    << m->GetObject();
+                    << obj;
       delete m;
       it = list_.erase(it);
     } else {
diff --git a/runtime/monitor.h b/runtime/monitor.h
index bc1b2ed4..7af2d4c 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -27,6 +27,7 @@
 #include "atomic.h"
 #include "base/mutex.h"
 #include "object_callbacks.h"
+#include "read_barrier.h"
 #include "thread_state.h"
 
 namespace art {
@@ -92,8 +93,9 @@
 
   static bool IsValidLockWord(LockWord lock_word);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Object* GetObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return obj_;
+    return ReadBarrier::BarrierForWeakRoot<mirror::Object, kReadBarrierOption>(obj_);
   }
 
   void SetObject(mirror::Object* object);
@@ -190,7 +192,9 @@
   // Owner's recursive lock depth.
   int lock_count_ GUARDED_BY(monitor_lock_);
 
-  // What object are we part of.
+  // The object we are part of. This is a weak root: do not access it
+  // directly; use GetObject() so reads are guarded by a read barrier.
   mirror::Object* obj_;
 
   // Threads currently waiting on this monitor.
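Note: with obj_ demoted to a weak root, every read must flow through GetObject(), whose template parameter lets the GC sweep in monitor.cc opt out of the (for now, pass-through) barrier and test liveness on the raw pointer. A minimal self-contained sketch of the accessor pattern:

    enum ReadBarrierOption { kWithReadBarrier, kWithoutReadBarrier };

    struct Object {};

    // Stand-in for ReadBarrier::BarrierForWeakRoot: the Baker/Brooks paths in
    // read_barrier-inl.h are still "to be implemented", so every case returns
    // the raw pointer today.
    template <typename T, ReadBarrierOption kOption>
    T* BarrierForWeakRoot(T* ref) { return ref; }

    class MonitorSketch {
     public:
      template <ReadBarrierOption kOption = kWithReadBarrier>
      Object* GetObject() const { return BarrierForWeakRoot<Object, kOption>(obj_); }
     private:
      Object* obj_ = nullptr;  // Weak root: never read directly.
    };

    // GC sweep: read the raw root, no barrier.
    //   Object* raw = m->GetObject<kWithoutReadBarrier>();
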
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 69b05f4..d9c9b59 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -155,6 +155,21 @@
   return env->NewStringUTF(kIsDebugBuild ? "libartd.so" : "libart.so");
 }
 
+static jstring VMRuntime_vmInstructionSet(JNIEnv* env, jobject) {
+  InstructionSet isa = Runtime::Current()->GetInstructionSet();
+  const char* isa_string = GetInstructionSetString(isa);
+  return env->NewStringUTF(isa_string);
+}
+
+static jboolean VMRuntime_is64Bit(JNIEnv* env, jobject) {
+  bool is64BitMode = (sizeof(void*) == sizeof(uint64_t));
+  return is64BitMode ? JNI_TRUE : JNI_FALSE;
+}
+
+static jboolean VMRuntime_isCheckJniEnabled(JNIEnv* env, jobject) {
+  return Runtime::Current()->GetJavaVM()->check_jni ? JNI_TRUE : JNI_FALSE;
+}
+
 static void VMRuntime_setTargetSdkVersionNative(JNIEnv* env, jobject, jint targetSdkVersion) {
   // This is the target SDK version of the app we're about to run. It is intended that this is a place
   // where workarounds can be enabled.
@@ -529,6 +544,9 @@
   NATIVE_METHOD(VMRuntime, trimHeap, "()V"),
   NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, vmInstructionSet, "()Ljava/lang/String;"),
+  NATIVE_METHOD(VMRuntime, is64Bit, "!()Z"),
+  NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
   NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V"),
 };
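Note: is64Bit and isCheckJniEnabled are registered with a '!' prefix on their signatures, ART's private marker for fast JNI methods that skip the runnable-state transition on entry; vmInstructionSet is registered without it. A sketch of the registration shape, assuming Android's jni.h (a stock JVM's RegisterNatives would reject the '!'):

    #include <cstdint>
    #include <jni.h>

    static jboolean VMRuntime_is64Bit(JNIEnv*, jobject) {
      return (sizeof(void*) == sizeof(uint64_t)) ? JNI_TRUE : JNI_FALSE;
    }

    // The '!' before "()Z" requests ART's fast-JNI calling convention.
    static const JNINativeMethod gMethods[] = {
      {"is64Bit", "!()Z", reinterpret_cast<void*>(VMRuntime_is64Bit)},
    };
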
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 5d90f1a..e17e60a 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -52,9 +52,15 @@
     jobject internal_trace = self->CreateInternalStackTrace<false>(soa);
     trace = Thread::InternalStackTraceToStackTraceElementArray(soa, internal_trace);
   } else {
-    // Suspend thread to build stack trace.
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
     bool timed_out;
+
+    // Check for a valid thread id.
+    if (thin_lock_id == ThreadList::kInvalidThreadId) {
+      return nullptr;
+    }
+
+    // Suspend thread to build stack trace.
     Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
     if (thread != nullptr) {
       {
diff --git a/runtime/oat.cc b/runtime/oat.cc
index cb9334a..10d335e 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '2', '8', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '2', '9', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 88e2f8f..4302c9e 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -43,6 +43,21 @@
   }
 }
 
+template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
+inline MirrorType* ReadBarrier::BarrierForWeakRoot(MirrorType* ref) {
+  UNUSED(ref);
+  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  if (with_read_barrier && kUseBakerReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else if (with_read_barrier && kUseBrooksReadBarrier) {
+    // To be implemented.
+    return ref;
+  } else {
+    return ref;
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_READ_BARRIER_INL_H_
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 73c3d43..e40e8ea 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -37,6 +37,10 @@
   ALWAYS_INLINE static MirrorType* Barrier(
       mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  ALWAYS_INLINE static MirrorType* BarrierForWeakRoot(MirrorType* ref)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 };
 
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 62fa323..9a7cb48 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -396,11 +396,11 @@
   // Convert a jobject into a Object*
   mirror::Object* DecodeJObject(jobject obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Object* GetMonitorEnterObject() const {
+  mirror::Object* GetMonitorEnterObject() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return tlsPtr_.monitor_enter_object;
   }
 
-  void SetMonitorEnterObject(mirror::Object* obj) {
+  void SetMonitorEnterObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     tlsPtr_.monitor_enter_object = obj;
   }
 
@@ -1045,9 +1045,6 @@
     // A cached pthread_t for the pthread underlying this Thread*.
     pthread_t pthread_self;
 
-    // Support for Mutex lock hierarchy bug detection.
-    BaseMutex* held_mutexes[kLockLevelCount];
-
     // If no_thread_suspension_ is > 0, what is causing that assertion.
     const char* last_no_thread_suspension_cause;
 
@@ -1074,6 +1071,9 @@
     // Thread-local allocation stack data/routines.
     mirror::Object** thread_local_alloc_stack_top;
     mirror::Object** thread_local_alloc_stack_end;
+
+    // Support for Mutex lock hierarchy bug detection.
+    BaseMutex* held_mutexes[kLockLevelCount];
   } tlsPtr_;
 
   // Guards the 'interrupted_' and 'wait_monitor_' members.
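Note: relocating held_mutexes[] to the tail of tlsPtr_ presumably keeps the members that compiled code addresses at small, stable offsets; the kLockLevelCount-sized debug array no longer shifts everything declared after it. A sketch of verifying that property with offsetof (field set abbreviated, sizes illustrative):

    #include <cstddef>
    #include <cstdio>

    struct TlsPtrSketch {
      void* pthread_self;
      const char* last_no_thread_suspension_cause;
      void* thread_local_alloc_stack_top;
      void* thread_local_alloc_stack_end;
      void* held_mutexes[64];  // Large, debug-only payload: keep it last.
    };

    int main() {
      // Hot members keep small offsets no matter how the array grows.
      std::printf("alloc_stack_top at offset %zu\n",
                  offsetof(TlsPtrSketch, thread_local_alloc_stack_top));
      return 0;
    }
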
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8046500..388c9b4 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -40,8 +40,7 @@
 namespace art {
 
 ThreadList::ThreadList()
-    : allocated_ids_lock_("allocated thread ids lock"),
-      suspend_all_count_(0), debug_suspend_all_count_(0),
+    : suspend_all_count_(0), debug_suspend_all_count_(0),
       thread_exit_cond_("thread exit condition variable", *Locks::thread_list_lock_) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1)));
 }
@@ -849,7 +848,7 @@
 }
 
 uint32_t ThreadList::AllocThreadId(Thread* self) {
-  MutexLock mu(self, allocated_ids_lock_);
+  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   for (size_t i = 0; i < allocated_ids_.size(); ++i) {
     if (!allocated_ids_[i]) {
       allocated_ids_.set(i);
@@ -861,7 +860,7 @@
 }
 
 void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
-  MutexLock mu(self, allocated_ids_lock_);
+  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
   --id;  // Zero is reserved to mean "invalid".
   DCHECK(allocated_ids_[id]) << id;
   allocated_ids_.reset(id);
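Note: thread-id allocation now takes the global Locks::allocated_thread_ids_lock_, giving the id pool a fixed place in the hierarchy instead of a per-ThreadList default-level mutex. A standalone sketch of the bitset allocator itself (sizes illustrative):

    #include <bitset>
    #include <cstddef>
    #include <cstdint>
    #include <mutex>

    constexpr size_t kMaxThreadId = 0xFFFF;

    std::mutex allocated_ids_lock;            // Stand-in for Locks::allocated_thread_ids_lock_.
    std::bitset<kMaxThreadId> allocated_ids;  // Bit i set => thin-lock id i+1 in use.

    uint32_t AllocThreadId() {
      std::lock_guard<std::mutex> mu(allocated_ids_lock);
      for (size_t i = 0; i < allocated_ids.size(); ++i) {
        if (!allocated_ids[i]) {
          allocated_ids.set(i);
          return i + 1;  // Zero is reserved to mean "invalid".
        }
      }
      return 0;  // Pool exhausted.
    }

    void ReleaseThreadId(uint32_t id) {
      std::lock_guard<std::mutex> mu(allocated_ids_lock);
      --id;  // Undo the +1 from allocation.
      allocated_ids.reset(id);
    }
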
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index a574340..d46987a 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -132,7 +132,7 @@
 
  private:
   uint32_t AllocThreadId(Thread* self);
-  void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);
+  void ReleaseThreadId(Thread* self, uint32_t id) LOCKS_EXCLUDED(Locks::allocated_thread_ids_lock_);
 
   bool Contains(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
   bool Contains(pid_t tid) EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
@@ -151,8 +151,7 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
 
-  mutable Mutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
+  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(Locks::allocated_thread_ids_lock_);
 
   // The actual list of all threads.
   std::list<Thread*> list_ GUARDED_BY(Locks::thread_list_lock_);