Merge "ART: Instanceof returns true if ref == 0 on x86-64"
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index 06e259a..920cde2 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -54,9 +54,9 @@
 /*
  * BasicBlock Optimization pass implementation start.
  */
-void BBOptimizations::Start(const PassDataHolder* data) const {
+void BBOptimizations::Start(PassDataHolder* data) const {
   DCHECK(data != nullptr);
-  CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
   DCHECK(c_unit != nullptr);
   /*
   * This pass has a different ordering depending on the suppress exception,
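
Note: Start()/End() drop the `const` on the data holder because PassMEDataHolder
gains a writable `data` slot (see the pass_me.h hunk below), and the down_cast
target loses its `const` accordingly. A minimal sketch of the resulting pattern,
using a hypothetical pass that stashes scratch state in the holder:

    void MyPass::Start(PassDataHolder* data) const {
      DCHECK(data != nullptr);
      PassMEDataHolder* pass_me_data = down_cast<PassMEDataHolder*>(data);
      pass_me_data->data = nullptr;  // Pass-private scratch, writable through the holder.
      CompilationUnit* c_unit = pass_me_data->c_unit;
      DCHECK(c_unit != nullptr);
    }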
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 0094790..2b097b5 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -31,9 +31,9 @@
   CacheFieldLoweringInfo() : PassME("CacheFieldLoweringInfo", kNoNodes) {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     cUnit->mir_graph->DoCacheFieldLoweringInfo();
   }
@@ -55,9 +55,9 @@
   CacheMethodLoweringInfo() : PassME("CacheMethodLoweringInfo", kNoNodes) {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     cUnit->mir_graph->DoCacheMethodLoweringInfo();
   }
@@ -86,9 +86,9 @@
     return cUnit->mir_graph->InlineCallsGate();
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InlineCallsStart();
   }
@@ -105,9 +105,9 @@
     return false;
   }
 
-  void End(const PassDataHolder* data) const {
+  void End(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     cUnit->mir_graph->InlineCallsEnd();
   }
@@ -122,9 +122,9 @@
   CodeLayout() : PassME("CodeLayout", kAllNodes, kOptimizationBasicBlockChange, "2_post_layout_cfg") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     cUnit->mir_graph->VerifyDataflow();
   }
@@ -142,9 +142,9 @@
     : PassME("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateNullChecksAndInferTypesStart();
   }
@@ -159,9 +159,9 @@
     return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb);
   }
 
-  void End(const PassDataHolder* data) const {
+  void End(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateNullChecksAndInferTypesEnd();
   }
@@ -189,9 +189,9 @@
     return cUnit->mir_graph->EliminateClassInitChecks(bb);
   }
 
-  void End(const PassDataHolder* data) const {
+  void End(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* cUnit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(cUnit != nullptr);
     cUnit->mir_graph->EliminateClassInitChecksEnd();
   }
@@ -232,7 +232,7 @@
     return ((cUnit->disable_opt & (1 << kBBOpt)) == 0);
   }
 
-  void Start(const PassDataHolder* data) const;
+  void Start(PassDataHolder* data) const;
 };
 
 }  // namespace art
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 55a4c78..de9ac4b 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -50,6 +50,8 @@
   kArg3,
   kArg4,
   kArg5,
+  kArg6,
+  kArg7,
   kFArg0,
   kFArg1,
   kFArg2,
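
Note: the two new entries let 64-bit backends name all eight GPR argument
registers through SpecialTargetRegister. A hedged sketch of the intended use
(arm64 bindings from the target_arm64.cc hunk below):

    RegStorage arg6 = TargetReg(kArg6);  // rs_x6 on arm64.
    RegStorage arg7 = TargetReg(kArg7);  // rs_x7 on arm64.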
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 89b1a75..b8d190a 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -165,7 +165,7 @@
     Instruction::CONST_STRING,
     Instruction::MONITOR_ENTER,
     Instruction::MONITOR_EXIT,
-    Instruction::THROW,
+    // Instruction::THROW,
     Instruction::GOTO,
     Instruction::GOTO_16,
     Instruction::GOTO_32,
@@ -295,12 +295,12 @@
     Instruction::SHL_LONG,
     Instruction::SHR_LONG,
     Instruction::USHR_LONG,
-    Instruction::REM_FLOAT,
+    // Instruction::REM_FLOAT,
     Instruction::ADD_DOUBLE,
     Instruction::SUB_DOUBLE,
     Instruction::MUL_DOUBLE,
     Instruction::DIV_DOUBLE,
-    Instruction::REM_DOUBLE,
+    // Instruction::REM_DOUBLE,
     Instruction::ADD_LONG_2ADDR,
     Instruction::SUB_LONG_2ADDR,
     Instruction::MUL_LONG_2ADDR,
@@ -312,12 +312,12 @@
     Instruction::SHL_LONG_2ADDR,
     Instruction::SHR_LONG_2ADDR,
     Instruction::USHR_LONG_2ADDR,
-    Instruction::REM_FLOAT_2ADDR,
+    // Instruction::REM_FLOAT_2ADDR,
     Instruction::ADD_DOUBLE_2ADDR,
     Instruction::SUB_DOUBLE_2ADDR,
     Instruction::MUL_DOUBLE_2ADDR,
     Instruction::DIV_DOUBLE_2ADDR,
-    Instruction::REM_DOUBLE_2ADDR,
+    // Instruction::REM_DOUBLE_2ADDR,
     // TODO(Arm64): Enable compiler pass
     // ----- ExtendedMIROpcode -----
     kMirOpPhi,
@@ -336,54 +336,54 @@
     kMirOpSelect,
 
 #if ARM64_USE_EXPERIMENTAL_OPCODES
-    // Instruction::MOVE_RESULT,
-    // Instruction::MOVE_RESULT_WIDE,
-    // Instruction::MOVE_RESULT_OBJECT,
-    // Instruction::CONST_STRING_JUMBO,
-    // Instruction::CONST_CLASS,
-    // Instruction::CHECK_CAST,
-    // Instruction::INSTANCE_OF,
-    // Instruction::ARRAY_LENGTH,
-    // Instruction::NEW_INSTANCE,
-    // Instruction::NEW_ARRAY,
-    // Instruction::FILLED_NEW_ARRAY,
-    // Instruction::FILLED_NEW_ARRAY_RANGE,
-    // Instruction::FILL_ARRAY_DATA,
+    Instruction::MOVE_RESULT,
+    Instruction::MOVE_RESULT_WIDE,
+    Instruction::MOVE_RESULT_OBJECT,
+    Instruction::CONST_STRING_JUMBO,
+    Instruction::CONST_CLASS,
+    Instruction::CHECK_CAST,
+    Instruction::INSTANCE_OF,
+    Instruction::ARRAY_LENGTH,
+    Instruction::NEW_INSTANCE,
+    Instruction::NEW_ARRAY,
+    Instruction::FILLED_NEW_ARRAY,
+    Instruction::FILLED_NEW_ARRAY_RANGE,
+    Instruction::FILL_ARRAY_DATA,
     // Instruction::UNUSED_3E,
     // Instruction::UNUSED_3F,
     // Instruction::UNUSED_40,
     // Instruction::UNUSED_41,
     // Instruction::UNUSED_42,
     // Instruction::UNUSED_43,
-    // Instruction::AGET,
-    // Instruction::AGET_WIDE,
-    // Instruction::AGET_OBJECT,
-    // Instruction::AGET_BOOLEAN,
-    // Instruction::AGET_BYTE,
-    // Instruction::AGET_CHAR,
-    // Instruction::AGET_SHORT,
-    // Instruction::APUT,
-    // Instruction::APUT_WIDE,
-    // Instruction::APUT_OBJECT,
-    // Instruction::APUT_BOOLEAN,
-    // Instruction::APUT_BYTE,
-    // Instruction::APUT_CHAR,
-    // Instruction::APUT_SHORT,
-    // Instruction::IPUT_WIDE,
-    // Instruction::IGET_WIDE,
-    // Instruction::SGET_WIDE,
-    // Instruction::SPUT_WIDE,
+    Instruction::AGET,
+    Instruction::AGET_WIDE,
+    Instruction::AGET_OBJECT,
+    Instruction::AGET_BOOLEAN,
+    Instruction::AGET_BYTE,
+    Instruction::AGET_CHAR,
+    Instruction::AGET_SHORT,
+    Instruction::APUT,
+    Instruction::APUT_WIDE,
+    Instruction::APUT_OBJECT,
+    Instruction::APUT_BOOLEAN,
+    Instruction::APUT_BYTE,
+    Instruction::APUT_CHAR,
+    Instruction::APUT_SHORT,
+    Instruction::IPUT_WIDE,
+    Instruction::IGET_WIDE,
+    Instruction::SGET_WIDE,
+    Instruction::SPUT_WIDE,
     Instruction::INVOKE_VIRTUAL,
     Instruction::INVOKE_SUPER,
     Instruction::INVOKE_DIRECT,
     Instruction::INVOKE_STATIC,
     Instruction::INVOKE_INTERFACE,
-    // Instruction::RETURN_VOID_BARRIER,
-    // Instruction::INVOKE_VIRTUAL_RANGE,
-    // Instruction::INVOKE_SUPER_RANGE,
-    // Instruction::INVOKE_DIRECT_RANGE,
-    // Instruction::INVOKE_STATIC_RANGE,
-    // Instruction::INVOKE_INTERFACE_RANGE,
+    Instruction::RETURN_VOID_BARRIER,
+    Instruction::INVOKE_VIRTUAL_RANGE,
+    Instruction::INVOKE_SUPER_RANGE,
+    Instruction::INVOKE_DIRECT_RANGE,
+    Instruction::INVOKE_STATIC_RANGE,
+    Instruction::INVOKE_INTERFACE_RANGE,
     // Instruction::UNUSED_79,
     // Instruction::UNUSED_7A,
     // Instruction::IGET_QUICK,
@@ -714,19 +714,9 @@
 static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
   uint32_t shorty_size = strlen(shorty);
   CHECK_GE(shorty_size, 1u);
-  // Set a limitation on maximum number of parameters.
-  // Note : there is an implied "method*" parameter, and probably "this" as well.
-  // 1 is for the return type. Currently, we only accept 2 parameters at the most.
-  // (x86_64): For now we have the same limitation. But we might want to split this
-  //           check in future into two separate cases for arm64 and x86_64.
-  if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) {
-    return false;
-  }
 
-  const char* supported_types = arm64_supported_types;
-  if (instruction_set == kX86_64) {
-    supported_types = x86_64_supported_types;
-  }
+  const char* supported_types =
+      (instruction_set == kX86_64) ? x86_64_supported_types : arm64_supported_types;
   for (uint32_t i = 0; i < shorty_size; i++) {
     if (strchr(supported_types, shorty[i]) == nullptr) {
       return false;
@@ -791,9 +781,6 @@
         }
       }
     }
-
-    LOG(INFO) << "Using experimental instruction set A64 for "
-              << PrettyMethod(method_idx, dex_file);
   }
   return true;
 }
@@ -890,7 +877,6 @@
   } else if (cu.instruction_set == kArm64) {
     // TODO(Arm64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
-    cu.enable_debug |= (1 << kDebugCodegenDump);
   }
 
   cu.StartTimingSplit("BuildMIRGraph");
@@ -928,7 +914,8 @@
 
   cu.NewTimingSplit("MIROpt:CheckFilters");
   if (cu.mir_graph->SkipCompilation()) {
-    return NULL;
+    VLOG(compiler) << "Skipping method: " << PrettyMethod(method_idx, dex_file);
+    return nullptr;
   }
 
   /* Create the pass driver and launch it */
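
Note: with the argument-count cap gone, CanCompileShorty() filters only on the
supported type characters. A sketch of the effect, where shorty "VJIF" stands
for void f(long, int, float), a signature the old `1 + 2` limit rejected on
arm64:

    // Accepted now, provided 'V', 'J', 'I' and 'F' all appear in
    // arm64_supported_types.
    bool can_compile = CanCompileShorty("VJIF", kArm64);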
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index b4906d6..dbb5366 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -64,7 +64,7 @@
   /**
    * @brief Start of the pass: called before the Worker function.
    */
-  virtual void Start(const PassDataHolder* data) const {
+  virtual void Start(PassDataHolder* data) const {
     // Unused parameter.
     UNUSED(data);
   }
@@ -72,7 +72,7 @@
   /**
    * @brief End of the pass: called after the WalkBasicBlocks function.
    */
-  virtual void End(const PassDataHolder* data) const {
+  virtual void End(PassDataHolder* data) const {
     // Unused parameter.
     UNUSED(data);
   }
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
index 9efd5ae..ff69865 100644
--- a/compiler/dex/pass_me.h
+++ b/compiler/dex/pass_me.h
@@ -42,6 +42,7 @@
   public:
     CompilationUnit* c_unit;
     BasicBlock* bb;
+    void* data;
 };
 
 enum DataFlowAnalysisMode {
diff --git a/compiler/dex/post_opt_passes.cc b/compiler/dex/post_opt_passes.cc
index 58700a4..1371652 100644
--- a/compiler/dex/post_opt_passes.cc
+++ b/compiler/dex/post_opt_passes.cc
@@ -74,9 +74,9 @@
   return false;
 }
 
-void CalculatePredecessors::Start(const PassDataHolder* data) const {
+void CalculatePredecessors::Start(PassDataHolder* data) const {
   DCHECK(data != nullptr);
-  CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+  CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
   DCHECK(c_unit != nullptr);
   // First get the MIRGraph here to factorize a bit the code.
   MIRGraph *mir_graph = c_unit->mir_graph.get();
diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h
index f203505..445c46d 100644
--- a/compiler/dex/post_opt_passes.h
+++ b/compiler/dex/post_opt_passes.h
@@ -32,11 +32,11 @@
   InitializeData() : PassME("InitializeData") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     // New blocks may have been inserted so the first thing we do is ensure that
     // the c_unit's number of blocks matches the actual count of basic blocks.
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph.get()->InitializeBasicBlockData();
     c_unit->mir_graph.get()->SSATransformationStart();
@@ -78,7 +78,7 @@
   CalculatePredecessors() : PassME("CalculatePredecessors") {
   }
 
-  void Start(const PassDataHolder* data) const;
+  void Start(PassDataHolder* data) const;
 };
 
 /**
@@ -90,9 +90,9 @@
   DFSOrders() : PassME("DFSOrders") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph.get()->ComputeDFSOrders();
   }
@@ -107,17 +107,17 @@
   BuildDomination() : PassME("BuildDomination") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph.get()->ComputeDominators();
     c_unit->mir_graph.get()->CompilerInitializeSSAConversion();
   }
 
-  void End(const PassDataHolder* data) const {
+  void End(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     // Verify the dataflow information after the pass.
     if (c_unit->enable_debug & (1 << kDebugVerifyDataflow)) {
@@ -135,9 +135,9 @@
   DefBlockMatrix() : PassME("DefBlockMatrix") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph.get()->ComputeDefBlockMatrix();
   }
@@ -152,9 +152,9 @@
   CreatePhiNodes() : PassME("CreatePhiNodes") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph.get()->InsertPhiNodes();
   }
@@ -170,9 +170,9 @@
   ClearVisitedFlag() : PassME("ClearVisitedFlag") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph.get()->ClearAllVisitedFlags();
   }
@@ -187,9 +187,9 @@
   SSAConversion() : PassME("SSAConversion") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     MIRGraph *mir_graph = c_unit->mir_graph.get();
     mir_graph->DoDFSPreOrderSSARename(mir_graph->GetEntryBlock());
@@ -226,9 +226,9 @@
   PerformInitRegLocations() : PassME("PerformInitRegLocation") {
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph->InitRegLocations();
   }
@@ -254,9 +254,9 @@
     return false;
   }
 
-  void Start(const PassDataHolder* data) const {
+  void Start(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph->InitializeConstantPropagation();
   }
@@ -271,9 +271,9 @@
   FreeData() : PassME("FreeData") {
   }
 
-  void End(const PassDataHolder* data) const {
+  void End(PassDataHolder* data) const {
     DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
     DCHECK(c_unit != nullptr);
     c_unit->mir_graph.get()->SSATransformationEnd();
   }
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 95bcfbd..4499862 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -205,8 +205,9 @@
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
                                   ConditionCode ccode);
     LIR* LoadFPConstantValue(int r_dest, int value);
-    LIR* LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement,
-                              RegStorage r_src_dest, RegStorage r_work = RegStorage::InvalidReg());
+    LIR* LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base,
+                                              int displacement, RegStorage r_src_dest,
+                                              RegStorage r_work = RegStorage::InvalidReg());
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void AssignDataOffsets();
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 61d3d56..b236f99 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -820,15 +820,17 @@
 }
 
 // Helper function for LoadBaseDispBody()/StoreBaseDispBody().
-LIR* ArmMir2Lir::LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement,
-                                      RegStorage r_src_dest, RegStorage r_work) {
+LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base,
+                                                      int displacement, RegStorage r_src_dest,
+                                                      RegStorage r_work) {
   DCHECK_EQ(displacement & 3, 0);
-  int encoded_disp = (displacement & 1020) >> 2;  // Within range of the instruction.
+  constexpr int kOffsetMask = 0xff << 2;
+  int encoded_disp = (displacement & kOffsetMask) >> 2;  // Within range of the instruction.
   RegStorage r_ptr = r_base;
-  if ((displacement & ~1020) != 0) {
+  if ((displacement & ~kOffsetMask) != 0) {
     r_ptr = r_work.Valid() ? r_work : AllocTemp();
-    // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB.
-    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
+    // Add displacement & ~kOffsetMask to base; it's a single instruction for up to +-256KiB.
+    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~kOffsetMask);
   }
   LIR* lir = nullptr;
   if (!r_src_dest.IsPair()) {
@@ -837,7 +839,7 @@
     lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(),
                   encoded_disp);
   }
-  if ((displacement & ~1020) != 0 && !r_work.Valid()) {
+  if ((displacement & ~kOffsetMask) != 0 && !r_work.Valid()) {
     FreeTemp(r_ptr);
   }
   return lir;
@@ -863,11 +865,12 @@
     case k64:
       if (r_dest.IsFloat()) {
         DCHECK(!r_dest.IsPair());
-        load = LoadStoreMaxDisp1020(kThumb2Vldrd, r_base, displacement, r_dest);
+        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrd, r_base, displacement, r_dest);
       } else {
         DCHECK(r_dest.IsPair());
         // Use the r_dest.GetLow() for the temporary pointer if needed.
-        load = LoadStoreMaxDisp1020(kThumb2LdrdI8, r_base, displacement, r_dest, r_dest.GetLow());
+        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2LdrdI8, r_base, displacement, r_dest,
+                                                    r_dest.GetLow());
       }
       already_generated = true;
       break;
@@ -878,7 +881,7 @@
     case kReference:
       if (r_dest.IsFloat()) {
         DCHECK(r_dest.IsSingle());
-        load = LoadStoreMaxDisp1020(kThumb2Vldrs, r_base, displacement, r_dest);
+        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrs, r_base, displacement, r_dest);
         already_generated = true;
         break;
       }
@@ -1001,10 +1004,10 @@
     case k64:
       if (r_src.IsFloat()) {
         DCHECK(!r_src.IsPair());
-        store = LoadStoreMaxDisp1020(kThumb2Vstrd, r_base, displacement, r_src);
+        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrd, r_base, displacement, r_src);
       } else {
         DCHECK(r_src.IsPair());
-        store = LoadStoreMaxDisp1020(kThumb2StrdI8, r_base, displacement, r_src);
+        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2StrdI8, r_base, displacement, r_src);
       }
       already_generated = true;
       break;
@@ -1015,7 +1018,7 @@
     case kReference:
       if (r_src.IsFloat()) {
         DCHECK(r_src.IsSingle());
-        store = LoadStoreMaxDisp1020(kThumb2Vstrs, r_base, displacement, r_src);
+        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrs, r_base, displacement, r_src);
         already_generated = true;
         break;
       }
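
Note: the new helper name spells out the encoding constraint: these Thumb-2
load/store forms take an 8-bit immediate scaled by 4, so the largest directly
encodable displacement is 255 * 4 = 1020 (the old name). A worked example of
the split, as a sketch:

    // displacement = 1032 (0x408), kOffsetMask = 0x3fc:
    //   encoded_disp = (1032 & 0x3fc) >> 2 = 8 >> 2 = 2   // imm8 field, byte offset 8
    //   high part    =  1032 & ~0x3fc      = 1024         // added to the base first
    // i.e. roughly:  ADD r_ptr, r_base, #1024 ; VLDR d0, [r_ptr, #8]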
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 9362147..c5bd005 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -688,9 +688,10 @@
 
               // Fail, if `expected' contains an unsatisfied requirement.
               if (expected != nullptr) {
-                // TODO(Arm64): make this FATAL.
-                LOG(WARNING) << "Bad argument n. " << i << " of " << encoder->name
-                             << ". Expected " << expected << ", got 0x" << std::hex << operand;
+                LOG(WARNING) << "Method: " << PrettyMethod(cu_->method_idx, *cu_->dex_file)
+                             << " @ 0x" << std::hex << lir->dalvik_offset;
+                LOG(FATAL) << "Bad argument n. " << i << " of " << encoder->name
+                           << ". Expected " << expected << ", got 0x" << std::hex << operand;
               }
             }
 
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 59eec3d..f1748ef 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -142,7 +142,7 @@
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
   // TODO(Arm64): generate "ldr w3, [x1,w2,sxtw #2]" rather than "ldr w3, [x1,x2,lsl #2]"?
-  LoadBaseIndexed(table_base, key_reg, As64BitReg(disp_reg), 2, k32);
+  LoadBaseIndexed(table_base, As64BitReg(key_reg), As64BitReg(disp_reg), 2, k32);
 
   // Get base branch address.
   RegStorage branch_reg = AllocTempWide();
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 9a80c69..a79c4fa 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -20,9 +20,45 @@
 #include "arm64_lir.h"
 #include "dex/compiler_internals.h"
 
+#include <map>
+
 namespace art {
 
 class Arm64Mir2Lir : public Mir2Lir {
+ protected:
+  // TODO: consolidate 64-bit target support.
+  class InToRegStorageMapper {
+   public:
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+    virtual ~InToRegStorageMapper() {}
+  };
+
+  class InToRegStorageArm64Mapper : public InToRegStorageMapper {
+   public:
+    InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
+    virtual ~InToRegStorageArm64Mapper() {}
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+   private:
+    int cur_core_reg_;
+    int cur_fp_reg_;
+  };
+
+  class InToRegStorageMapping {
+   public:
+    InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false),
+                              initialized_(false) {}
+    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
+    int GetMaxMappedIn() { return max_mapped_in_; }
+    bool IsThereStackMapped() { return is_there_stack_mapped_; }
+    RegStorage Get(int in_position);
+    bool IsInitialized() { return initialized_; }
+   private:
+    std::map<int, RegStorage> mapping_;
+    int max_mapped_in_;
+    bool is_there_stack_mapped_;
+    bool initialized_;
+  };
+
   public:
     Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
@@ -219,12 +255,21 @@
     bool InexpensiveConstantDouble(int64_t value);
 
     void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
-    int LoadArgRegs(CallInfo* info, int call_state,
-                    NextCallInsn next_call_insn,
-                    const MethodReference& target_method,
-                    uint32_t vtable_idx,
-                    uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                    bool skip_this);
+
+    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                             NextCallInsn next_call_insn,
+                             const MethodReference& target_method,
+                             uint32_t vtable_idx,
+                             uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                             bool skip_this);
+
+    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                           NextCallInsn next_call_insn,
+                           const MethodReference& target_method,
+                           uint32_t vtable_idx,
+                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                           bool skip_this);
+    InToRegStorageMapping in_to_reg_storage_mapping_;
 
   private:
     /**
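
Note: InToRegStorageMapping separates "which in lands in which register" from
the code that loads the arguments. A usage sketch, assuming arg_locs/count
describe the method's ins:

    InToRegStorageArm64Mapper mapper;
    InToRegStorageMapping mapping;
    mapping.Initialize(arg_locs, count, &mapper);
    RegStorage reg = mapping.Get(2);  // Physical reg for in #2, or InvalidReg().
    if (!reg.Valid()) {
      // in #2 was not mapped and is passed on the stack.
    }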
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 8112c2e..2c6b11d 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -551,8 +551,11 @@
 
 // Decrement register and branch on condition
 LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
-  // Combine sub & test using sub setflags encoding here
-  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
+  // Combine sub & test using the sub-setflags (SUBS) encoding here.  We need to
+  // make sure a subtract form that sets the condition flags is used, so generate
+  // it explicitly.
+  // TODO: might be best to add a new op, kOpSubs, and handle it generically.
+  ArmOpcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd);
+  NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1);  // For value == 1, this should set flags.
   DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
   return OpCondBranch(c_code, target);
 }
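
Note: on A64 the generic kOpSub may lower to a form that does not set flags, so
the flag-setting SUBS encoding is selected explicitly. Roughly the intended
assembly for a 32-bit counter in w0, sketched:

    // subs w0, w0, #1   -- decrement and set NZCV
    // b.eq <target>     -- branch on the condition just computed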
@@ -676,9 +679,6 @@
  */
 void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale) {
-  // TODO(Arm64): check this.
-  UNIMPLEMENTED(WARNING);
-
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -720,7 +720,8 @@
     } else {
       // No special indexed operation, lea + load w/ displacement
       reg_ptr = AllocTempRef();
-      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
+                       EncodeShift(kA64Lsl, scale));
       FreeTemp(rl_index.reg);
     }
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -754,7 +755,7 @@
       GenArrayBoundsCheck(rl_index.reg, reg_len);
       FreeTemp(reg_len);
     }
-    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
+    LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size);
     MarkPossibleNullPointerException(opt_flags);
     FreeTemp(reg_ptr);
     StoreValue(rl_dest, rl_result);
@@ -767,9 +768,6 @@
  */
 void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
-  // TODO(Arm64): check this.
-  UNIMPLEMENTED(WARNING);
-
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   bool constant_index = rl_index.is_const;
@@ -825,7 +823,8 @@
       rl_src = LoadValue(rl_src, reg_class);
     }
     if (!constant_index) {
-      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
+      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
+                       EncodeShift(kA64Lsl, scale));
     }
     if (needs_range_check) {
       if (constant_index) {
@@ -846,7 +845,7 @@
       GenArrayBoundsCheck(rl_index.reg, reg_len);
       FreeTemp(reg_len);
     }
-    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
+    StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size);
     MarkPossibleNullPointerException(opt_flags);
   }
   if (allocated_reg_ptr_temp) {
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index e2846ae..fba368a 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -105,7 +105,6 @@
 
 // Return a target-dependent special register.
 RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) {
-  // TODO(Arm64): this function doesn't work for hard-float ABI.
   RegStorage res_reg = RegStorage::InvalidReg();
   switch (reg) {
     case kSelf: res_reg = rs_rA64_SELF; break;
@@ -117,12 +116,20 @@
     case kArg1: res_reg = rs_x1; break;
     case kArg2: res_reg = rs_x2; break;
     case kArg3: res_reg = rs_x3; break;
+    case kArg4: res_reg = rs_x4; break;
+    case kArg5: res_reg = rs_x5; break;
+    case kArg6: res_reg = rs_x6; break;
+    case kArg7: res_reg = rs_x7; break;
     case kFArg0: res_reg = rs_f0; break;
     case kFArg1: res_reg = rs_f1; break;
     case kFArg2: res_reg = rs_f2; break;
     case kFArg3: res_reg = rs_f3; break;
+    case kFArg4: res_reg = rs_f4; break;
+    case kFArg5: res_reg = rs_f5; break;
+    case kFArg6: res_reg = rs_f6; break;
+    case kFArg7: res_reg = rs_f7; break;
     case kRet0: res_reg = rs_x0; break;
-    case kRet1: res_reg = rs_x0; break;
+    case kRet1: res_reg = rs_x1; break;
     case kInvokeTgt: res_reg = rs_rA64_LR; break;
     case kHiddenArg: res_reg = rs_x12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
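
Note: the kRet1 change in the hunk above is a genuine fix: kRet1 previously
aliased rs_x0, so reading a wide result's second word would have returned the
first. It now maps to x1, the next core register. Sketch:

    RegStorage lo = TargetReg(kRet0);  // x0
    RegStorage hi = TargetReg(kRet1);  // x1 after this change (was x0)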
@@ -132,10 +139,6 @@
   return res_reg;
 }
 
-RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  return RegStorage::InvalidReg();
-}
-
 /*
  * Decode the register id. This routine makes assumptions on the encoding made by RegStorage.
  */
@@ -738,18 +741,44 @@
 
 /* To be used when explicitly managing register use */
 void Arm64Mir2Lir::LockCallTemps() {
+  // TODO: needs cleanup.
   LockTemp(rs_x0);
   LockTemp(rs_x1);
   LockTemp(rs_x2);
   LockTemp(rs_x3);
+  LockTemp(rs_x4);
+  LockTemp(rs_x5);
+  LockTemp(rs_x6);
+  LockTemp(rs_x7);
+  LockTemp(rs_f0);
+  LockTemp(rs_f1);
+  LockTemp(rs_f2);
+  LockTemp(rs_f3);
+  LockTemp(rs_f4);
+  LockTemp(rs_f5);
+  LockTemp(rs_f6);
+  LockTemp(rs_f7);
 }
 
 /* To be used when explicitly managing register use */
 void Arm64Mir2Lir::FreeCallTemps() {
+  // TODO: needs cleanup.
   FreeTemp(rs_x0);
   FreeTemp(rs_x1);
   FreeTemp(rs_x2);
   FreeTemp(rs_x3);
+  FreeTemp(rs_x4);
+  FreeTemp(rs_x5);
+  FreeTemp(rs_x6);
+  FreeTemp(rs_x7);
+  FreeTemp(rs_f0);
+  FreeTemp(rs_f1);
+  FreeTemp(rs_f2);
+  FreeTemp(rs_f3);
+  FreeTemp(rs_f4);
+  FreeTemp(rs_f5);
+  FreeTemp(rs_f6);
+  FreeTemp(rs_f7);
 }
 
 RegStorage Arm64Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
@@ -786,6 +815,69 @@
   return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
 }
 
+RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or_float,
+                                                               bool is_wide) {
+  const RegStorage coreArgMappingToPhysicalReg[] =
+      {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7};
+  const int coreArgMappingToPhysicalRegSize =
+      sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
+  const RegStorage fpArgMappingToPhysicalReg[] =
+      {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7};
+  const int fpArgMappingToPhysicalRegSize =
+      sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);
+
+  RegStorage result = RegStorage::InvalidReg();
+  if (is_double_or_float) {
+    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+      result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
+      if (result.Valid()) {
+        // TODO: switching between widths remains a bit ugly.  Better way?
+        int res_reg = result.GetReg();
+        result = is_wide ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg);
+      }
+    }
+  } else {
+    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = coreArgMappingToPhysicalReg[cur_core_reg_++];
+      if (result.Valid()) {
+        // TODO: switching between widths remains a bit ugly.  Better way?
+        int res_reg = result.GetReg();
+        result = is_wide ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
+      }
+    }
+  }
+  return result;
+}
+
+RegStorage Arm64Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+  DCHECK(IsInitialized());
+  auto res = mapping_.find(in_position);
+  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+}
+
+void Arm64Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
+                                                     InToRegStorageMapper* mapper) {
+  DCHECK(mapper != nullptr);
+  max_mapped_in_ = -1;
+  is_there_stack_mapped_ = false;
+  for (int in_position = 0; in_position < count; in_position++) {
+    RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
+    if (reg.Valid()) {
+      mapping_[in_position] = reg;
+      max_mapped_in_ = std::max(max_mapped_in_, in_position);
+      if (reg.Is64BitSolo()) {
+        // We covered 2 args, so skip the next one.
+        in_position++;
+      }
+    } else {
+      is_there_stack_mapped_ = true;
+    }
+  }
+  initialized_ = true;
+}
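
Note: GetNextReg() advances the core and FP argument registers independently,
in an AAPCS64-like scheme where x1-x7 carry core ins (x0 holds the method
pointer) and f0-f7 carry FP ins. A worked sketch for ins (long, float, int):

    // arg words 0-1: long  -> Solo64(x1)       (word 1 skipped by Initialize())
    // arg word  2:   float -> FloatSolo32(f0)
    // arg word  3:   int   -> Solo32(x2)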
+
+// Deprecated.  Use the new mechanism.
 // TODO(Arm64): reuse info in QuickArgumentVisitor?
 static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
                                     OpSize* op_size) {
@@ -805,7 +897,7 @@
     }
   } else {
     int n = *num_gpr_used;
-    if (n < 7) {
+    if (n < 8) {
       *num_gpr_used = n + 1;
       if (loc->wide) {
         *op_size = k64;
@@ -820,6 +912,18 @@
   return RegStorage::InvalidReg();
 }
 
+RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  if (!in_to_reg_storage_mapping_.IsInitialized()) {
+    int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
+
+    InToRegStorageArm64Mapper mapper;
+    in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper);
+  }
+  return in_to_reg_storage_mapping_.Get(arg_num);
+}
+
 /*
  * If there are any ins passed in registers that have not been promoted
  * to a callee-save register, flush them to the frame.  Perform initial
@@ -888,33 +992,188 @@
   }
 }
 
-int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state,
-                              NextCallInsn next_call_insn,
-                              const MethodReference& target_method,
-                              uint32_t vtable_idx, uintptr_t direct_code,
-                              uintptr_t direct_method, InvokeType type, bool skip_this) {
-  int last_arg_reg = TargetReg(kArg3).GetReg();
-  int next_reg = TargetReg(kArg1).GetReg();
-  int next_arg = 0;
-  if (skip_this) {
-    next_reg++;
-    next_arg++;
-  }
-  for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) {
-    RegLocation rl_arg = info->args[next_arg++];
-    rl_arg = UpdateRawLoc(rl_arg);
-    if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) {
-      LoadValueDirectWideFixed(rl_arg, RegStorage::Solo64(next_reg));
-      next_arg++;
-    } else {
-      if (rl_arg.wide) {
-        rl_arg = NarrowRegLoc(rl_arg);
-        rl_arg.is_const = false;
+/*
+ * Load up to 5 arguments; with the register mapping below they all land in
+ * argument registers.  On entry kArg0 contains the current method pointer,
+ * and as part of the load sequence, it must be replaced with
+ * the target method pointer.
+ */
+int Arm64Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
+                                       int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
+                                       const MethodReference& target_method,
+                                       uint32_t vtable_idx, uintptr_t direct_code,
+                                       uintptr_t direct_method, InvokeType type, bool skip_this) {
+  return GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn,
+                            target_method, vtable_idx, direct_code,
+                            direct_method, type, skip_this);
+}
+
+/*
+ * May have 0+ arguments (also used for jumbo).  Note that
+ * source virtual registers may be in physical registers, so may
+ * need to be flushed to home location before copying.  This
+ * applies to the arguments that end up being passed on the stack (see below).
+ *
+ * Two general strategies:
+ *    If this is a range call with two or more stack arguments,
+ *       bulk-copy the stack portion to the outs area a vreg at a time,
+ *       then load the register-mapped arguments (x1-x7 / f0-f7).
+ *    Otherwise,
+ *       store each unmapped argument to its out location individually,
+ *       then load the register-mapped arguments.
+ *
+ */
+int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
+                                     LIR** pcrLabel, NextCallInsn next_call_insn,
+                                     const MethodReference& target_method,
+                                     uint32_t vtable_idx, uintptr_t direct_code,
+                                     uintptr_t direct_method, InvokeType type, bool skip_this) {
+  /* If no arguments, just return */
+  if (info->num_arg_words == 0) {
+    return call_state;
+  }
+
+  const int start_index = skip_this ? 1 : 0;
+
+  InToRegStorageArm64Mapper mapper;
+  InToRegStorageMapping in_to_reg_storage_mapping;
+  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
+  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
+  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
+          in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
+  int regs_left_to_pass_via_stack =
+      info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
+
+  // First of all, check whether it makes sense to use bulk copying.
+  // The optimization is applicable only to the range case.
+  // TODO: make a constant instead of 2
+  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
+    // Scan the rest of the args - if in phys_reg flush to memory
+    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
+      RegLocation loc = info->args[next_arg];
+      if (loc.wide) {
+        loc = UpdateLocWide(loc);
+        if (loc.location == kLocPhysReg) {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
+        }
+        next_arg += 2;
+      } else {
+        loc = UpdateLoc(loc);
+        if (loc.location == kLocPhysReg) {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32);
+        }
+        next_arg++;
       }
-      LoadValueDirectFixed(rl_arg, RegStorage::Solo32(next_reg));
     }
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                direct_code, direct_method, type);
+
+    // Logic below assumes that the Method pointer is at offset zero from SP.
+    DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+    // The rest can be copied together
+    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped,
+                                                   cu_->instruction_set);
+
+    int current_src_offset = start_offset;
+    int current_dest_offset = outs_offset;
+
+    // Only dalvik regs are accessed in this loop; no next_call_insn() calls.
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    while (regs_left_to_pass_via_stack > 0) {
+      /*
+       * TODO: Improve by adding block copy for large number of arguments.  This
+       * should be done, if possible, as a target-depending helper.  For now, just
+       * copy a Dalvik vreg at a time.
+       */
+      // Moving 32 bits via a general-purpose register.
+      size_t bytes_to_move = sizeof(uint32_t);
+
+      // Instead of allocating a new temp, simply reuse one of the registers being used
+      // for argument passing.
+      RegStorage temp = TargetReg(kArg3);
+
+      // Now load the argument VR and store to the outs.
+      Load32Disp(TargetReg(kSp), current_src_offset, temp);
+      Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+
+      current_src_offset += bytes_to_move;
+      current_dest_offset += bytes_to_move;
+      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+    }
+    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
+  }
+
+  // Now handle the arguments that did not get mapped to registers, if any.
+  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
+    RegStorage regSingle = TargetReg(kArg2);
+    RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
+    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
+      RegLocation rl_arg = info->args[i];
+      rl_arg = UpdateRawLoc(rl_arg);
+      RegStorage reg = in_to_reg_storage_mapping.Get(i);
+      if (!reg.Valid()) {
+        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+
+        {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          if (rl_arg.wide) {
+            if (rl_arg.location == kLocPhysReg) {
+              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64);
+            } else {
+              LoadValueDirectWideFixed(rl_arg, regWide);
+              StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64);
+            }
+            i++;
+          } else {
+            if (rl_arg.location == kLocPhysReg) {
+              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32);
+            } else {
+              LoadValueDirectFixed(rl_arg, regSingle);
+              StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32);
+            }
+          }
+        }
+        call_state = next_call_insn(cu_, info, call_state, target_method,
+                                    vtable_idx, direct_code, direct_method, type);
+      }
+    }
+  }
+
+  // Finish with mapped registers
+  for (int i = start_index; i <= last_mapped_in; i++) {
+    RegLocation rl_arg = info->args[i];
+    rl_arg = UpdateRawLoc(rl_arg);
+    RegStorage reg = in_to_reg_storage_mapping.Get(i);
+    if (reg.Valid()) {
+      if (rl_arg.wide) {
+        LoadValueDirectWideFixed(rl_arg, reg);
+        i++;
+      } else {
+        LoadValueDirectFixed(rl_arg, reg);
+      }
+      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                                  direct_code, direct_method, type);
+    }
+  }
+
+  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                              direct_code, direct_method, type);
+  if (pcrLabel) {
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      Load32Disp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
   }
   return call_state;
 }
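
Note: a quick check of the bookkeeping above, sketched for eleven int arg words
in a range call:

    // mapped:    words 0..6 -> x1..x7 (last_mapped_in = 6, size_of_the_last_mapped = 1)
    // via stack: 11 - (6 + 1) = 4 words (>= 2 and is_range, so bulk copy runs)
    // bulk copy: four Load32Disp/Store32Disp pairs through the kArg3 scratch reg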
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 71e9e95..f384293 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -112,7 +112,7 @@
 LIR* Arm64Mir2Lir::LoadFPConstantValueWide(int r_dest, int64_t value) {
   DCHECK(RegStorage::IsDouble(r_dest));
   if (value == 0) {
-    return NewLIR2(kA64Fmov2Sx, r_dest, rwzr);
+    return NewLIR2(kA64Fmov2Sx, r_dest, rxzr);
   } else {
     int32_t encoded_imm = EncodeImmDouble(value);
     if (encoded_imm >= 0) {
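
Note: the zero-register operand must match the instruction width: kA64Fmov2Sx
moves a 64-bit GPR into a double register, so the source is the 64-bit xzr,
not wzr. Sketched assembly:

    // fmov d0, xzr   -- valid: 64-bit source for a 64-bit FP destination
    // fmov d0, wzr   -- width mismatch; rejected by the assembler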
@@ -778,6 +778,11 @@
   LIR* load;
   int expected_scale = 0;
   ArmOpcode opcode = kA64Brk1d;
+  DCHECK(r_base.Is64Bit());
+  // TODO: need a cleaner handling of index registers here and throughout.
+  if (r_index.Is32Bit()) {
+    r_index = As64BitReg(r_index);
+  }
 
   if (r_dest.IsFloat()) {
     if (r_dest.IsDouble()) {
@@ -846,6 +851,11 @@
   LIR* store;
   int expected_scale = 0;
   ArmOpcode opcode = kA64Brk1d;
+  DCHECK(r_base.Is64Bit());
+  // TODO: need a cleaner handling of index registers here and throughout.
+  if (r_index.Is32Bit()) {
+    r_index = As64BitReg(r_index);
+  }
 
   if (r_src.IsFloat()) {
     if (r_src.IsDouble()) {
@@ -968,8 +978,9 @@
     load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
   } else {
     // Use long sequence.
-    RegStorage r_scratch = AllocTemp();
-    LoadConstant(r_scratch, displacement);
+    // TODO: cleaner support for index/displacement registers?  Not a reference, but must match width.
+    RegStorage r_scratch = AllocTempWide();
+    LoadConstantWide(r_scratch, displacement);
     load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
     FreeTemp(r_scratch);
   }
@@ -1050,8 +1061,8 @@
     store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
   } else {
     // Use long sequence.
-    RegStorage r_scratch = AllocTemp();
-    LoadConstant(r_scratch, displacement);
+    RegStorage r_scratch = AllocTempWide();
+    LoadConstantWide(r_scratch, displacement);
     store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
     FreeTemp(r_scratch);
   }
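
Note: both fixes in this file enforce width agreement in A64 addressing: the
backend's register-offset encodings expect an X-register index (the extended
W-index forms remain a TODO), and a scratch register added to a 64-bit base
must itself be 64-bit. The normalization now applied, sketched:

    if (r_index.Is32Bit()) {
      r_index = As64BitReg(r_index);  // e.g. w2 -> x2 before "ldr w0, [x1, x2]".
    }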
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index f9081ce..3b99421 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -73,7 +73,7 @@
       m2l_->ResetRegPool();
       m2l_->ResetDefTracking();
       GenerateTargetLabel(kPseudoThrowTarget);
-      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+      if (m2l_->cu_->target64) {
         m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pThrowDivZero), true);
       } else {
         m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowDivZero), true);
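
Note: cu_->target64 behaves as a cached Is64BitInstructionSet(cu_->instruction_set),
so these entrypoint-size branches test a stored flag instead of recomputing the
predicate. A sketch of the assumed equivalence (the initialization site is not
part of this diff):

    cu.target64 = Is64BitInstructionSet(cu.instruction_set);
    // From here on, `cu.target64` and the predicate are interchangeable.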
@@ -96,7 +96,7 @@
       m2l_->ResetRegPool();
       m2l_->ResetDefTracking();
       GenerateTargetLabel(kPseudoThrowTarget);
-      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+      if (m2l_->cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
                                       index_, length_, true);
       } else {
@@ -129,7 +129,7 @@
 
       m2l_->OpRegCopy(m2l_->TargetReg(kArg1), length_);
       m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_);
-      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+      if (m2l_->cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
                                       m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
       } else {
@@ -158,7 +158,7 @@
       m2l_->ResetRegPool();
       m2l_->ResetDefTracking();
       GenerateTargetLabel(kPseudoThrowTarget);
-      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+      if (m2l_->cu_->target64) {
         m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pThrowNullPointer), true);
       } else {
         m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowNullPointer), true);
@@ -385,7 +385,7 @@
  */
 void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest,
                           RegLocation rl_src) {
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     GenNewArrayImpl<8>(this, cu_, type_idx, rl_dest, rl_src);
   } else {
     GenNewArrayImpl<4>(this, cu_, type_idx, rl_dest, rl_src);
@@ -414,7 +414,7 @@
   int elems = info->num_arg_words;
   int type_idx = info->index;
   FlushAllRegs();  /* Everything to home location */
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     GenFilledNewArrayCall<8>(this, cu_, elems, type_idx);
   } else {
     GenFilledNewArrayCall<4>(this, cu_, elems, type_idx);
@@ -457,12 +457,13 @@
      * critical.
      */
     // This is addressing the stack, which may be out of the 4G area.
-    RegStorage r_src = cu_->target64 ? AllocTempWide() : AllocTemp();
-    RegStorage r_dst = AllocTemp();
-    RegStorage r_idx = AllocTemp();
+    RegStorage r_src = AllocTempRef();
+    RegStorage r_dst = AllocTempRef();
+    RegStorage r_idx = AllocTempRef();  // Not really a reference, but match src/dst.
     RegStorage r_val;
     switch (cu_->instruction_set) {
       case kThumb2:
+      case kArm64:
         r_val = TargetReg(kLr);
         break;
       case kX86:
@@ -531,7 +532,7 @@
   void Compile() {
     LIR* unresolved_target = GenerateTargetLabel();
     uninit_->target = unresolved_target;
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeStaticStorage),
                                  storage_index_, true);
     } else {
@@ -640,7 +641,7 @@
     FreeTemp(r_base);
   } else {
     FlushAllRegs();  // Everything to home locations
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       GenSputCall<8>(this, is_long_or_double, is_object, &field_info, rl_src);
     } else {
       GenSputCall<4>(this, is_long_or_double, is_object, &field_info, rl_src);
@@ -734,7 +735,7 @@
     }
   } else {
     FlushAllRegs();  // Everything to home locations
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       GenSgetCall<8>(this, is_long_or_double, is_object, &field_info);
     } else {
       GenSgetCall<4>(this, is_long_or_double, is_object, &field_info);
@@ -801,7 +802,7 @@
       StoreValue(rl_dest, rl_result);
     }
   } else {
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       GenIgetCall<8>(this, is_long_or_double, is_object, &field_info, rl_obj);
     } else {
       GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj);
@@ -861,7 +862,7 @@
       MarkGCCard(rl_src.reg, rl_obj.reg);
     }
   } else {
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       GenIputCall<8>(this, is_long_or_double, is_object, &field_info, rl_obj, rl_src);
     } else {
       GenIputCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj, rl_src);
@@ -885,7 +886,7 @@
   bool needs_range_check = !(opt_flags & MIR_IGNORE_RANGE_CHECK);
   bool needs_null_check = !((cu_->disable_opt & (1 << kNullCheckElimination)) &&
       (opt_flags & MIR_IGNORE_NULL_CHECK));
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     GenArrayObjPutCall<8>(this, needs_range_check, needs_null_check, rl_array, rl_index, rl_src);
   } else {
     GenArrayObjPutCall<4>(this, needs_range_check, needs_null_check, rl_array, rl_index, rl_src);
@@ -894,14 +895,15 @@
 
 void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
   RegLocation rl_method = LoadCurrMethod();
-  RegStorage res_reg = AllocTemp();
+  DCHECK(!cu_->target64 || rl_method.reg.Is64Bit());
+  RegStorage res_reg = AllocTempRef();
   RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
   if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
                                                    *cu_->dex_file,
                                                    type_idx)) {
     // Call out to helper which resolves type and verifies access.
     // Resolved type returned in kRet0.
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
                               type_idx, rl_method.reg, true);
     } else {
@@ -936,7 +938,7 @@
         void Compile() {
           GenerateTargetLabel();
 
-          if (Is64BitInstructionSet(cu_->instruction_set)) {
+          if (cu_->target64) {
             m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_,
                                           rl_method_.reg, true);
           } else {
@@ -1005,7 +1007,7 @@
 
         void Compile() {
           GenerateTargetLabel();
-          if (Is64BitInstructionSet(cu_->instruction_set)) {
+          if (cu_->target64) {
             m2l_->CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pResolveString),
                                           r_method_, string_idx_, true);
           } else {
@@ -1094,7 +1096,7 @@
  * call Class::NewInstanceFromCode(type_idx, method);
  */
 void Mir2Lir::GenNewInstance(uint32_t type_idx, RegLocation rl_dest) {
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     GenNewInstanceImpl<8>(this, cu_, type_idx, rl_dest);
   } else {
     GenNewInstanceImpl<4>(this, cu_, type_idx, rl_dest);
@@ -1103,7 +1105,7 @@
 
 void Mir2Lir::GenThrow(RegLocation rl_src) {
   FlushAllRegs();
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pDeliverException), rl_src, true);
   } else {
     CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pDeliverException), rl_src, true);
@@ -1182,7 +1184,7 @@
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kArg0
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
                            type_idx, true);
     } else {
@@ -1207,7 +1209,7 @@
       LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
       // Not resolved
       // Call out to helper, which will return resolved type in kRet0
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
+      if (cu_->target64) {
         CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
       } else {
         CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
@@ -1247,7 +1249,7 @@
     }
   } else {
     if (cu_->instruction_set == kThumb2) {
-      RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ?
+      RegStorage r_tgt = cu_->target64 ?
           LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) :
           LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
       LIR* it = nullptr;
@@ -1269,7 +1271,7 @@
         LoadConstant(rl_result.reg, 1);     // assume true
         branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
       }
-      RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ?
+      RegStorage r_tgt = cu_->target64 ?
           LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial)) :
           LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
       OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));    // .ne case - arg0 <= class
@@ -1332,7 +1334,7 @@
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // returns Class* in kRet0
     // InitializeTypeAndVerifyAccess(idx, method)
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
                               type_idx, TargetReg(kArg1), true);
     } else {
@@ -1368,7 +1370,7 @@
 
           // Call out to helper, which will return resolved type in kArg0
           // InitializeTypeFromCode(idx, method)
-          if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+          if (m2l_->cu_->target64) {
             m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx_,
                                           m2l_->TargetReg(kArg1), true);
           } else {
@@ -1405,7 +1407,7 @@
         m2l_->LoadRefDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
                           m2l_->TargetReg(kArg1));
       }
-      if (Is64BitInstructionSet(m2l_->cu_->instruction_set)) {
+      if (m2l_->cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pCheckCast), m2l_->TargetReg(kArg2),
                                       m2l_->TargetReg(kArg1), true);
       } else {
@@ -1520,7 +1522,7 @@
 
 void Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                              RegLocation rl_src1, RegLocation rl_shift) {
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     GenShiftOpLongCall<8>(this, opcode, rl_src1, rl_shift);
   } else {
     GenShiftOpLongCall<4>(this, opcode, rl_src1, rl_shift);
@@ -1653,7 +1655,7 @@
     if (!done) {
       FlushAllRegs();   /* Send everything to home location */
       LoadValueDirectFixed(rl_src2, TargetReg(kArg1));
-      RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ?
+      RegStorage r_tgt = cu_->target64 ?
           CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod)) :
           CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(4, pIdivmod));
       LoadValueDirectFixed(rl_src1, TargetReg(kArg0));
@@ -1661,7 +1663,7 @@
         GenDivZeroCheck(TargetReg(kArg1));
       }
       // NOTE: callout here is not a safepoint.
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
+      if (cu_->target64) {
         CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), false /* not a safepoint */);
       } else {
         CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), false /* not a safepoint */);
@@ -1924,7 +1926,7 @@
         FlushAllRegs();   /* Everything to home location. */
         LoadValueDirectFixed(rl_src, TargetReg(kArg0));
         Clobber(TargetReg(kArg0));
-        if (Is64BitInstructionSet(cu_->instruction_set)) {
+        if (cu_->target64) {
           CallRuntimeHelperRegImm(QUICK_ENTRYPOINT_OFFSET(8, pIdivmod), TargetReg(kArg0), lit,
                                   false);
         } else {
@@ -2104,7 +2106,7 @@
 
 void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
                              RegLocation rl_src1, RegLocation rl_src2) {
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     GenArithOpLongImpl<8>(this, cu_, opcode, rl_dest, rl_src1, rl_src2);
   } else {
     GenArithOpLongImpl<4>(this, cu_, opcode, rl_dest, rl_src1, rl_src2);
@@ -2156,7 +2158,7 @@
     m2l_->ResetRegPool();
     m2l_->ResetDefTracking();
     GenerateTargetLabel(kPseudoSuspendTarget);
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(8, pTestSuspend), true);
     } else {
       m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pTestSuspend), true);
@@ -2215,7 +2217,7 @@
 /* Call out to helper assembly routine that will null check obj and then lock it. */
 void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pLockObject), rl_src, true);
   } else {
     CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pLockObject), rl_src, true);
@@ -2225,7 +2227,7 @@
 /* Call out to helper assembly routine that will null check obj and then unlock it. */
 void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
   FlushAllRegs();
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pUnlockObject), rl_src, true);
   } else {
     CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject), rl_src, true);
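
All of the gen_common.cc hunks above make the same substitution: the per-call Is64BitInstructionSet(cu_->instruction_set) query becomes a read of a flag cached on the CompilationUnit. A minimal sketch of the assumed derivation; the names below are illustrative stand-ins, not the real declarations in compiler/dex/compiler_ir.h:

    // Sketch only: how cu_->target64 is assumed to be computed once at setup.
    enum IsaSketch { kArmSketch, kThumb2Sketch, kArm64Sketch, kX86Sketch, kX86_64Sketch, kMipsSketch };

    struct CompilationUnitSketch {
      IsaSketch instruction_set;
      bool target64;  // what the hunks above test instead of re-deriving it
    };

    void InitTarget(CompilationUnitSketch* cu, IsaSketch isa) {
      cu->instruction_set = isa;
      // Equivalent of Is64BitInstructionSet(isa) for the back ends in this patch.
      cu->target64 = (isa == kArm64Sketch) || (isa == kX86_64Sketch);
    }
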
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index a90a06e..641579f 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -156,7 +156,7 @@
     LoadValueDirectFixed(arg0, TargetReg(kArg0));
   } else {
     RegStorage r_tmp;
-    if (cu_->instruction_set == kX86_64) {
+    if (cu_->target64) {
       r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
     } else {
       r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
@@ -187,7 +187,7 @@
     LoadValueDirectFixed(arg1, TargetReg(kArg1));
   } else {
     RegStorage r_tmp;
-    if (cu_->instruction_set == kX86_64) {
+    if (cu_->target64) {
       r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
     } else {
       r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
@@ -309,7 +309,7 @@
         LoadValueDirectWideFixed(arg1, r_tmp);
       } else {
         RegStorage r_tmp;
-        if (cu_->instruction_set == kX86_64) {
+        if (cu_->target64) {
           r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
         } else {
           r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
@@ -320,13 +320,13 @@
   } else {
     RegStorage r_tmp;
     if (arg0.fp) {
-      if (cu_->instruction_set == kX86_64) {
+      if (cu_->target64) {
         r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg());
       } else {
         r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
       }
     } else {
-      if (cu_->instruction_set == kX86_64) {
+      if (cu_->target64) {
         r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
       } else {
         r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
@@ -334,7 +334,7 @@
     }
     LoadValueDirectWideFixed(arg0, r_tmp);
     if (arg1.wide == 0) {
-      if (cu_->instruction_set == kX86_64) {
+      if (cu_->target64) {
         LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
       } else {
         LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
@@ -342,13 +342,13 @@
     } else {
       RegStorage r_tmp;
       if (arg1.fp) {
-        if (cu_->instruction_set == kX86_64) {
+        if (cu_->target64) {
           r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg());
         } else {
           r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
         }
       } else {
-        if (cu_->instruction_set == kX86_64) {
+        if (cu_->target64) {
           r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
         } else {
           r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
@@ -440,7 +440,7 @@
     LoadValueDirectFixed(arg2, TargetReg(kArg2));
   } else {
     RegStorage r_tmp;
-    if (cu_->instruction_set == kX86_64) {
+    if (cu_->target64) {
       r_tmp = RegStorage::Solo64(TargetReg(kArg2).GetReg());
     } else {
       r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
@@ -779,7 +779,7 @@
                                 const MethodReference& target_method,
                                 uint32_t unused, uintptr_t unused2,
                                 uintptr_t unused3, InvokeType unused4) {
-  if (Is64BitInstructionSet(cu->instruction_set)) {
+  if (cu->target64) {
     ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeStaticTrampolineWithAccessCheck);
     return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
   } else {
@@ -792,7 +792,7 @@
                                 const MethodReference& target_method,
                                 uint32_t unused, uintptr_t unused2,
                                 uintptr_t unused3, InvokeType unused4) {
-  if (Is64BitInstructionSet(cu->instruction_set)) {
+  if (cu->target64) {
     ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeDirectTrampolineWithAccessCheck);
     return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
   } else {
@@ -805,7 +805,7 @@
                                const MethodReference& target_method,
                                uint32_t unused, uintptr_t unused2,
                                uintptr_t unused3, InvokeType unused4) {
-  if (Is64BitInstructionSet(cu->instruction_set)) {
+  if (cu->target64) {
     ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeSuperTrampolineWithAccessCheck);
     return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
   } else {
@@ -818,7 +818,7 @@
                            const MethodReference& target_method,
                            uint32_t unused, uintptr_t unused2,
                            uintptr_t unused3, InvokeType unused4) {
-  if (Is64BitInstructionSet(cu->instruction_set)) {
+  if (cu->target64) {
     ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeVirtualTrampolineWithAccessCheck);
     return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
   } else {
@@ -832,7 +832,7 @@
                                                 const MethodReference& target_method,
                                                 uint32_t unused, uintptr_t unused2,
                                                 uintptr_t unused3, InvokeType unused4) {
-  if (Is64BitInstructionSet(cu->instruction_set)) {
+  if (cu->target64) {
       ThreadOffset<8> trampoline = QUICK_ENTRYPOINT_OFFSET(8, pInvokeInterfaceTrampolineWithAccessCheck);
       return NextInvokeInsnSP<8>(cu, info, trampoline, state, target_method, 0);
     } else {
@@ -1188,7 +1188,7 @@
     // Generate memcpy
     OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
     OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(8, pMemcpy), TargetReg(kArg0),
                                  TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
     } else {
@@ -1540,7 +1540,7 @@
     RegLocation rl_start = info->args[2];     // 3rd arg only present in III flavor of IndexOf.
     LoadValueDirectFixed(rl_start, reg_start);
   }
-  RegStorage r_tgt = Is64BitInstructionSet(cu_->instruction_set) ?
+  RegStorage r_tgt = cu_->target64 ?
       LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pIndexOf)) :
       LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pIndexOf));
   GenExplicitNullCheck(reg_ptr, info->opt_flags);
@@ -1581,7 +1581,7 @@
   LoadValueDirectFixed(rl_cmp, reg_cmp);
   RegStorage r_tgt;
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(8, pStringCompareTo));
     } else {
       r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo));
@@ -1598,7 +1598,7 @@
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
     OpReg(kOpBlx, r_tgt);
   } else {
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pStringCompareTo));
     } else {
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo));
@@ -1747,7 +1747,8 @@
   DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
   // TODO: Enable instrinsics for x86_64
   // Temporary disable intrinsics for x86_64. We will enable them later step by step.
-  if (cu_->instruction_set != kX86_64) {
+  // Temporarily disable intrinsics for Arm64. We will enable them later step by step.
+  if ((cu_->instruction_set != kX86_64) && (cu_->instruction_set != kArm64)) {
     if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
         ->GenIntrinsic(this, info)) {
       return;
@@ -1850,7 +1851,7 @@
       }
     } else {
       // TODO: Extract?
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
+      if (cu_->target64) {
         call_inst = GenInvokeNoInlineCall<8>(this, info->type);
       } else {
         call_inst = GenInvokeNoInlineCall<4>(this, info->type);
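
The kX86_64-to-target64 switches in the CallRuntimeHelper* hunks above all guard the same shaping decision: a wide argument occupies a single 64-bit register on 64-bit targets (RegStorage::Solo64) and a low/high register pair on 32-bit ones (RegStorage::MakeRegPair). A hedged model with stand-in types:

    // Illustration only; RegSketch stands in for RegStorage.
    struct RegSketch {
      int low;
      int high;     // -1 when the value lives in one solo 64-bit register
      bool is_pair;
    };

    RegSketch ShapeWideArg(bool target64, int low_reg, int high_reg) {
      if (target64) {
        return {low_reg, -1, false};      // RegStorage::Solo64(low_reg)
      }
      return {low_reg, high_reg, true};   // RegStorage::MakeRegPair(low, high)
    }
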
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 40205ea..1fc4163 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -66,7 +66,7 @@
   }
 }
 
-// TODO: needs revisit for 64-bit.
+// TODO: simplify when 32-bit targets go hard-float.
 RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
@@ -87,10 +87,11 @@
     offset += sizeof(uint64_t);
   }
 
-  if (cu_->instruction_set == kX86_64) {
+  if (cu_->target64) {
     RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
     if (!reg_arg.Valid()) {
-      RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
+      RegStorage new_reg =
+          wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
       LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32);
       return new_reg;
     } else {
@@ -159,6 +160,7 @@
   return reg_arg;
 }
 
+// TODO: simplify when 32-bit targets go hard-float.
 void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
@@ -186,7 +188,7 @@
       Load32Disp(TargetReg(kSp), offset, rl_dest.reg);
     }
   } else {
-    if (cu_->instruction_set == kX86_64) {
+    if (cu_->target64) {
       RegStorage reg = GetArgMappingToPhysicalReg(in_position);
       if (reg.Valid()) {
         OpRegCopy(rl_dest.reg, reg);
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index cae59c8..5bb0ee0 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -473,14 +473,14 @@
     reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
   }
   if (!reg.Valid() && (reg_class != kFPReg)) {
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       reg = FindLiveReg(wide ? reg_pool_->core64_regs_ : reg_pool_->core_regs_, s_reg);
     } else {
       reg = FindLiveReg(reg_pool_->core_regs_, s_reg);
     }
   }
   if (reg.Valid()) {
-    if (wide && !reg.IsFloat() && !Is64BitInstructionSet(cu_->instruction_set)) {
+    if (wide && !reg.IsFloat() && !cu_->target64) {
       // Only allow reg pairs for core regs on 32-bit targets.
       RegStorage high_reg = FindLiveReg(reg_pool_->core_regs_, s_reg + 1);
       if (high_reg.Valid()) {
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index f5fce34..dd5dab2 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -175,7 +175,7 @@
   }
   NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec));
   NewLIR2(Gen64Bit() ? kX86Add64RR : kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0,
                             rs_rX86_ARG1, true);
   } else {
@@ -185,7 +185,7 @@
 }
 
 void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
-  int ex_offset = Is64BitInstructionSet(cu_->instruction_set) ?
+  int ex_offset = cu_->target64 ?
       Thread::ExceptionOffset<8>().Int32Value() :
       Thread::ExceptionOffset<4>().Int32Value();
   RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
@@ -201,7 +201,7 @@
   RegStorage reg_card_base = AllocTemp();
   RegStorage reg_card_no = AllocTemp();
   LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
-  int ct_offset = Is64BitInstructionSet(cu_->instruction_set) ?
+  int ct_offset = cu_->target64 ?
       Thread::CardTableOffset<8>().Int32Value() :
       Thread::CardTableOffset<4>().Int32Value();
   if (Gen64Bit()) {
@@ -255,7 +255,7 @@
         m2l_->OpRegImm(kOpAdd, rs_rX86_SP, sp_displace_);
         m2l_->ClobberCallerSave();
         // Assumes codegen and target are in thumb2 mode.
-        if (Is64BitInstructionSet(cu_->instruction_set)) {
+        if (cu_->target64) {
           m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow),
                            false /* MarkSafepointPC */, false /* UseLink */);
         } else {
@@ -276,7 +276,7 @@
     // in case a signal comes in that's not using an alternate signal stack and the large frame may
     // have moved us outside of the reserved area at the end of the stack.
     // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
     } else {
       OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index f6f0617..61623d0 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -49,7 +49,7 @@
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
+      if (cu_->target64) {
         CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
                                                 false);
       } else {
@@ -111,7 +111,7 @@
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
+      if (cu_->target64) {
         CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
                                                 false);
       } else {
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index f6f0c84..b342813 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -991,7 +991,7 @@
       }
       // Load array length to kArg1.
       m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
+      if (cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
                                       new_index, m2l_->TargetReg(kArg1), true);
       } else {
@@ -1031,7 +1031,7 @@
       // Load array length to kArg1.
       m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
       m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_);
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
+      if (cu_->target64) {
         m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
                                       m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
       } else {
@@ -1054,7 +1054,7 @@
 
 // Test suspend flag, return target of taken suspend branch
 LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
-  if (Is64BitInstructionSet(cu_->instruction_set)) {
+  if (cu_->target64) {
     OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
   } else {
     OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
@@ -1810,7 +1810,7 @@
           NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
           LoadConstant(rl_result.reg.GetLow(), 0);
         } else {
-          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
           NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
                   shift_amount);
@@ -1829,7 +1829,7 @@
           NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
           NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
         } else {
-          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
                   shift_amount);
@@ -1846,7 +1846,7 @@
           NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
           LoadConstant(rl_result.reg.GetHigh(), 0);
         } else {
-          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
           OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
           NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
                   shift_amount);
@@ -2311,7 +2311,7 @@
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // Caller function returns Class* in kArg0.
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
                            type_idx, true);
     } else {
@@ -2337,7 +2337,7 @@
       // Need to test presence of type in dex cache at runtime.
       LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
       // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
+      if (cu_->target64) {
         CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
       } else {
         CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
@@ -2380,7 +2380,7 @@
       branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
     }
     OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
-    if (Is64BitInstructionSet(cu_->instruction_set)) {
+    if (cu_->target64) {
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial));
     } else {
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
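
The three shift hunks above stop copying the result pair with one wide OpRegCopy and copy the low and high words explicitly before the shld/shrd. As a reference for what those double-width shifts compute, a hedged C++ model of the shift_amount < 32 left-shift case (the right-shift forms mirror it with shrd):

    #include <cstdint>

    // Model of the kX86Shld32RRI path: for 0 < n < 32,
    //   high = (high << n) | (low >> (32 - n));  low = low << n;
    // The high word must be updated first, before the low word is clobbered.
    void ShlLongModel(uint32_t* lo, uint32_t* hi, int n) {
      *hi = (*hi << n) | (*lo >> (32 - n));  // shld high, low, n
      *lo <<= n;                             // sal low, n
    }
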
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 342a191..f4b12e2 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -889,7 +889,6 @@
   __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
 }
 
-
 void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
   ScratchRegisterScope ensure_scratch1(
       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ef17ca7..ebeef9d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -35,6 +35,9 @@
 
 namespace x86_64 {
 
+// Some x86_64 instructions require a register to be available as a temporary.
+static constexpr Register TMP = R11;
+
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
@@ -53,7 +56,8 @@
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph)
       : CodeGenerator(graph, kNumberOfRegIds),
         location_builder_(graph, this),
-        instruction_visitor_(graph, this) {}
+        instruction_visitor_(graph, this),
+        move_resolver_(graph->GetArena(), this) {}
 
 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
       : HGraphVisitor(graph),
@@ -89,6 +93,9 @@
   // Stack register is always reserved.
   blocked_registers[RSP] = true;
 
+  // Block the register used as TMP.
+  blocked_registers[TMP] = true;
+
   // TODO: We currently don't use Quick's callee saved registers.
   blocked_registers[RBX] = true;
   blocked_registers[RBP] = true;
@@ -192,8 +199,8 @@
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
     } else {
       DCHECK(source.IsStackSlot());
-      __ movl(CpuRegister(RAX), Address(CpuRegister(RSP), source.GetStackIndex()));
-      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(RAX));
+      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
   } else {
     DCHECK(destination.IsDoubleStackSlot());
@@ -201,8 +208,8 @@
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movq(CpuRegister(RAX), Address(CpuRegister(RSP), source.GetStackIndex()));
-      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(RAX));
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
   }
 }
@@ -211,7 +218,7 @@
   if (instruction->AsIntConstant() != nullptr) {
     Immediate imm(instruction->AsIntConstant()->GetValue());
     if (location.IsRegister()) {
-      __ movq(location.AsX86_64().AsCpuRegister(), imm);
+      __ movl(location.AsX86_64().AsCpuRegister(), imm);
     } else {
       __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
     }
@@ -220,8 +227,8 @@
     if (location.IsRegister()) {
       __ movq(location.AsX86_64().AsCpuRegister(), Immediate(value));
     } else {
-      __ movq(CpuRegister(RAX), Immediate(value));
-      __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(RAX));
+      __ movq(CpuRegister(TMP), Immediate(value));
+      __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
     }
   } else if (instruction->AsLoadLocal() != nullptr) {
     switch (instruction->GetType()) {
@@ -288,7 +295,7 @@
 
 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, X86_64CpuLocation(RAX));
+  locations->SetInAt(0, Location::RequiresRegister());
   if_instr->SetLocations(locations);
 }
 
@@ -344,9 +351,9 @@
 
 void LocationsBuilderX86_64::VisitEqual(HEqual* equal) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
-  locations->SetInAt(0, X86_64CpuLocation(RAX));
-  locations->SetInAt(1, X86_64CpuLocation(RCX));
-  locations->SetOut(X86_64CpuLocation(RAX));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
   equal->SetLocations(locations);
 }
 
@@ -364,7 +371,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) {
-  // Will be generated at use site.
+  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
@@ -545,9 +552,9 @@
   switch (add->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      locations->SetInAt(0, X86_64CpuLocation(RAX));
-      locations->SetInAt(1, X86_64CpuLocation(RCX));
-      locations->SetOut(X86_64CpuLocation(RAX));
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
     }
 
@@ -566,11 +573,15 @@
 
 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
   LocationSummary* locations = add->GetLocations();
+  DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
+            locations->Out().AsX86_64().AsCpuRegister().AsRegister());
   switch (add->GetResultType()) {
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    }
     case Primitive::kPrimLong: {
-      DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
-                locations->Out().AsX86_64().AsCpuRegister().AsRegister());
       __ addq(locations->InAt(0).AsX86_64().AsCpuRegister(),
               locations->InAt(1).AsX86_64().AsCpuRegister());
       break;
@@ -593,9 +604,9 @@
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      locations->SetInAt(0, X86_64CpuLocation(RAX));
-      locations->SetInAt(1, X86_64CpuLocation(RCX));
-      locations->SetOut(X86_64CpuLocation(RAX));
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
     }
 
@@ -614,11 +625,15 @@
 
 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
   LocationSummary* locations = sub->GetLocations();
+  DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
+            locations->Out().AsX86_64().AsCpuRegister().AsRegister());
   switch (sub->GetResultType()) {
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      __ subl(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    }
     case Primitive::kPrimLong: {
-      DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
-                locations->Out().AsX86_64().AsCpuRegister().AsRegister());
       __ subq(locations->InAt(0).AsX86_64().AsCpuRegister(),
               locations->InAt(1).AsX86_64().AsCpuRegister());
       break;
@@ -671,8 +686,8 @@
 
 void LocationsBuilderX86_64::VisitNot(HNot* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, X86_64CpuLocation(RAX));
-  locations->SetOut(X86_64CpuLocation(RAX));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
   instruction->SetLocations(locations);
 }
 
@@ -701,7 +716,85 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
-  LOG(FATAL) << "Unimplemented";
+  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
+  return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverX86_64::EmitMove(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister()) {
+    if (destination.IsRegister()) {
+      __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
+    }
+  } else if (source.IsStackSlot()) {
+    if (destination.IsRegister()) {
+      __ movl(destination.AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsStackSlot());
+      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+    }
+  } else {
+    LOG(FATAL) << "Unimplemented";
+  }
+}
+
+void ParallelMoveResolverX86_64::Exchange(CpuRegister reg, int mem) {
+  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+  __ movl(Address(CpuRegister(RSP), mem), reg);
+  __ movl(reg, CpuRegister(TMP));
+}
+
+void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+  __ movl(CpuRegister(ensure_scratch.GetRegister()),
+          Address(CpuRegister(RSP), mem2 + stack_offset));
+  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
+          CpuRegister(ensure_scratch.GetRegister()));
+}
+
+void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+
+  if (source.IsRegister() && destination.IsRegister()) {
+    __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
+  } else if (source.IsRegister() && destination.IsStackSlot()) {
+    Exchange(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsRegister()) {
+    Exchange(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+    Exchange(destination.GetStackIndex(), source.GetStackIndex());
+  } else {
+    LOG(FATAL) << "Unimplemented";
+  }
+}
+
+
+void ParallelMoveResolverX86_64::SpillScratch(int reg) {
+  __ pushq(CpuRegister(reg));
+}
+
+
+void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
+  __ popq(CpuRegister(reg));
 }
 
 }  // namespace x86_64
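
Reserving R11 as TMP (and blocking it in the register allocator above) is what lets EmitMove and Exchange synthesize memory-to-memory transfers: x86-64 has no mem-to-mem mov, so every stack-slot-to-stack-slot move or swap bounces through a register the allocator can never hand out. A hedged C++ model of the stack-slot swap:

    #include <cstdint>

    // Model of Exchange(int mem1, int mem2): TMP carries one slot, a scratch
    // register (push/pop-spilled if none is free) carries the other.
    void SwapStackSlots(uint32_t* stack, int slot1, int slot2) {
      uint32_t tmp = stack[slot1];      // movl TMP, [RSP + mem1]
      uint32_t scratch = stack[slot2];  // movl scratch, [RSP + mem2]
      stack[slot2] = tmp;               // movl [RSP + mem2], TMP
      stack[slot1] = scratch;           // movl [RSP + mem1], scratch
    }
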
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ac7ee9f..f07df29 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -19,6 +19,7 @@
 
 #include "code_generator.h"
 #include "nodes.h"
+#include "parallel_move_resolver.h"
 #include "utils/x86_64/assembler_x86_64.h"
 
 namespace art {
@@ -55,6 +56,27 @@
 
 class CodeGeneratorX86_64;
 
+class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
+ public:
+  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
+      : ParallelMoveResolver(allocator), codegen_(codegen) {}
+
+  virtual void EmitMove(size_t index) OVERRIDE;
+  virtual void EmitSwap(size_t index) OVERRIDE;
+  virtual void SpillScratch(int reg) OVERRIDE;
+  virtual void RestoreScratch(int reg) OVERRIDE;
+
+  X86_64Assembler* GetAssembler() const;
+
+ private:
+  void Exchange(CpuRegister reg, int mem);
+  void Exchange(int mem1, int mem2);
+
+  CodeGeneratorX86_64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
+};
+
 class LocationsBuilderX86_64 : public HGraphVisitor {
  public:
   LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
@@ -123,6 +145,10 @@
     return &assembler_;
   }
 
+  ParallelMoveResolverX86_64* GetMoveResolver() {
+    return &move_resolver_;
+  }
+
   int32_t GetStackSlot(HLocal* local) const;
   virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
@@ -150,6 +176,7 @@
 
   LocationsBuilderX86_64 location_builder_;
   InstructionCodeGeneratorX86_64 instruction_visitor_;
+  ParallelMoveResolverX86_64 move_resolver_;
   X86_64Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 8b7c4f1..e63122f 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -65,7 +65,7 @@
 
   static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
   static bool Supports(InstructionSet instruction_set) {
-    return instruction_set == kX86 || instruction_set == kArm;
+    return instruction_set == kX86 || instruction_set == kArm || instruction_set == kX86_64;
   }
 
   size_t GetNumberOfSpillSlots() const {
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index b07eed3..41d1529 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -138,8 +138,8 @@
 void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
-  EmitUint8(0x89);
-  EmitRegisterOperand(src.LowBits(), dst.LowBits());
+  EmitUint8(0x8B);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
 }
 
 
@@ -821,6 +821,15 @@
   EmitRegisterOperand(dst.LowBits(), src.LowBits());
 }
 
+
+void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x87);
+  EmitOperand(dst.LowBits(), Operand(src));
+}
+
+
 void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg, address);
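
The movl fix above is an operand-direction bug: the old code emitted opcode 0x89 (mov r/m32, r32), whose ModRM reg field names the source, but computed the REX prefix with EmitOptionalRex32(dst, src), which extends reg from dst. For any operand in R8-R15 the two disagreed and the wrong registers were addressed. Switching to 0x8B (mov r32, r/m32) puts dst in the reg field, matching the REX computation. A hedged sketch of the bytes the fixed path produces for the first case in the new Movl test:

    #include <cstdint>
    #include <vector>

    // Encode "movl %r11d, %r8d" (dst = R8, src = R11) the way the fixed
    // movl does: REX.R extends ModRM.reg (dst), REX.B extends ModRM.rm (src).
    std::vector<uint8_t> EncodeMovlR8R11() {
      const int dst = 8, src = 11;
      uint8_t rex = 0x40 | ((dst >> 3) << 2) | (src >> 3);   // 0x45
      uint8_t modrm = 0xC0 | ((dst & 7) << 3) | (src & 7);   // 0xC3
      return {rex, 0x8B, modrm};  // 45 8B C3 == movl %r11d, %r8d
    }
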
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 6276603..9aa5a54 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -375,6 +375,7 @@
   void fptan();
 
   void xchgl(CpuRegister dst, CpuRegister src);
+  void xchgq(CpuRegister dst, CpuRegister src);
   void xchgl(CpuRegister reg, const Address& address);
 
   void cmpl(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 799db9f..f7bad8b 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -125,6 +125,16 @@
   DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi");
 }
 
+TEST_F(AssemblerX86_64Test, Movl) {
+  GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11));
+  GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11));
+  const char* expected =
+    "movl %R11d, %R8d\n"
+    "movl %R11d, %EAX\n";
+
+  DriverStr(expected, "movl");
+}
+
 
 std::string setcc_test_fn(x86_64::X86_64Assembler* assembler) {
   // From Condition
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index 58a0379..ca9eae3 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -30,6 +30,7 @@
 class CpuRegister {
  public:
   explicit CpuRegister(Register r) : reg_(r) {}
+  explicit CpuRegister(int r) : reg_(Register(r)) {}
   Register AsRegister() const {
     return reg_;
   }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 67ac729..c3f2082 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1093,7 +1093,9 @@
   }
 
   if (compiler_filter_string == nullptr) {
-    if ((instruction_set == kX86_64 && image) || instruction_set == kArm64 || instruction_set == kMips) {
+    if ((instruction_set == kX86_64 && image) ||
+        instruction_set == kArm64 ||
+        instruction_set == kMips) {
       // TODO: implement/fix compilers for these architectures.
       compiler_filter_string = "interpret-only";
     } else if (image) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 7385382..d684a50 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1137,8 +1137,10 @@
     MoveImageClassesToClassTable();
   }
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  for (const std::pair<size_t, mirror::Class*>& it : class_table_) {
-    if (!visitor(it.second, arg)) {
+  for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
+    mirror::Class** root = &it.second;
+    mirror::Class* c = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root);
+    if (!visitor(c, arg)) {
       return;
     }
   }
@@ -2353,7 +2355,8 @@
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
        it != end && it->first == hash;
        ++it) {
-    mirror::Class* klass = it->second;
+    mirror::Class** root = &it->second;
+    mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root);
     if (klass->GetClassLoader() == class_loader && klass->DescriptorEquals(descriptor)) {
       class_table_.erase(it);
       return true;
@@ -2397,12 +2400,14 @@
                                                        size_t hash) {
   auto end = class_table_.end();
   for (auto it = class_table_.lower_bound(hash); it != end && it->first == hash; ++it) {
-    mirror::Class* klass = it->second;
+    mirror::Class** root = &it->second;
+    mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root);
     if (klass->GetClassLoader() == class_loader && klass->DescriptorEquals(descriptor)) {
       if (kIsDebugBuild) {
         // Check for duplicates in the table.
         for (++it; it != end && it->first == hash; ++it) {
-          mirror::Class* klass2 = it->second;
+          mirror::Class** root2 = &it->second;
+          mirror::Class* klass2 = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root2);
           CHECK(!(klass2->GetClassLoader() == class_loader &&
               klass2->DescriptorEquals(descriptor)))
               << PrettyClass(klass) << " " << klass << " " << klass->GetClassLoader() << " "
@@ -2494,7 +2499,8 @@
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   for (auto it = class_table_.lower_bound(hash), end = class_table_.end();
       it != end && it->first == hash; ++it) {
-    mirror::Class* klass = it->second;
+    mirror::Class** root = &it->second;
+    mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root);
     if (klass->DescriptorEquals(descriptor)) {
       result.push_back(klass);
     }
@@ -4362,8 +4368,10 @@
   std::vector<mirror::Class*> all_classes;
   {
     ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    for (const std::pair<size_t, mirror::Class*>& it : class_table_) {
-      all_classes.push_back(it.second);
+    for (std::pair<const size_t, mirror::Class*>& it : class_table_) {
+      mirror::Class** root = &it.second;
+      mirror::Class* klass = ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>(root);
+      all_classes.push_back(klass);
     }
   }
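
Every class_table_ walk above now follows one pattern: iterate by non-const reference so that &it.second is the actual root slot in the table, then load the Class* only through the barrier, letting a concurrent collector intercept the read. A hedged sketch with stand-in types:

    #include <cstddef>
    #include <map>

    struct KlassSketch {};  // stand-in for mirror::Class

    // Stand-in for ReadBarrier::BarrierForRoot<mirror::Class, kWithReadBarrier>;
    // a real barrier may mark or forward the reference rather than plainly load it.
    KlassSketch* ReadRootSketch(KlassSketch** root) { return *root; }

    void VisitClasses(std::multimap<std::size_t, KlassSketch*>& table,
                      bool (*visitor)(KlassSketch*, void*), void* arg) {
      for (std::pair<const std::size_t, KlassSketch*>& it : table) {
        KlassSketch** root = &it.second;        // address of the strong root
        KlassSketch* c = ReadRootSketch(root);  // read only via the barrier
        if (!visitor(c, arg)) {
          return;
        }
      }
    }
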
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index a1d7bc6..6d96aa2 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -573,6 +573,8 @@
   // mirror::Class* instances. Results should be compared for a matching
   // Class::descriptor_ and Class::class_loader_.
   typedef std::multimap<size_t, mirror::Class*> Table;
+  // This contains strong roots. To enable concurrent root scanning of
+  // the class table, be careful to use a read barrier when accessing this.
   Table class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
   std::vector<std::pair<size_t, mirror::Class*>> new_class_roots_;
 
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
index 790f4d0..b787233 100644
--- a/runtime/indirect_reference_table-inl.h
+++ b/runtime/indirect_reference_table-inl.h
@@ -80,7 +80,7 @@
   mirror::Object* obj = *root;
   if (LIKELY(obj != kClearedJniWeakGlobal)) {
     // The read barrier or VerifyObject won't handle kClearedJniWeakGlobal.
-    obj = ReadBarrier::BarrierForWeakRoot<mirror::Object, kReadBarrierOption>(root);
+    obj = ReadBarrier::BarrierForRoot<mirror::Object, kReadBarrierOption>(root);
     VerifyObject(obj);
   }
   return obj;
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 756ac96..98e1d21 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -280,7 +280,7 @@
       // We need a read barrier if weak globals. Since this is for
       // debugging where performance isn't top priority, we
       // unconditionally enable the read barrier, which is conservative.
-      obj = ReadBarrier::BarrierForWeakRoot<mirror::Object, kWithReadBarrier>(root);
+      obj = ReadBarrier::BarrierForRoot<mirror::Object, kWithReadBarrier>(root);
       entries.push_back(obj);
     }
   }
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index f12043e..1430500 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -38,6 +38,16 @@
   return strong_interns_.size() + weak_interns_.size();
 }
 
+size_t InternTable::StrongSize() const {
+  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
+  return strong_interns_.size();
+}
+
+size_t InternTable::WeakSize() const {
+  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
+  return weak_interns_.size();
+}
+
 void InternTable::DumpForSigQuit(std::ostream& os) const {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
   os << "Intern table: " << strong_interns_.size() << " strong; "
@@ -83,24 +93,21 @@
 }
 
 mirror::String* InternTable::LookupStrong(mirror::String* s, int32_t hash_code) {
-  return Lookup<kWithoutReadBarrier>(&strong_interns_, s, hash_code);
+  return Lookup(&strong_interns_, s, hash_code);
 }
 
 mirror::String* InternTable::LookupWeak(mirror::String* s, int32_t hash_code) {
   // Weak interns need a read barrier because they are weak roots.
-  return Lookup<kWithReadBarrier>(&weak_interns_, s, hash_code);
+  return Lookup(&weak_interns_, s, hash_code);
 }
 
-template<ReadBarrierOption kReadBarrierOption>
 mirror::String* InternTable::Lookup(Table* table, mirror::String* s, int32_t hash_code) {
-  CHECK_EQ(table == &weak_interns_, kReadBarrierOption == kWithReadBarrier)
-      << "Only weak_interns_ needs a read barrier.";
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
   for (auto it = table->lower_bound(hash_code), end = table->end();
        it != end && it->first == hash_code; ++it) {
-    mirror::String** weak_root = &it->second;
-    mirror::String* existing_string =
-        ReadBarrier::BarrierForWeakRoot<mirror::String, kReadBarrierOption>(weak_root);
+    mirror::String** root = &it->second;
+    mirror::String* existing_string =
+        ReadBarrier::BarrierForRoot<mirror::String, kWithReadBarrier>(root);
     if (existing_string->Equals(s)) {
       return existing_string;
     }
@@ -130,7 +137,7 @@
 }
 
 void InternTable::RemoveStrong(mirror::String* s, int32_t hash_code) {
-  Remove<kWithoutReadBarrier>(&strong_interns_, s, hash_code);
+  Remove(&strong_interns_, s, hash_code);
 }
 
 void InternTable::RemoveWeak(mirror::String* s, int32_t hash_code) {
@@ -138,18 +145,15 @@
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringRemoval(s, hash_code);
   }
-  Remove<kWithReadBarrier>(&weak_interns_, s, hash_code);
+  Remove(&weak_interns_, s, hash_code);
 }
 
-template<ReadBarrierOption kReadBarrierOption>
 void InternTable::Remove(Table* table, mirror::String* s, int32_t hash_code) {
-  CHECK_EQ(table == &weak_interns_, kReadBarrierOption == kWithReadBarrier)
-      << "Only weak_interns_ needs a read barrier.";
   for (auto it = table->lower_bound(hash_code), end = table->end();
        it != end && it->first == hash_code; ++it) {
-    mirror::String** weak_root = &it->second;
-    mirror::String* existing_string =
-        ReadBarrier::BarrierForWeakRoot<mirror::String, kReadBarrierOption>(weak_root);
+    mirror::String** root = &it->second;
+    mirror::String* existing_string =
+        ReadBarrier::BarrierForRoot<mirror::String, kWithReadBarrier>(root);
     if (existing_string == s) {
       table->erase(it);
       return;
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 3df2aeb..6dc7f7b 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -64,6 +64,8 @@
   bool ContainsWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   size_t Size() const;
+  size_t StrongSize() const;
+  size_t WeakSize() const;
 
   void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags);
 
@@ -83,7 +85,6 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::String* LookupWeak(mirror::String* s, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::String* Lookup(Table* table, mirror::String* s, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::String* InsertStrong(mirror::String* s, int32_t hash_code)
@@ -96,7 +97,6 @@
   void RemoveWeak(mirror::String* s, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   void Remove(Table* table, mirror::String* s, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
@@ -117,12 +117,16 @@
   bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_);
   bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_);
   ConditionVariable new_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
+  // Since this contains (strong) roots, a read barrier is needed to
+  // enable concurrent scanning of the intern table's strong roots. Do
+  // not access the strings in it directly; use functions that contain
+  // read barriers.
   Table strong_interns_ GUARDED_BY(Locks::intern_table_lock_);
   std::vector<std::pair<int32_t, mirror::String*>> new_strong_intern_roots_
       GUARDED_BY(Locks::intern_table_lock_);
-  // Since weak_interns_ contain weak roots, they need a read
-  // barrier. Do not directly access the strings in it. Use functions
-  // that contain read barriers.
+  // Since this contains (weak) roots, a read barrier is needed. Do
+  // not access the strings in it directly; use functions that contain
+  // read barriers.
   Table weak_interns_ GUARDED_BY(Locks::intern_table_lock_);
 };
 
diff --git a/runtime/monitor.h b/runtime/monitor.h
index bd0e23c..a28823d 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -95,7 +95,7 @@
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   mirror::Object* GetObject() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return ReadBarrier::BarrierForWeakRoot<mirror::Object, kReadBarrierOption>(&obj_);
+    return ReadBarrier::BarrierForRoot<mirror::Object, kReadBarrierOption>(&obj_);
   }
 
   void SetObject(mirror::Object* object);
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index cff5ec3..0820330 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -556,11 +556,11 @@
     } else if (option == "-Xprofile-start-immediately") {
       profiler_options_.start_immediately_ = true;
     } else if (StartsWith(option, "-Xprofile-top-k-threshold:")) {
-      if (!ParseDouble(option, ':', 10.0, 90.0, &profiler_options_.top_k_threshold_)) {
+      if (!ParseDouble(option, ':', 0.0, 100.0, &profiler_options_.top_k_threshold_)) {
         return false;
       }
     } else if (StartsWith(option, "-Xprofile-top-k-change-threshold:")) {
-      if (!ParseDouble(option, ':', 10.0, 90.0, &profiler_options_.top_k_change_threshold_)) {
+      if (!ParseDouble(option, ':', 0.0, 100.0, &profiler_options_.top_k_change_threshold_)) {
         return false;
       }
     } else if (StartsWith(option, "-implicit-checks:")) {
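
The widened bounds reflect that both thresholds are percentages, so any value in [0.0, 100.0] is meaningful, not just [10.0, 90.0]. A hedged model of the check ParseDouble is assumed to apply (the real helper lives elsewhere in parsed_options.cc):

    #include <cstddef>
    #include <cstdlib>
    #include <string>

    // Model only: parse the text after 'after_char' and accept it iff it
    // falls inside [min, max].
    bool ParseDoubleModel(const std::string& option, char after_char,
                          double min, double max, double* parsed_value) {
      std::size_t pos = option.find(after_char);
      if (pos == std::string::npos) {
        return false;
      }
      double value = std::strtod(option.c_str() + pos + 1, nullptr);
      if (value < min || value > max) {
        return false;
      }
      *parsed_value = value;
      return true;
    }
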
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index e252b7b..fd43d78 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -44,8 +44,8 @@
 }
 
 template <typename MirrorType, ReadBarrierOption kReadBarrierOption>
-inline MirrorType* ReadBarrier::BarrierForWeakRoot(MirrorType** weak_root) {
-  MirrorType* ref = *weak_root;
+inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root) {
+  MirrorType* ref = *root;
   const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
   if (with_read_barrier && kUseBakerReadBarrier) {
     // To be implemented.
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 7232a3f..451d13c 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -39,7 +39,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  ALWAYS_INLINE static MirrorType* BarrierForWeakRoot(MirrorType** weak_root)
+  ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 };
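
With the rename, one entry point covers strong and weak roots alike; the callers above differ only in whether they instantiate it with kWithReadBarrier. A minimal model of its shape, not the runtime's implementation (the Baker-barrier path is still marked "to be implemented" in read_barrier-inl.h):

    // Illustration only; MirrorType stands in for the mirror:: classes.
    template <typename MirrorType, bool kWithBarrier = true>
    MirrorType* BarrierForRootModel(MirrorType** root) {
      MirrorType* ref = *root;
      if (kWithBarrier) {
        // A concurrent-copying collector would mark or forward ref here.
      }
      return ref;
    }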