Merge "Support proxy method in StackVisitor::GetThisObject"
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 6346970..7620e76 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -237,6 +237,10 @@
   art_cflags += -DART_USE_OPTIMIZING_COMPILER=1
 endif
 
+ifeq ($(ART_HEAP_POISONING),true)
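+  # Enable heap reference poisoning in the ART C++ code.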
+  art_cflags += -DART_HEAP_POISONING=1
+endif
+
 # Cflags for non-debug ART and ART tools.
 art_non_debug_cflags := \
   -O3
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 764a4cf..0407e32 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -297,6 +297,41 @@
   void Start(PassDataHolder* data) const;
 };
 
+/**
+ * @class SuspendCheckElimination
+ * @brief Eliminate redundant suspend checks on loop back-edges.
+ */
+class SuspendCheckElimination : public PassME {
+ public:
+  SuspendCheckElimination()
+    : PassME("SuspendCheckElimination", kTopologicalSortTraversal, "6_post_sce_cfg") {
+  }
+
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    return c_unit->mir_graph->EliminateSuspendChecksGate();
+  }
+
+  bool Worker(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
+    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
+    DCHECK(c_unit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
+    return c_unit->mir_graph->EliminateSuspendChecks(bb);
+  }
+
+  void End(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    c_unit->mir_graph->EliminateSuspendChecksEnd();
+  }
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 3f6231c..fb5dc9c 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -47,6 +47,7 @@
   // (1 << kTrackLiveTemps) |
   // (1 << kSafeOptimizations) |
   // (1 << kBBOpt) |
+  // (1 << kSuspendCheckElimination) |
   // (1 << kMatch) |
   // (1 << kPromoteCompilerTemps) |
   // (1 << kSuppressExceptionEdges) |
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index bed3b97..1145818 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -46,6 +46,7 @@
   kTrackLiveTemps,
   kSafeOptimizations,
   kBBOpt,
+  kSuspendCheckElimination,
   kMatch,
   kPromoteCompilerTemps,
   kBranchFusing,
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 5b7ac3c..64895d8 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1343,7 +1343,7 @@
  * counts explicitly used s_regs.  A later phase will add implicit
  * counts for things such as Method*, null-checked references, etc.
  */
-void MIRGraph::CountUses(class BasicBlock* bb) {
+void MIRGraph::CountUses(BasicBlock* bb) {
   if (bb->block_type != kDalvikByteCode) {
     return;
   }
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 023abca..d7ecb2c 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -127,7 +127,7 @@
       ifield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
       sfield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
       method_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
-      gen_suspend_test_list_(arena->Adapter()) {
+      suspend_checks_in_loops_(nullptr) {
   memset(&temp_, 0, sizeof(temp_));
   use_counts_.reserve(256);
   raw_use_counts_.reserve(256);
@@ -1590,6 +1590,12 @@
   return cu_->dex_file->GetShorty(method_id.proto_idx_);
 }
 
+const char* MIRGraph::GetShortyFromMethodReference(const MethodReference& target_method) {
+  const DexFile::MethodId& method_id =
+      target_method.dex_file->GetMethodId(target_method.dex_method_index);
+  return target_method.dex_file->GetShorty(method_id.proto_idx_);
+}
+
 /* Debug Utility - dump a compilation unit */
 void MIRGraph::DumpMIRGraph() {
   const char* block_type_names[] = {
@@ -1941,6 +1947,7 @@
     DCHECK_EQ(bb->hidden, false);
     DCHECK_EQ(bb->visited, false);
     bb->visited = true;
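+    // Record the loop nesting depth of this block: one entry per loop head currently
+    // on the stack at this point of the topological sort.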
+    bb->nesting_depth = loop_head_stack.size();
 
     // Now add the basic block.
     uint16_t idx = static_cast<uint16_t>(topological_order_.size());
@@ -1982,24 +1989,6 @@
   return false;
 }
 
-bool MIRGraph::HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id) {
-  BasicBlock* target = GetBasicBlock(target_id);
-
-  if (source == nullptr || target == nullptr)
-    return false;
-
-  int idx;
-  for (idx = gen_suspend_test_list_.size() - 1; idx >= 0; idx--) {
-    BasicBlock* bb = gen_suspend_test_list_[idx];
-    if (bb == source)
-      return true;  // The block has been inserted by a suspend check before.
-    if (source->dominators->IsBitSet(bb->id) && bb->dominators->IsBitSet(target_id))
-      return true;
-  }
-
-  return false;
-}
-
 ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
     : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
       visited_taken_(false), have_successors_(false) {
@@ -2222,21 +2211,7 @@
   }
 }
 
-void BasicBlock::Hide(MIRGraph* mir_graph) {
-  // First lets make it a dalvik bytecode block so it doesn't have any special meaning.
-  block_type = kDalvikByteCode;
-
-  // Mark it as hidden.
-  hidden = true;
-
-  // Detach it from its MIRs so we don't generate code for them. Also detached MIRs
-  // are updated to know that they no longer have a parent.
-  for (MIR* mir = first_mir_insn; mir != nullptr; mir = mir->next) {
-    mir->bb = NullBasicBlockId;
-  }
-  first_mir_insn = nullptr;
-  last_mir_insn = nullptr;
-
+void BasicBlock::Kill(MIRGraph* mir_graph) {
   for (BasicBlockId pred_id : predecessors) {
     BasicBlock* pred_bb = mir_graph->GetBasicBlock(pred_id);
     DCHECK(pred_bb != nullptr);
@@ -2244,24 +2219,11 @@
     // Sadly we have to go through the children by hand here.
     pred_bb->ReplaceChild(id, NullBasicBlockId);
   }
+  predecessors.clear();
 
-  // Iterate through children of bb we are hiding.
-  ChildBlockIterator successorChildIter(this, mir_graph);
-
-  for (BasicBlock* childPtr = successorChildIter.Next(); childPtr != 0; childPtr = successorChildIter.Next()) {
-    // Erase this predecessor from child.
-    childPtr->ErasePredecessor(id);
-  }
-
-  // Remove link to children.
-  taken = NullBasicBlockId;
-  fall_through = NullBasicBlockId;
-  successor_block_list_type = kNotUsed;
+  KillUnreachable(mir_graph);
 }
 
-/*
- * Kill an unreachable block and all blocks that become unreachable by killing this one.
- */
 void BasicBlock::KillUnreachable(MIRGraph* mir_graph) {
   DCHECK(predecessors.empty());  // Unreachable.
 
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 1a18841..9890690 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -34,6 +34,7 @@
 
 namespace art {
 
+class DexFileMethodInliner;
 class GlobalValueNumbering;
 
 enum DataFlowAttributePos {
@@ -131,11 +132,9 @@
 
 enum OatMethodAttributes {
   kIsLeaf,            // Method is leaf.
-  kHasLoop,           // Method contains simple loop.
 };
 
 #define METHOD_IS_LEAF          (1 << kIsLeaf)
-#define METHOD_HAS_LOOP         (1 << kHasLoop)
 
 // Minimum field size to contain Dalvik v_reg number.
 #define VREG_NUM_WIDTH 16
@@ -410,15 +409,15 @@
   void ResetOptimizationFlags(uint16_t reset_flags);
 
   /**
-   * @brief Hide the BasicBlock.
-   * @details Set it to kDalvikByteCode, set hidden to true, remove all MIRs,
-   *          remove itself from any predecessor edges, remove itself from any
-   *          child's predecessor array.
+   * @brief Kill the BasicBlock.
+   * @details Unlink predecessors to make this block unreachable, then KillUnreachable().
    */
-  void Hide(MIRGraph* mir_graph);
+  void Kill(MIRGraph* mir_graph);
 
   /**
-   *  @brief Kill the unreachable block and all blocks that become unreachable by killing this one.
+   * @brief Kill the unreachable block and all blocks that become unreachable by killing this one.
+   * @details Set the block type to kDead and set hidden to true, remove all MIRs,
+   *          unlink all successors and recursively kill successors that become unreachable.
    */
   void KillUnreachable(MIRGraph* mir_graph);
 
@@ -731,6 +730,10 @@
     return max_nested_loops_;
   }
 
+  bool IsLoopHead(BasicBlockId bb_id) {
+    return topological_order_loop_ends_[topological_order_indexes_[bb_id]] != 0u;
+  }
+
   bool IsConst(int32_t s_reg) const {
     return is_constant_v_->IsBitSet(s_reg);
   }
@@ -969,13 +972,23 @@
     return reg_location_[method_sreg_];
   }
 
-  bool IsBackedge(BasicBlock* branch_bb, BasicBlockId target_bb_id) {
-    return ((target_bb_id != NullBasicBlockId) &&
-            (GetBasicBlock(target_bb_id)->start_offset <= branch_bb->start_offset));
+  bool IsBackEdge(BasicBlock* branch_bb, BasicBlockId target_bb_id) {
+    DCHECK_NE(target_bb_id, NullBasicBlockId);
+    DCHECK_LT(target_bb_id, topological_order_indexes_.size());
+    DCHECK_LT(branch_bb->id, topological_order_indexes_.size());
+    return topological_order_indexes_[target_bb_id] <= topological_order_indexes_[branch_bb->id];
   }
 
-  bool IsBackwardsBranch(BasicBlock* branch_bb) {
-    return IsBackedge(branch_bb, branch_bb->taken) || IsBackedge(branch_bb, branch_bb->fall_through);
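+  // Returns true if the back edge needs to keep its suspend check, i.e. the elimination
+  // pass did not prove a suspend point on every path from the loop head to the branch.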
+  bool IsSuspendCheckEdge(BasicBlock* branch_bb, BasicBlockId target_bb_id) {
+    if (!IsBackEdge(branch_bb, target_bb_id)) {
+      return false;
+    }
+    if (suspend_checks_in_loops_ == nullptr) {
+      // We didn't run suspend check elimination.
+      return true;
+    }
+    uint16_t target_depth = GetBasicBlock(target_bb_id)->nesting_depth;
+    return (suspend_checks_in_loops_[branch_bb->id] & (1u << (target_depth - 1u))) == 0;
   }
 
   void CountBranch(DexOffset target_offset) {
@@ -1055,6 +1068,9 @@
   bool ApplyGlobalValueNumberingGate();
   bool ApplyGlobalValueNumbering(BasicBlock* bb);
   void ApplyGlobalValueNumberingEnd();
+  bool EliminateSuspendChecksGate();
+  bool EliminateSuspendChecks(BasicBlock* bb);
+  void EliminateSuspendChecksEnd();
 
   uint16_t GetGvnIFieldId(MIR* mir) const {
     DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
@@ -1113,6 +1129,7 @@
   std::string GetSSANameWithConst(int ssa_reg, bool singles_only);
   void GetBlockName(BasicBlock* bb, char* name);
   const char* GetShortyFromTargetIdx(int);
+  const char* GetShortyFromMethodReference(const MethodReference& target_method);
   void DumpMIRGraph();
   CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range);
   BasicBlock* NewMemBB(BBType block_type, int block_id);
@@ -1165,7 +1182,7 @@
    * @brief Count the uses in the BasicBlock
    * @param bb the BasicBlock
    */
-  void CountUses(class BasicBlock* bb);
+  void CountUses(BasicBlock* bb);
 
   static uint64_t GetDataFlowAttributes(Instruction::Code opcode);
   static uint64_t GetDataFlowAttributes(MIR* mir);
@@ -1209,20 +1226,6 @@
   void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
   bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
 
-  // Used for removing redudant suspend tests
-  void AppendGenSuspendTestList(BasicBlock* bb) {
-    if (gen_suspend_test_list_.size() == 0 ||
-        gen_suspend_test_list_.back() != bb) {
-      gen_suspend_test_list_.push_back(bb);
-    }
-  }
-
-  /* This is used to check if there is already a method call dominating the
-   * source basic block of a backedge and being dominated by the target basic
-   * block of the backedge.
-   */
-  bool HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id);
-
  protected:
   int FindCommonParent(int block1, int block2);
   void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
@@ -1338,6 +1341,10 @@
       uint16_t* ifield_ids_;  // Part of GVN/LVN but cached here for LVN to avoid recalculation.
       uint16_t* sfield_ids_;  // Ditto.
     } gvn;
+    // Suspend check elimination.
+    struct {
+      DexFileMethodInliner* inliner;
+    } sce;
   } temp_;
   static const int kInvalidEntry = -1;
   ArenaVector<BasicBlock*> block_list_;
@@ -1373,11 +1380,19 @@
   ArenaVector<MirIFieldLoweringInfo> ifield_lowering_infos_;
   ArenaVector<MirSFieldLoweringInfo> sfield_lowering_infos_;
   ArenaVector<MirMethodLoweringInfo> method_lowering_infos_;
+
+  // In the suspend check elimination pass we determine for each basic block and enclosing
+  // loop whether there's guaranteed to be a suspend check on the path from the loop head
+  // to this block. If so, we can eliminate the back-edge suspend check.
+  // The bb->id is the index into suspend_checks_in_loops_ and the loop head's depth is the
+  // bit index within suspend_checks_in_loops_[bb->id].
+  uint32_t* suspend_checks_in_loops_;
+
   static const uint64_t oat_data_flow_attributes_[kMirOpLast];
-  ArenaVector<BasicBlock*> gen_suspend_test_list_;  // List of blocks containing suspend tests
 
   friend class MirOptimizationTest;
   friend class ClassInitCheckEliminationTest;
+  friend class SuspendCheckEliminationTest;
   friend class NullCheckEliminationTest;
   friend class GlobalValueNumberingTest;
   friend class LocalValueNumberingTest;
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index fd4c3d7..fc96075 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -542,28 +542,6 @@
             }
           }
           break;
-        case Instruction::RETURN_VOID:
-        case Instruction::RETURN:
-        case Instruction::RETURN_WIDE:
-        case Instruction::RETURN_OBJECT:
-          if (bb->GetFirstNonPhiInsn() == mir) {
-            // This is a simple return BB. Eliminate suspend checks on predecessor back-edges.
-            for (BasicBlockId pred_id : bb->predecessors) {
-              BasicBlock* pred_bb = GetBasicBlock(pred_id);
-              DCHECK(pred_bb != nullptr);
-              if (IsBackedge(pred_bb, bb->id) && pred_bb->last_mir_insn != nullptr &&
-                  (IsInstructionIfCc(pred_bb->last_mir_insn->dalvikInsn.opcode) ||
-                   IsInstructionIfCcZ(pred_bb->last_mir_insn->dalvikInsn.opcode) ||
-                   IsInstructionGoto(pred_bb->last_mir_insn->dalvikInsn.opcode))) {
-                pred_bb->last_mir_insn->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
-                if (cu_->verbose) {
-                  LOG(INFO) << "Suppressed suspend check on branch to return at 0x" << std::hex
-                            << pred_bb->last_mir_insn->offset;
-                }
-              }
-            }
-          }
-          break;
         default:
           break;
       }
@@ -592,12 +570,8 @@
         if ((tk_ft == NULL) && (ft_tk == NULL) && (tk_tk == ft_ft) &&
             (Predecessors(tk) == 1) && (Predecessors(ft) == 1)) {
           /*
-           * Okay - we have the basic diamond shape.  At the very least, we can eliminate the
-           * suspend check on the taken-taken branch back to the join point.
+           * Okay - we have the basic diamond shape.
            */
-          if (SelectKind(tk->last_mir_insn) == kSelectGoto) {
-              tk->last_mir_insn->optimization_flags |= (MIR_IGNORE_SUSPEND_CHECK);
-          }
 
           // TODO: Add logic for LONG.
           // Are the block bodies something we can handle?
@@ -1636,4 +1610,103 @@
   temp_scoped_alloc_.reset();
 }
 
+bool MIRGraph::EliminateSuspendChecksGate() {
+  if ((cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 ||  // Disabled.
+      GetMaxNestedLoops() == 0u ||   // Nothing to do.
+      GetMaxNestedLoops() >= 32u ||  // Only 32 bits in suspend_checks_in_loops_[.].
+                                     // Exclude 32 as well to keep bit shifts well-defined.
+      !HasInvokes()) {               // No invokes to actually eliminate any suspend checks.
+    return false;
+  }
+  if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) {
+    temp_.sce.inliner =
+        cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
+  }
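+  // The arena zero-initializes the array; EliminateSuspendChecks() relies on that for
+  // blocks outside any loop.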
+  suspend_checks_in_loops_ = static_cast<uint32_t*>(
+      arena_->Alloc(GetNumBlocks() * sizeof(*suspend_checks_in_loops_), kArenaAllocMisc));
+  return true;
+}
+
+bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) {
+  if (bb->block_type != kDalvikByteCode) {
+    return false;
+  }
+  DCHECK_EQ(GetTopologicalSortOrderLoopHeadStack()->size(), bb->nesting_depth);
+  if (bb->nesting_depth == 0u) {
+    // Out of loops.
+    DCHECK_EQ(suspend_checks_in_loops_[bb->id], 0u);  // The array was zero-initialized.
+    return false;
+  }
+  uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u;  // Start with all loop heads.
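+  // Bit (depth - 1) corresponds to the enclosing loop head at that depth; a set bit means
+  // a suspend check is guaranteed on the path from that head to this block.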
+  bool found_invoke = false;
+  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+    if (IsInstructionInvoke(mir->dalvikInsn.opcode) &&
+        (temp_.sce.inliner == nullptr ||
+         !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) {
+      // Non-intrinsic invoke, rely on a suspend point in the invoked method.
+      found_invoke = true;
+      break;
+    }
+  }
+  if (!found_invoke) {
+    // Intersect suspend checks from predecessors.
+    uint16_t bb_topo_idx = topological_order_indexes_[bb->id];
+    uint32_t pred_mask_union = 0u;
+    for (BasicBlockId pred_id : bb->predecessors) {
+      uint16_t pred_topo_idx = topological_order_indexes_[pred_id];
+      if (pred_topo_idx < bb_topo_idx) {
+        // Determine the loop depth of this predecessor relative to the current block.
+        size_t pred_loop_depth = topological_order_loop_head_stack_.size();
+        while (pred_loop_depth != 0u &&
+            pred_topo_idx < topological_order_loop_head_stack_[pred_loop_depth - 1].first) {
+          --pred_loop_depth;
+        }
+        DCHECK_LE(pred_loop_depth, GetBasicBlock(pred_id)->nesting_depth);
+        uint32_t pred_mask = (1u << pred_loop_depth) - 1u;
+        // Intersect pred_mask bits in suspend_checks_in_loops with
+        // suspend_checks_in_loops_[pred_id].
+        uint32_t pred_loops_without_checks = pred_mask & ~suspend_checks_in_loops_[pred_id];
+        suspend_checks_in_loops = suspend_checks_in_loops & ~pred_loops_without_checks;
+        pred_mask_union |= pred_mask;
+      }
+    }
+  DCHECK_EQ(((1u << (IsLoopHead(bb->id) ? bb->nesting_depth - 1u : bb->nesting_depth)) - 1u),
+              pred_mask_union);
+    suspend_checks_in_loops &= pred_mask_union;
+  }
+  suspend_checks_in_loops_[bb->id] = suspend_checks_in_loops;
+  if (suspend_checks_in_loops == 0u) {
+    return false;
+  }
+  // Apply MIR_IGNORE_SUSPEND_CHECK if appropriate.
+  if (bb->taken != NullBasicBlockId) {
+    DCHECK(bb->last_mir_insn != nullptr);
+    DCHECK(IsInstructionIfCc(bb->last_mir_insn->dalvikInsn.opcode) ||
+           IsInstructionIfCcZ(bb->last_mir_insn->dalvikInsn.opcode) ||
+           IsInstructionGoto(bb->last_mir_insn->dalvikInsn.opcode) ||
+           (static_cast<int>(bb->last_mir_insn->dalvikInsn.opcode) >= kMirOpFusedCmplFloat &&
+            static_cast<int>(bb->last_mir_insn->dalvikInsn.opcode) <= kMirOpFusedCmpLong));
+    if (!IsSuspendCheckEdge(bb, bb->taken) &&
+        (bb->fall_through == NullBasicBlockId || !IsSuspendCheckEdge(bb, bb->fall_through))) {
+      bb->last_mir_insn->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
+    }
+  } else if (bb->fall_through != NullBasicBlockId && IsSuspendCheckEdge(bb, bb->fall_through)) {
+    // We've got a fall-through suspend edge. Add an artificial GOTO to force a suspend check.
+    MIR* mir = NewMIR();
+    mir->dalvikInsn.opcode = Instruction::GOTO;
+    mir->dalvikInsn.vA = 0;  // Branch offset.
+    mir->offset = GetBasicBlock(bb->fall_through)->start_offset;
+    mir->m_unit_index = current_method_;
+    mir->ssa_rep = reinterpret_cast<SSARepresentation*>(
+        arena_->Alloc(sizeof(SSARepresentation), kArenaAllocDFInfo));  // Zero-initialized.
+    bb->AppendMIR(mir);
+    std::swap(bb->fall_through, bb->taken);  // The fall-through has become taken.
+  }
+  return true;
+}
+
+void MIRGraph::EliminateSuspendChecksEnd() {
+  temp_.sce.inliner = nullptr;
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index c794cc6..6c2e9c0 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -88,6 +88,8 @@
     { bb, opcode, 0u, vA, vB, vC }
 #define DEF_INVOKE(bb, opcode, vC, method_info) \
     { bb, opcode, method_info, 0u, 0u, vC }
+#define DEF_OTHER0(bb, opcode) \
+    { bb, opcode, 0u, 0u, 0u, 0u }
 #define DEF_OTHER1(bb, opcode, vA) \
     { bb, opcode, 0u, vA, 0u, 0u }
 #define DEF_OTHER2(bb, opcode, vA, vB) \
@@ -175,6 +177,56 @@
     PrepareBasicBlocks(bbs);
   }
 
+  void PrepareNestedLoopsWhile_While() {
+    static const BBDef bbs[] = {
+        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(8)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 8), DEF_PRED2(3, 7)),  // Outer while loop head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),  // Inner while loop head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),        // "taken" loops to inner head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(5)),        // "taken" loops to outer head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+    };
+    PrepareBasicBlocks(bbs);
+  }
+
+  void PrepareNestedLoopsWhile_WhileWhile() {
+    static const BBDef bbs[] = {
+        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 10), DEF_PRED2(3, 9)),   // Outer while loop head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),    // Inner while loop head 1.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),          // Loops to inner head 1.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED2(5, 8)),    // Inner while loop head 2.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(7)),          // Loops to inner head 2.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(7)),          // Loops to outer head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+    };
+    PrepareBasicBlocks(bbs);
+  }
+
+  void PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge() {
+    // Extra edge from the first inner loop body to second inner loop body (6u->8u).
+    static const BBDef bbs[] = {
+        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 10), DEF_PRED2(3, 9)),   // Outer while loop head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),    // Inner while loop head 1.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 8), DEF_PRED1(5)),       // Loops to inner head 1.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED2(5, 8)),    // Inner while loop head 2.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED2(7, 6)),       // Loops to inner head 2.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(7)),          // Loops to outer head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+    };
+    PrepareBasicBlocks(bbs);
+  }
+
   void PrepareCatch() {
     static const BBDef bbs[] = {
         DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
@@ -397,6 +449,43 @@
   }
 };
 
+class SuspendCheckEliminationTest : public MirOptimizationTest {
+ protected:
+  bool IsBackEdge(BasicBlockId branch_bb, BasicBlockId target_bb) {
+    BasicBlock* branch = cu_.mir_graph->GetBasicBlock(branch_bb);
+    return target_bb != NullBasicBlockId && cu_.mir_graph->IsBackEdge(branch, target_bb);
+  }
+
+  bool IsSuspendCheckEdge(BasicBlockId branch_bb, BasicBlockId target_bb) {
+    BasicBlock* branch = cu_.mir_graph->GetBasicBlock(branch_bb);
+    return cu_.mir_graph->IsSuspendCheckEdge(branch, target_bb);
+  }
+
+  void PerformSuspendCheckElimination() {
+    cu_.mir_graph->SSATransformationStart();
+    cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
+    cu_.mir_graph->SSATransformationEnd();
+    bool gate_result = cu_.mir_graph->EliminateSuspendChecksGate();
+    ASSERT_TRUE(gate_result);
+    TopologicalSortIterator iterator(cu_.mir_graph.get());
+    bool change = false;
+    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
+      change = cu_.mir_graph->EliminateSuspendChecks(bb);
+    }
+    cu_.mir_graph->EliminateSuspendChecksEnd();
+  }
+
+  SuspendCheckEliminationTest()
+      : MirOptimizationTest() {
+    static const MethodDef methods[] = {
+        { 0u, 1u, 0u, 0u, kDirect, kDirect, false, false },  // Dummy.
+    };
+    PrepareMethods(methods);
+  }
+};
+
 TEST_F(ClassInitCheckEliminationTest, SingleBlock) {
   static const SFieldDef sfields[] = {
       { 0u, 1u, 0u, 0u, kDexMemAccessWord },
@@ -882,7 +971,208 @@
   }
 }
 
-// Undefine MIR_DEF for null check elimination.
-#undef MIR_DEF
+TEST_F(SuspendCheckEliminationTest, LoopNoElimination) {
+  static const MIRDef mirs[] = {
+    DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u, 0u),  // Force the pass to run.
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge back.
+  };
+
+  PrepareLoop();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(4u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(4u, 4u));  // Suspend point on loop to self.
+}
+
+TEST_F(SuspendCheckEliminationTest, LoopElimination) {
+  static const MIRDef mirs[] = {
+    DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in the loop.
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge back.
+  };
+
+  PrepareLoop();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(4u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(4u, 4u));  // No suspend point on loop to self.
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_NoElimination) {
+  static const MIRDef mirs[] = {
+    DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u, 0u),  // Force the pass to run.
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_InvokeInOuterLoopHead) {
+  static const MIRDef mirs[] = {
+    DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in outer loop head.
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_InvokeInOuterLoopBody) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_INVOKE(7u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in outer loop body.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_InvokeInInnerLoopHead) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in inner loop head.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_InvokeInInnerLoopBody) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in inner loop body.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_WhileWhile_InvokeInFirstInnerLoopHead) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop head.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop 1 head.
+    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
+    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
+    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_WhileWhile();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(8u, 7u));
+  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));
+  ASSERT_TRUE(IsBackEdge(9u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(9u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_WhileWhile_InvokeInFirstInnerLoopBody) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
+    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop body.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop 1 head.
+    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
+    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
+    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_WhileWhile();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(8u, 7u));
+  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));
+  ASSERT_TRUE(IsBackEdge(9u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(9u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_WhileWhile_WithExtraEdge_InvokeInFirstInnerLoopBody) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
+    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop body.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop 1 head.
+    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
+    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
+    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(8u, 7u));
+  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));  // Unaffected by the extra edge.
+  ASSERT_TRUE(IsBackEdge(9u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(9u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_WhileWhile_WithExtraEdge_InvokeInSecondInnerLoopHead) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop 1 head.
+    DEF_INVOKE(7u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in second inner loop head.
+    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
+    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
+    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(8u, 7u));
+  EXPECT_FALSE(IsSuspendCheckEdge(8u, 7u));  // Unaffected by the extra edge.
+  ASSERT_TRUE(IsBackEdge(9u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(9u, 4u));
+}
 
 }  // namespace art
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index a2bf8b4..c476b2a 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -46,6 +46,7 @@
   GetPassInstance<TypeInference>(),
   GetPassInstance<GlobalValueNumberingPass>(),
   GetPassInstance<BBOptimizations>(),
+  GetPassInstance<SuspendCheckElimination>(),
 };
 
 // The number of the passes in the initial list of Passes (g_passes).
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 0bc4c3b..0ae7ee3 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -26,16 +26,6 @@
 
 class ArmMir2Lir FINAL : public Mir2Lir {
  protected:
-  // TODO: Consolidate hard float target support.
-  // InToRegStorageMapper and InToRegStorageMapping can be shared with all backends.
-  // Base class used to get RegStorage for next argument.
-  class InToRegStorageMapper {
-   public:
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
-    virtual ~InToRegStorageMapper() {
-    }
-  };
-
   // Inherited class for ARM backend.
   class InToRegStorageArmMapper FINAL : public InToRegStorageMapper {
    public:
@@ -43,45 +33,25 @@
         : cur_core_reg_(0), cur_fp_reg_(0), cur_fp_double_reg_(0) {
     }
 
-    virtual ~InToRegStorageArmMapper() {
-    }
+    RegStorage GetNextReg(ShortyArg arg) OVERRIDE;
 
-    RegStorage GetNextReg(bool is_double_or_float, bool is_wide) OVERRIDE;
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+      cur_fp_reg_ = 0;
+      cur_fp_double_reg_ = 0;
+    }
 
    private:
-    uint32_t cur_core_reg_;
-    uint32_t cur_fp_reg_;
-    uint32_t cur_fp_double_reg_;
+    size_t cur_core_reg_;
+    size_t cur_fp_reg_;
+    size_t cur_fp_double_reg_;
   };
 
-  // Class to map argument to RegStorage. The mapping object is initialized by a mapper.
-  class InToRegStorageMapping FINAL {
-   public:
-    InToRegStorageMapping()
-        : max_mapped_in_(0), is_there_stack_mapped_(false), initialized_(false) {
-    }
-
-    int GetMaxMappedIn() const {
-      return max_mapped_in_;
-    }
-
-    bool IsThereStackMapped() const {
-      return is_there_stack_mapped_;
-    }
-
-    bool IsInitialized() const {
-      return initialized_;
-    }
-
-    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
-    RegStorage Get(int in_position) const;
-
-   private:
-    std::map<int, RegStorage> mapping_;
-    int max_mapped_in_;
-    bool is_there_stack_mapped_;
-    bool initialized_;
-  };
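+  // Per-target mapper handed to the common invoke lowering code; Reset() before each use.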
+  InToRegStorageArmMapper in_to_reg_storage_arm_mapper_;
+  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
+    in_to_reg_storage_arm_mapper_.Reset();
+    return &in_to_reg_storage_arm_mapper_;
+  }
 
   public:
     ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
@@ -127,7 +97,6 @@
       }
     }
 
-    RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE;
     RegLocation GetReturnAlt() OVERRIDE;
     RegLocation GetReturnWideAlt() OVERRIDE;
     RegLocation LocCReturn() OVERRIDE;
@@ -290,19 +259,6 @@
     LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
     size_t GetInstructionOffset(LIR* lir);
 
-    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                             NextCallInsn next_call_insn,
-                             const MethodReference& target_method,
-                             uint32_t vtable_idx,
-                             uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                             bool skip_this) OVERRIDE;
-    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                           NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx,
-                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this) OVERRIDE;
-
   private:
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
@@ -361,7 +317,7 @@
                                      RegStorage::FloatSolo32(reg_num * 2 + 1));
     }
 
-    InToRegStorageMapping in_to_reg_storage_mapping_;
+    int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 0e8f645..7190a49 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -896,7 +896,7 @@
   Mir2Lir::InstallLiteralPools();
 }
 
-RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_float, bool is_wide) {
+RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(ShortyArg arg) {
   const RegStorage coreArgMappingToPhysicalReg[] =
       {rs_r1, rs_r2, rs_r3};
   const int coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
@@ -906,28 +906,18 @@
   constexpr uint32_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
   static_assert(fpArgMappingToPhysicalRegSize % 2 == 0, "Number of FP Arg regs is not even");
 
-  if (kArm32QuickCodeUseSoftFloat) {
-    is_double_or_float = false;  // Regard double as long, float as int.
-    is_wide = false;  // Map long separately.
-  }
-
   RegStorage result = RegStorage::InvalidReg();
-  if (is_double_or_float) {
-    // TODO: Remove "cur_fp_double_reg_ % 2 != 0" when we return double as double.
-    if (is_wide || cur_fp_double_reg_ % 2 != 0) {
+  // Regard double as long, float as int for kArm32QuickCodeUseSoftFloat.
+  if (arg.IsFP() && !kArm32QuickCodeUseSoftFloat) {
+    if (arg.IsWide()) {
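+      // A wide FP arg needs an aligned pair of single-precision regs (one D register),
+      // so first skip past any singles already handed out.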
       cur_fp_double_reg_ = std::max(cur_fp_double_reg_, RoundUp(cur_fp_reg_, 2));
       if (cur_fp_double_reg_ < fpArgMappingToPhysicalRegSize) {
-        // TODO: Replace by following code in the branch when FlushIns() support 64-bit registers.
-        // result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_],
-        //                                  fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]);
-        // result = As64BitFloatReg(result);
-        // cur_fp_double_reg_ += 2;
-        result = fpArgMappingToPhysicalReg[cur_fp_double_reg_];
-        cur_fp_double_reg_++;
+        result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_],
+                                         fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]);
+        result = As64BitFloatReg(result);
+        cur_fp_double_reg_ += 2;
       }
     } else {
-      // TODO: Remove the check when we return double as double.
-      DCHECK_EQ(cur_fp_double_reg_ % 2, 0U);
       if (cur_fp_reg_ % 2 == 0) {
         cur_fp_reg_ = std::max(cur_fp_double_reg_, cur_fp_reg_);
       }
@@ -939,270 +929,23 @@
   } else {
     if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
       result = coreArgMappingToPhysicalReg[cur_core_reg_++];
-      // TODO: Enable following code when FlushIns() support 64-bit registers.
-      // if (is_wide && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      //   result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]);
-      // }
+      if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+        result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]);
+      }
     }
   }
   return result;
 }
 
-RegStorage ArmMir2Lir::InToRegStorageMapping::Get(int in_position) const {
-  DCHECK(IsInitialized());
-  auto res = mapping_.find(in_position);
-  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
-}
-
-void ArmMir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
-                                                   InToRegStorageMapper* mapper) {
-  DCHECK(mapper != nullptr);
-  max_mapped_in_ = -1;
-  is_there_stack_mapped_ = false;
-  for (int in_position = 0; in_position < count; in_position++) {
-     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
-                                         arg_locs[in_position].wide);
-     if (reg.Valid()) {
-       mapping_[in_position] = reg;
-       // TODO: Enable the following code when FlushIns() support 64-bit argument registers.
-       // if (arg_locs[in_position].wide) {
-       //  if (reg.Is32Bit()) {
-       //    // As it is a split long, the hi-part is on stack.
-       //    is_there_stack_mapped_ = true;
-       //  }
-       //  // We covered 2 v-registers, so skip the next one
-       //  in_position++;
-       // }
-       max_mapped_in_ = std::max(max_mapped_in_, in_position);
-     } else {
-       is_there_stack_mapped_ = true;
-     }
-  }
-  initialized_ = true;
-}
-
-// TODO: Should be able to return long, double registers.
-// Need check some common code as it will break some assumption.
-RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  if (!in_to_reg_storage_mapping_.IsInitialized()) {
-    int start_vreg = mir_graph_->GetFirstInVR();
-    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
-
-    InToRegStorageArmMapper mapper;
-    in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper);
-  }
-  return in_to_reg_storage_mapping_.Get(arg_num);
-}
-
-int ArmMir2Lir::GenDalvikArgsNoRange(CallInfo* info,
-                                     int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
-                                     const MethodReference& target_method,
-                                     uint32_t vtable_idx, uintptr_t direct_code,
-                                     uintptr_t direct_method, InvokeType type, bool skip_this) {
+int ArmMir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
   if (kArm32QuickCodeUseSoftFloat) {
-    return Mir2Lir::GenDalvikArgsNoRange(info, call_state, pcrLabel, next_call_insn, target_method,
-                                         vtable_idx, direct_code, direct_method, type, skip_this);
-  } else {
-    return GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, vtable_idx,
-                              direct_code, direct_method, type, skip_this);
+    return Mir2Lir::GenDalvikArgsBulkCopy(info, first, count);
   }
-}
-
-int ArmMir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
-                                   LIR** pcrLabel, NextCallInsn next_call_insn,
-                                   const MethodReference& target_method,
-                                   uint32_t vtable_idx, uintptr_t direct_code,
-                                   uintptr_t direct_method, InvokeType type, bool skip_this) {
-  if (kArm32QuickCodeUseSoftFloat) {
-    return Mir2Lir::GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method,
-                                       vtable_idx, direct_code, direct_method, type, skip_this);
-  }
-
-  // TODO: Rework the implementation when argument register can be long or double.
-
-  /* If no arguments, just return */
-  if (info->num_arg_words == 0) {
-    return call_state;
-  }
-
-  const int start_index = skip_this ? 1 : 0;
-
-  InToRegStorageArmMapper mapper;
-  InToRegStorageMapping in_to_reg_storage_mapping;
-  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
-  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
-  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1);
-
-  // First of all, check whether it makes sense to use bulk copying.
-  // Bulk copying is done only for the range case.
-  // TODO: make a constant instead of 2
-  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
-    // Scan the rest of the args - if in phys_reg flush to memory
-    for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) {
-      RegLocation loc = info->args[next_arg];
-      if (loc.wide) {
-        // TODO: Only flush hi-part.
-        if (loc.high_word) {
-          loc = info->args[--next_arg];
-        }
-        loc = UpdateLocWide(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
-        }
-        next_arg += 2;
-      } else {
-        loc = UpdateLoc(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (loc.ref) {
-            StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
-          } else {
-            StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
-                          kNotVolatile);
-          }
-        }
-        next_arg++;
-      }
-    }
-
-    // The rest can be copied together
-    int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1,
-                                                   cu_->instruction_set);
-
-    int current_src_offset = start_offset;
-    int current_dest_offset = outs_offset;
-
-    // Only davik regs are accessed in this loop; no next_call_insn() calls.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    while (regs_left_to_pass_via_stack > 0) {
-      /*
-       * TODO: Improve by adding block copy for large number of arguments.  This
-       * should be done, if possible, as a target-depending helper.  For now, just
-       * copy a Dalvik vreg at a time.
-       */
-      // Moving 32-bits via general purpose register.
-      size_t bytes_to_move = sizeof(uint32_t);
-
-      // Instead of allocating a new temp, simply reuse one of the registers being used
-      // for argument passing.
-      RegStorage temp = TargetReg(kArg3, kNotWide);
-
-      // Now load the argument VR and store to the outs.
-      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
-      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
-
-      current_src_offset += bytes_to_move;
-      current_dest_offset += bytes_to_move;
-      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
-    }
-    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
-  }
-
-  // Now handle rest not registers if they are
-  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regWide = TargetReg(kArg2, kWide);
-    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
-      RegLocation rl_arg = info->args[i];
-      rl_arg = UpdateRawLoc(rl_arg);
-      RegStorage reg = in_to_reg_storage_mapping.Get(i);
-      // TODO: Only pass split wide hi-part via stack.
-      if (!reg.Valid() || rl_arg.wide) {
-        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (rl_arg.wide) {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
-            } else {
-              LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
-            }
-          } else {
-            if (rl_arg.location == kLocPhysReg) {
-              if (rl_arg.ref) {
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
-              } else {
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
-              }
-            } else {
-              if (rl_arg.ref) {
-                RegStorage regSingle = TargetReg(kArg2, kRef);
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile);
-              } else {
-                RegStorage regSingle = TargetReg(kArg2, kNotWide);
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
-              }
-            }
-          }
-        }
-
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      if (rl_arg.wide) {
-        i++;
-      }
-    }
-  }
-
-  // Finish with mapped registers
-  for (int i = start_index; i <= last_mapped_in; i++) {
-    RegLocation rl_arg = info->args[i];
-    rl_arg = UpdateRawLoc(rl_arg);
-    RegStorage reg = in_to_reg_storage_mapping.Get(i);
-    if (reg.Valid()) {
-      if (reg.Is64Bit()) {
-        LoadValueDirectWideFixed(rl_arg, reg);
-      } else {
-        // TODO: Only split long should be the case we need to care about.
-        if (rl_arg.wide) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          int high_word = rl_arg.high_word ? 1 : 0;
-          rl_arg = high_word ? info->args[i - 1] : rl_arg;
-          if (rl_arg.location == kLocPhysReg) {
-            RegStorage rs_arg = rl_arg.reg;
-            if (rs_arg.IsDouble() && rs_arg.Is64BitSolo()) {
-              rs_arg = As64BitFloatRegPair(rs_arg);
-            }
-            RegStorage rs_arg_low = rs_arg.GetLow();
-            RegStorage rs_arg_high = rs_arg.GetHigh();
-            OpRegCopy(reg, high_word ? rs_arg_high : rs_arg_low);
-          } else {
-            Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + high_word), reg);
-          }
-        } else {
-          LoadValueDirectFixed(rl_arg, reg);
-        }
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                  direct_code, direct_method, type);
-    }
-    if (reg.Is64Bit()) {
-      i++;
-    }
-  }
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                           direct_code, direct_method, type);
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
-    }
-  }
-  return call_state;
+  /*
+   * TODO: Improve by adding block copy for large number of arguments.  For now, just
+   * copy a Dalvik vreg at a time.
+   */
+  return count;
 }
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 5e10f80..766ac23 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -27,38 +27,25 @@
 
 class Arm64Mir2Lir FINAL : public Mir2Lir {
  protected:
-  // TODO: consolidate 64-bit target support.
-  class InToRegStorageMapper {
-   public:
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0;
-    virtual ~InToRegStorageMapper() {}
-  };
-
   class InToRegStorageArm64Mapper : public InToRegStorageMapper {
    public:
     InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
     virtual ~InToRegStorageArm64Mapper() {}
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref);
+    virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+      cur_fp_reg_ = 0;
+    }
    private:
-    int cur_core_reg_;
-    int cur_fp_reg_;
+    size_t cur_core_reg_;
+    size_t cur_fp_reg_;
   };
 
-  class InToRegStorageMapping {
-   public:
-    InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false),
-    initialized_(false) {}
-    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
-    int GetMaxMappedIn() { return max_mapped_in_; }
-    bool IsThereStackMapped() { return is_there_stack_mapped_; }
-    RegStorage Get(int in_position);
-    bool IsInitialized() { return initialized_; }
-   private:
-    std::map<int, RegStorage> mapping_;
-    int max_mapped_in_;
-    bool is_there_stack_mapped_;
-    bool initialized_;
-  };
+  InToRegStorageArm64Mapper in_to_reg_storage_arm64_mapper_;
+  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
+    in_to_reg_storage_arm64_mapper_.Reset();
+    return &in_to_reg_storage_arm64_mapper_;
+  }
 
  public:
   Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
@@ -113,7 +100,6 @@
   RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE {
     return As64BitReg(TargetReg(symbolic_reg));
   }
-  RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE;
   RegLocation GetReturnAlt() OVERRIDE;
   RegLocation GetReturnWideAlt() OVERRIDE;
   RegLocation LocCReturn() OVERRIDE;
@@ -240,22 +226,6 @@
   bool InexpensiveConstantLong(int64_t value) OVERRIDE;
   bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
 
-  void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
-
-  int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                           NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx,
-                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this) OVERRIDE;
-
-  int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                         NextCallInsn next_call_insn,
-                         const MethodReference& target_method,
-                         uint32_t vtable_idx,
-                         uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                         bool skip_this) OVERRIDE;
-
   bool WideGPRsAreAliases() const OVERRIDE {
     return true;  // 64b architecture.
   }
@@ -422,10 +392,11 @@
   void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                      RegLocation rl_src2, bool is_div, int flags);
 
-  InToRegStorageMapping in_to_reg_storage_mapping_;
   static const A64EncodingMap EncodingMap[kA64Last];
 
   ArenaVector<LIR*> call_method_insns_;
+
+  int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 094ff51..e7fa8ed 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -790,27 +790,23 @@
   return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt;
 }
 
-RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or_float,
-                                                               bool is_wide,
-                                                               bool is_ref) {
+RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) {
   const RegStorage coreArgMappingToPhysicalReg[] =
       {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7};
-  const int coreArgMappingToPhysicalRegSize =
-      sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
+  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
   const RegStorage fpArgMappingToPhysicalReg[] =
       {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7};
-  const int fpArgMappingToPhysicalRegSize =
-      sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);
+  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
 
   RegStorage result = RegStorage::InvalidReg();
-  if (is_double_or_float) {
+  if (arg.IsFP()) {
     if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      DCHECK(!is_ref);
+      DCHECK(!arg.IsRef());
       result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
       if (result.Valid()) {
         // TODO: switching between widths remains a bit ugly.  Better way?
         int res_reg = result.GetReg();
-        result = is_wide ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg);
+        result = arg.IsWide() ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg);
       }
     }
   } else {
@@ -819,388 +815,15 @@
       if (result.Valid()) {
         // TODO: switching between widths remains a bit ugly.  Better way?
         int res_reg = result.GetReg();
-        DCHECK(!(is_wide && is_ref));
-        result = (is_wide || is_ref) ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
+        DCHECK(!(arg.IsWide() && arg.IsRef()));
+        result = (arg.IsWide() || arg.IsRef()) ?
+                 RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg);
       }
     }
   }
   return result;
 }
 
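For intuition, here is a minimal standalone sketch of the mapping policy GetNextReg implements, assuming a made-up argument list (long, float, Object, double) on a non-static method; this is plain C++ for illustration, not ART code. Core arguments consume x1..x7 in order, FP arguments consume f0..f7 in order, and wide or reference core arguments take the 64-bit view of their register.

#include <cstddef>
#include <cstdio>

int main() {
  const char* core[] = {"x1", "x2", "x3", "x4", "x5", "x6", "x7"};
  size_t next_core = 0;
  size_t next_fp = 0;
  printf("this -> %s (64-bit ref)\n", core[next_core++]);
  for (const char* p = "JFLD"; *p != 0; ++p) {  // long, float, Object, double
    bool is_fp = (*p == 'F' || *p == 'D');
    bool is_wide = (*p == 'J' || *p == 'D');
    if (is_fp) {
      // FP args keep their index; width picks the 32- or 64-bit view.
      printf("%c -> %c%zu\n", *p, is_wide ? 'd' : 's', next_fp++);
    } else {
      // Core args: wide values and references use the 64-bit view.
      bool is_ref = (*p == 'L');
      printf("%c -> %s (%s)\n", *p, core[next_core++],
             (is_wide || is_ref) ? "64-bit" : "32-bit");
    }
  }
  return 0;  // Prints: this->x1, J->x2, F->s0, L->x3, D->d1.
}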
-RegStorage Arm64Mir2Lir::InToRegStorageMapping::Get(int in_position) {
-  DCHECK(IsInitialized());
-  auto res = mapping_.find(in_position);
-  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
-}
-
-void Arm64Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
-                                                     InToRegStorageMapper* mapper) {
-  DCHECK(mapper != nullptr);
-  max_mapped_in_ = -1;
-  is_there_stack_mapped_ = false;
-  for (int in_position = 0; in_position < count; in_position++) {
-     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
-                                         arg_locs[in_position].wide,
-                                         arg_locs[in_position].ref);
-     if (reg.Valid()) {
-       mapping_[in_position] = reg;
-       if (arg_locs[in_position].wide) {
-         // We covered 2 args, so skip the next one
-         in_position++;
-       }
-       max_mapped_in_ = std::max(max_mapped_in_, in_position);
-     } else {
-       is_there_stack_mapped_ = true;
-     }
-  }
-  initialized_ = true;
-}
-
-
-// Deprecate.  Use the new mechanism.
-// TODO(Arm64): reuse info in QuickArgumentVisitor?
-static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used,
-                                    OpSize* op_size) {
-  if (loc->fp) {
-    int n = *num_fpr_used;
-    if (n < 8) {
-      *num_fpr_used = n + 1;
-      RegStorage::RegStorageKind reg_kind;
-      if (loc->wide) {
-        *op_size = kDouble;
-        reg_kind = RegStorage::k64BitSolo;
-      } else {
-        *op_size = kSingle;
-        reg_kind = RegStorage::k32BitSolo;
-      }
-      return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n);
-    }
-  } else {
-    int n = *num_gpr_used;
-    if (n < 8) {
-      *num_gpr_used = n + 1;
-      if (loc->wide || loc->ref) {
-        *op_size = k64;
-        return RegStorage::Solo64(n);
-      } else {
-        *op_size = k32;
-        return RegStorage::Solo32(n);
-      }
-    }
-  }
-  *op_size = kWord;
-  return RegStorage::InvalidReg();
-}
-
-RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  if (!in_to_reg_storage_mapping_.IsInitialized()) {
-    int start_vreg = mir_graph_->GetFirstInVR();
-    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
-
-    InToRegStorageArm64Mapper mapper;
-    in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper);
-  }
-  return in_to_reg_storage_mapping_.Get(arg_num);
-}
-
-
-/*
- * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform initial
- * assignment of promoted arguments.
- *
- * ArgLocs is an array of location records describing the incoming arguments
- * with one location record per word of argument.
- */
-void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
-  int num_gpr_used = 1;
-  int num_fpr_used = 0;
-
-  /*
-   * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod>
-   * It will attempt to keep kArg0 live (or copy it to home location
-   * if promoted).
-   */
-  RegLocation rl_src = rl_method;
-  rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetReg(kArg0, kRef);
-  rl_src.home = false;
-  MarkLive(rl_src);
-  StoreValue(rl_method, rl_src);
-  // If Method* has been promoted, explicitly flush
-  if (rl_method.location == kLocPhysReg) {
-    StoreRefDisp(TargetPtrReg(kSp), 0, rl_src.reg, kNotVolatile);
-  }
-
-  if (mir_graph_->GetNumOfInVRs() == 0) {
-    return;
-  }
-
-  // Handle dalvik registers.
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  int start_vreg = mir_graph_->GetFirstInVR();
-  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) {
-    RegLocation* t_loc = &ArgLocs[i];
-    OpSize op_size;
-    RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
-
-    if (reg.Valid()) {
-      // If arriving in register.
-
-      // We have already updated the arg location with promoted info
-      // so we can be based on it.
-      if (t_loc->location == kLocPhysReg) {
-        // Just copy it.
-        OpRegCopy(t_loc->reg, reg);
-      } else {
-        // Needs flush.
-        if (t_loc->ref) {
-          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile);
-        } else {
-          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
-              kNotVolatile);
-        }
-      }
-    } else {
-      // If arriving in frame & promoted.
-      if (t_loc->location == kLocPhysReg) {
-        if (t_loc->ref) {
-          LoadRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
-        } else {
-          LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg,
-                       t_loc->wide ? k64 : k32, kNotVolatile);
-        }
-      }
-    }
-    if (t_loc->wide) {
-      // Increment i to skip the next one.
-      i++;
-    }
-    //      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
-    //        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
-    //      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-    //        OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg);
-    //      } else {
-    //        StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile);
-    //        if (reg.Is64Bit()) {
-    //          if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
-    //            LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots";
-    //          }
-    //          i += 1;
-    //        }
-    //      }
-    //    } else {
-    //      // If arriving in frame & promoted
-    //      if (v_map->core_location == kLocPhysReg) {
-    //        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
-    //                     RegStorage::Solo32(v_map->core_reg));
-    //      }
-    //      if (v_map->fp_location == kLocPhysReg) {
-    //        LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg));
-    //      }
-  }
-}
-
-/*
- * Load up to 5 arguments, the first three of which will be in
- * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
- * and as part of the load sequence, it must be replaced with
- * the target method pointer.
- */
-int Arm64Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
-                                       int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
-                                       const MethodReference& target_method,
-                                       uint32_t vtable_idx, uintptr_t direct_code,
-                                       uintptr_t direct_method, InvokeType type, bool skip_this) {
-  return GenDalvikArgsRange(info,
-                       call_state, pcrLabel, next_call_insn,
-                       target_method,
-                       vtable_idx, direct_code,
-                       direct_method, type, skip_this);
-}
-
-/*
- * May have 0+ arguments (also used for jumbo).  Note that
- * source virtual registers may be in physical registers, so may
- * need to be flushed to home location before copying.  This
- * applies to arg3 and above (see below).
- *
- * FIXME: update comments.
- *
- * Two general strategies:
- *    If < 20 arguments
- *       Pass args 3-18 using vldm/vstm block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *    If 20+ arguments
- *       Pass args arg19+ using memcpy block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *
- */
-int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
-                                     LIR** pcrLabel, NextCallInsn next_call_insn,
-                                     const MethodReference& target_method,
-                                     uint32_t vtable_idx, uintptr_t direct_code,
-                                     uintptr_t direct_method, InvokeType type, bool skip_this) {
-  /* If no arguments, just return */
-  if (info->num_arg_words == 0)
-    return call_state;
-
-  const int start_index = skip_this ? 1 : 0;
-
-  InToRegStorageArm64Mapper mapper;
-  InToRegStorageMapping in_to_reg_storage_mapping;
-  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
-  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
-  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1);
-
-  // First of all, check whether it makes sense to use bulk copying.
-  // Bulk copying is done only for the range case.
-  // TODO: make a constant instead of 2
-  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
-    // Scan the rest of the args - if in phys_reg flush to memory
-    for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) {
-      RegLocation loc = info->args[next_arg];
-      if (loc.wide) {
-        loc = UpdateLocWide(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
-        }
-        next_arg += 2;
-      } else {
-        loc = UpdateLoc(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (loc.ref) {
-            StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
-          } else {
-            StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
-                          kNotVolatile);
-          }
-        }
-        next_arg++;
-      }
-    }
-
-    // The rest can be copied together
-    int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1,
-                                                   cu_->instruction_set);
-
-    int current_src_offset = start_offset;
-    int current_dest_offset = outs_offset;
-
-    // Only davik regs are accessed in this loop; no next_call_insn() calls.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    while (regs_left_to_pass_via_stack > 0) {
-      /*
-       * TODO: Improve by adding block copy for large number of arguments.  This
-       * should be done, if possible, as a target-depending helper.  For now, just
-       * copy a Dalvik vreg at a time.
-       */
-      // Moving 32-bits via general purpose register.
-      size_t bytes_to_move = sizeof(uint32_t);
-
-      // Instead of allocating a new temp, simply reuse one of the registers being used
-      // for argument passing.
-      RegStorage temp = TargetReg(kArg3, kNotWide);
-
-      // Now load the argument VR and store to the outs.
-      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
-      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
-
-      current_src_offset += bytes_to_move;
-      current_dest_offset += bytes_to_move;
-      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
-    }
-    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
-  }
-
-  // Now handle rest not registers if they are
-  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regWide = TargetReg(kArg3, kWide);
-    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
-      RegLocation rl_arg = info->args[i];
-      rl_arg = UpdateRawLoc(rl_arg);
-      RegStorage reg = in_to_reg_storage_mapping.Get(i);
-      if (!reg.Valid()) {
-        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (rl_arg.wide) {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
-            } else {
-              LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
-            }
-          } else {
-            if (rl_arg.location == kLocPhysReg) {
-              if (rl_arg.ref) {
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
-              } else {
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
-              }
-            } else {
-              if (rl_arg.ref) {
-                RegStorage regSingle = TargetReg(kArg2, kRef);
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile);
-              } else {
-                RegStorage regSingle = TargetReg(kArg2, kNotWide);
-                LoadValueDirectFixed(rl_arg, regSingle);
-                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
-              }
-            }
-          }
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      if (rl_arg.wide) {
-        i++;
-      }
-    }
-  }
-
-  // Finish with mapped registers
-  for (int i = start_index; i <= last_mapped_in; i++) {
-    RegLocation rl_arg = info->args[i];
-    rl_arg = UpdateRawLoc(rl_arg);
-    RegStorage reg = in_to_reg_storage_mapping.Get(i);
-    if (reg.Valid()) {
-      if (rl_arg.wide) {
-        LoadValueDirectWideFixed(rl_arg, reg);
-      } else {
-        LoadValueDirectFixed(rl_arg, reg);
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                  direct_code, direct_method, type);
-    }
-    if (rl_arg.wide) {
-      i++;
-    }
-  }
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                           direct_code, direct_method, type);
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
-    }
-  }
-  return call_state;
-}
-
 void Arm64Mir2Lir::InstallLiteralPools() {
   // PC-relative calls to methods.
   patches_.reserve(call_method_insns_.size());
@@ -1218,4 +841,12 @@
   Mir2Lir::InstallLiteralPools();
 }
 
+int Arm64Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* /*info*/, int /*first*/, int count) {
+  /*
+   * TODO: Improve by adding block copy for a large number of arguments.  For now, just
+   * copy a Dalvik vreg at a time.
+   */
+  return count;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 066041c..cc61e93 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -997,7 +997,8 @@
       last_lir_insn_(nullptr),
       slow_paths_(arena->Adapter(kArenaAllocSlowPaths)),
       mem_ref_type_(ResourceMask::kHeapRef),
-      mask_cache_(arena) {
+      mask_cache_(arena),
+      in_to_reg_storage_mapping_(arena) {
   switch_tables_.reserve(4);
   fill_array_data_.reserve(4);
   tempreg_info_.reserve(20);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 774176e..50014b0 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -2163,18 +2163,15 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTest(int opt_flags) {
+  if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK) != 0) {
+    return;
+  }
   if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitSuspendChecks()) {
-    if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
-      return;
-    }
     FlushAllRegs();
     LIR* branch = OpTestSuspend(NULL);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
     AddSlowPath(new (arena_) SuspendCheckSlowPath(this, branch, cont));
   } else {
-    if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
-      return;
-    }
     FlushAllRegs();     // TODO: needed?
     LIR* inst = CheckSuspendUsingLoad();
     MarkSafepointPC(inst);
@@ -2183,11 +2180,11 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) {
+  if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK) != 0) {
+    OpUnconditionalBranch(target);
+    return;
+  }
   if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitSuspendChecks()) {
-    if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
-      OpUnconditionalBranch(target);
-      return;
-    }
     OpTestSuspend(target);
     FlushAllRegs();
     LIR* branch = OpUnconditionalBranch(nullptr);
@@ -2195,10 +2192,6 @@
   } else {
     // For the implicit suspend check, just perform the trigger
     // load and branch to the target.
-    if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
-      OpUnconditionalBranch(target);
-      return;
-    }
     FlushAllRegs();
     LIR* inst = CheckSuspendUsingLoad();
     MarkSafepointPC(inst);
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 31b81bf..9462d3d 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -401,59 +401,50 @@
    * half to memory as well.
    */
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) {
-    PromotionMap* v_map = &promotion_map_[start_vreg + i];
+  RegLocation* t_loc = nullptr;
+  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 2 : 1) {
+    // Get the reg corresponding to this input.
     RegStorage reg = GetArgMappingToPhysicalReg(i);
+    t_loc = &ArgLocs[i];
+
+    // If a wide input arrived in only a single (32-bit) register, flush it
+    // and proceed as if it comes from memory.
+    if (t_loc->wide && reg.Valid() && !reg.Is64Bit()) {
+      StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, k32, kNotVolatile);
+      reg = RegStorage::InvalidReg();
+    }
 
     if (reg.Valid()) {
-      // If arriving in register
-      bool need_flush = true;
-      RegLocation* t_loc = &ArgLocs[i];
-      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
-        need_flush = false;
-      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
-        OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg);
-        need_flush = false;
-      } else {
-        need_flush = true;
-      }
+      // If arriving in register.
 
-      // For wide args, force flush if not fully promoted
-      if (t_loc->wide) {
-        PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
-        // Is only half promoted?
-        need_flush |= (p_map->core_location != v_map->core_location) ||
-            (p_map->fp_location != v_map->fp_location);
-        if ((cu_->instruction_set == kThumb2) && t_loc->fp && !need_flush) {
-          /*
-           * In Arm, a double is represented as a pair of consecutive single float
-           * registers starting at an even number.  It's possible that both Dalvik vRegs
-           * representing the incoming double were independently promoted as singles - but
-           * not in a form usable as a double.  If so, we need to flush - even though the
-           * incoming arg appears fully in register.  At this point in the code, both
-           * halves of the double are promoted.  Make sure they are in a usable form.
-           */
-          int lowreg_index = start_vreg + i + (t_loc->high_word ? -1 : 0);
-          int low_reg = promotion_map_[lowreg_index].fp_reg;
-          int high_reg = promotion_map_[lowreg_index + 1].fp_reg;
-          if (((low_reg & 0x1) != 0) || (high_reg != (low_reg + 1))) {
-            need_flush = true;
-          }
+      // We have already updated the arg location with promoted info,
+      // so we can rely on it.
+      if (t_loc->location == kLocPhysReg) {
+        // Just copy it.
+        if (t_loc->wide) {
+          OpRegCopyWide(t_loc->reg, reg);
+        } else {
+          OpRegCopy(t_loc->reg, reg);
+        }
+      } else {
+        // Needs flush.
+        int offset = SRegOffset(start_vreg + i);
+        if (t_loc->ref) {
+          StoreRefDisp(TargetPtrReg(kSp), offset, reg, kNotVolatile);
+        } else {
+          StoreBaseDisp(TargetPtrReg(kSp), offset, reg, t_loc->wide ? k64 : k32, kNotVolatile);
         }
       }
-      if (need_flush) {
-        Store32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg);
-      }
     } else {
-      // If arriving in frame & promoted
-      if (v_map->core_location == kLocPhysReg) {
-        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i),
-                   RegStorage::Solo32(v_map->core_reg));
-      }
-      if (v_map->fp_location == kLocPhysReg) {
-        Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i),
-                   RegStorage::Solo32(v_map->fp_reg));
+      // If arriving in frame & promoted.
+      if (t_loc->location == kLocPhysReg) {
+        int offset = SRegOffset(start_vreg + i);
+        if (t_loc->ref) {
+          LoadRefDisp(TargetPtrReg(kSp), offset, t_loc->reg, kNotVolatile);
+        } else {
+          LoadBaseDisp(TargetPtrReg(kSp), offset, t_loc->reg, t_loc->wide ? k64 : k32,
+                       kNotVolatile);
+        }
       }
     }
   }
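The rewritten loop above keys everything off the argument's RegLocation (already updated with promotion info) instead of the old PromotionMap bookkeeping. A standalone sketch of the per-argument decision it encodes, in plain C++ rather than ART code:

#include <cstdio>

enum Where { kInReg, kInFrame };

// Mirrors the four cases of the flush loop: copy, store, load, or nothing.
void HandleIn(Where arrives, bool promoted) {
  if (arrives == kInReg) {
    if (promoted) {
      printf("copy incoming reg -> promoted reg\n");
    } else {
      printf("store incoming reg -> frame home slot\n");
    }
  } else if (promoted) {
    printf("load frame home slot -> promoted reg\n");
  } else {
    printf("nothing to do: value already sits in its home slot\n");
  }
}

int main() {
  HandleIn(kInReg, true);
  HandleIn(kInReg, false);
  HandleIn(kInFrame, true);
  HandleIn(kInFrame, false);
  return 0;
}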
@@ -568,7 +559,7 @@
  * emit the next instruction in a virtual invoke sequence.
  * We can use kLr as a temp prior to target address loading
  * Note also that we'll load the first argument ("this") into
- * kArg1 here rather than the standard LoadArgRegs.
+ * kArg1 here rather than the standard GenDalvikArgs.
  */
 static int NextVCallInsn(CompilationUnit* cu, CallInfo* info,
                          int state, const MethodReference& target_method,
@@ -612,7 +603,7 @@
  * Emit the next instruction in an invoke interface sequence. This will do a lookup in the
  * class's IMT, calling either the actual method or art_quick_imt_conflict_trampoline if
  * more than one interface method map to the same index. Note also that we'll load the first
- * argument ("this") into kArg1 here rather than the standard LoadArgRegs.
+ * argument ("this") into kArg1 here rather than the standard GenDalvikArgs.
  */
 static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state,
                                  const MethodReference& target_method,
@@ -719,158 +710,6 @@
                           target_method, 0);
 }
 
-int Mir2Lir::LoadArgRegs(CallInfo* info, int call_state,
-                         NextCallInsn next_call_insn,
-                         const MethodReference& target_method,
-                         uint32_t vtable_idx, uintptr_t direct_code,
-                         uintptr_t direct_method, InvokeType type, bool skip_this) {
-  int last_arg_reg = 3 - 1;
-  int arg_regs[3] = {TargetReg(kArg1, kNotWide).GetReg(), TargetReg(kArg2, kNotWide).GetReg(),
-                     TargetReg(kArg3, kNotWide).GetReg()};
-
-  int next_reg = 0;
-  int next_arg = 0;
-  if (skip_this) {
-    next_reg++;
-    next_arg++;
-  }
-  for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) {
-    RegLocation rl_arg = info->args[next_arg++];
-    rl_arg = UpdateRawLoc(rl_arg);
-    if (rl_arg.wide && (next_reg <= last_arg_reg - 1)) {
-      RegStorage r_tmp(RegStorage::k64BitPair, arg_regs[next_reg], arg_regs[next_reg + 1]);
-      LoadValueDirectWideFixed(rl_arg, r_tmp);
-      next_reg++;
-      next_arg++;
-    } else {
-      if (rl_arg.wide) {
-        rl_arg = NarrowRegLoc(rl_arg);
-        rl_arg.is_const = false;
-      }
-      LoadValueDirectFixed(rl_arg, RegStorage::Solo32(arg_regs[next_reg]));
-    }
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                direct_code, direct_method, type);
-  }
-  return call_state;
-}
-
-/*
- * Load up to 5 arguments, the first three of which will be in
- * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
- * and as part of the load sequence, it must be replaced with
- * the target method pointer.  Note, this may also be called
- * for "range" variants if the number of arguments is 5 or fewer.
- */
-int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
-                                  int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
-                                  const MethodReference& target_method,
-                                  uint32_t vtable_idx, uintptr_t direct_code,
-                                  uintptr_t direct_method, InvokeType type, bool skip_this) {
-  RegLocation rl_arg;
-
-  /* If no arguments, just return */
-  if (info->num_arg_words == 0)
-    return call_state;
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                              direct_code, direct_method, type);
-
-  DCHECK_LE(info->num_arg_words, 5);
-  if (info->num_arg_words > 3) {
-    int32_t next_use = 3;
-    // Detect special case of wide arg spanning arg3/arg4
-    RegLocation rl_use0 = info->args[0];
-    RegLocation rl_use1 = info->args[1];
-    RegLocation rl_use2 = info->args[2];
-    if (((!rl_use0.wide && !rl_use1.wide) || rl_use0.wide) && rl_use2.wide) {
-      RegStorage reg;
-      // Wide spans, we need the 2nd half of uses[2].
-      rl_arg = UpdateLocWide(rl_use2);
-      if (rl_arg.location == kLocPhysReg) {
-        if (rl_arg.reg.IsPair()) {
-          reg = rl_arg.reg.GetHigh();
-        } else {
-          RegisterInfo* reg_info = GetRegInfo(rl_arg.reg);
-          reg_info = reg_info->FindMatchingView(RegisterInfo::kHighSingleStorageMask);
-          if (reg_info == nullptr) {
-            // NOTE: For hard float convention we won't split arguments across reg/mem.
-            UNIMPLEMENTED(FATAL) << "Needs hard float api.";
-          }
-          reg = reg_info->GetReg();
-        }
-      } else {
-        // kArg2 & rArg3 can safely be used here
-        reg = TargetReg(kArg3, kNotWide);
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg);
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetPtrReg(kSp), (next_use + 1) * 4, reg);
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                                  direct_code, direct_method, type);
-      next_use++;
-    }
-    // Loop through the rest
-    while (next_use < info->num_arg_words) {
-      RegStorage arg_reg;
-      rl_arg = info->args[next_use];
-      rl_arg = UpdateRawLoc(rl_arg);
-      if (rl_arg.location == kLocPhysReg) {
-        arg_reg = rl_arg.reg;
-      } else {
-        arg_reg = TargetReg(kArg2, rl_arg.wide ? kWide : kNotWide);
-        if (rl_arg.wide) {
-          LoadValueDirectWideFixed(rl_arg, arg_reg);
-        } else {
-          LoadValueDirectFixed(rl_arg, arg_reg);
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      int outs_offset = (next_use + 1) * 4;
-      {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        if (rl_arg.wide) {
-          StoreBaseDisp(TargetPtrReg(kSp), outs_offset, arg_reg, k64, kNotVolatile);
-          next_use += 2;
-        } else {
-          Store32Disp(TargetPtrReg(kSp), outs_offset, arg_reg);
-          next_use++;
-        }
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-    }
-  }
-
-  call_state = LoadArgRegs(info, call_state, next_call_insn,
-                           target_method, vtable_idx, direct_code, direct_method,
-                           type, skip_this);
-
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
-          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
-        return call_state;
-      }
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
-      GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    }
-  }
-  return call_state;
-}
-
 // Default implementation of implicit null pointer check.
 // Overridden by arch specific as necessary.
 void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
@@ -883,209 +722,194 @@
   FreeTemp(tmp);
 }
 
-
-/*
- * May have 0+ arguments (also used for jumbo).  Note that
- * source virtual registers may be in physical registers, so may
- * need to be flushed to home location before copying.  This
- * applies to arg3 and above (see below).
- *
- * Two general strategies:
- *    If < 20 arguments
- *       Pass args 3-18 using vldm/vstm block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *    If 20+ arguments
- *       Pass args arg19+ using memcpy block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *
+/**
+ * @brief Used to flush promoted registers if they are used as arguments
+ * in an invocation.
+ * @param info the information about the arguments of the invocation.
+ * @param start the first argument to start looking from.
+ */
-int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
-                                LIR** pcrLabel, NextCallInsn next_call_insn,
-                                const MethodReference& target_method,
-                                uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
-                                InvokeType type, bool skip_this) {
-  // If we can treat it as non-range (Jumbo ops will use range form)
-  if (info->num_arg_words <= 5)
-    return GenDalvikArgsNoRange(info, call_state, pcrLabel,
-                                next_call_insn, target_method, vtable_idx,
-                                direct_code, direct_method, type, skip_this);
-  /*
-   * First load the non-register arguments.  Both forms expect all
-   * of the source arguments to be in their home frame location, so
-   * scan the s_reg names and flush any that have been promoted to
-   * frame backing storage.
-   */
+void Mir2Lir::GenDalvikArgsFlushPromoted(CallInfo* info, int start) {
+  if (cu_->disable_opt & (1 << kPromoteRegs)) {
+    // This makes sense only if promotion is enabled.
+    return;
+  }
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   // Scan the rest of the args - if in phys_reg flush to memory
-  for (int next_arg = 0; next_arg < info->num_arg_words;) {
+  for (int next_arg = start; next_arg < info->num_arg_words;) {
     RegLocation loc = info->args[next_arg];
     if (loc.wide) {
       loc = UpdateLocWide(loc);
-      if ((next_arg >= 2) && (loc.location == kLocPhysReg)) {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+      if (loc.location == kLocPhysReg) {
         StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
       }
       next_arg += 2;
     } else {
       loc = UpdateLoc(loc);
-      if ((next_arg >= 3) && (loc.location == kLocPhysReg)) {
-        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+      if (loc.location == kLocPhysReg) {
+        if (loc.ref) {
+          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+        } else {
+          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
+                        kNotVolatile);
+        }
       }
       next_arg++;
     }
   }
+}
 
-  // The first 3 arguments are passed via registers.
-  // TODO: For 64-bit, instead of hardcoding 4 for Method* size, we should either
-  // get size of uintptr_t or size of object reference according to model being used.
-  int outs_offset = 4 /* Method* */ + (3 * sizeof(uint32_t));
-  int start_offset = SRegOffset(info->args[3].s_reg_low);
-  int regs_left_to_pass_via_stack = info->num_arg_words - 3;
-  DCHECK_GT(regs_left_to_pass_via_stack, 0);
+/**
+ * @brief Used to optimize the copying of VRs which are arguments of an invocation.
+ * Note that promoted registers must be flushed before copying.
+ * An implementation may skip several of the leading VRs (possibly all of them)
+ * but must copy the rest through to the end, and must return the number of
+ * VRs it skipped.
+ * @see GenDalvikArgsFlushPromoted
+ * @param info the information about the arguments of the invocation.
+ * @param first the first argument to start looking from.
+ * @param count the number of remaining arguments to handle.
+ * @return the number of arguments that were not handled; the unhandled
+ * arguments are the leading ones starting at 'first'.
+ */
+int Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
+  // A call is pretty expensive, so use it only if the count is big.
+  if (count > 16) {
+    GenDalvikArgsFlushPromoted(info, first);
+    int start_offset = SRegOffset(info->args[first].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set);
 
-  if (cu_->instruction_set == kThumb2 && regs_left_to_pass_via_stack <= 16) {
-    // Use vldm/vstm pair using kArg3 as a temp
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                             direct_code, direct_method, type);
-    OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), start_offset);
-    LIR* ld = nullptr;
-    {
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      ld = OpVldm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack);
-    }
-    // TUNING: loosen barrier
-    ld->u.m.def_mask = &kEncodeAll;
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                             direct_code, direct_method, type);
-    OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), 4 /* Method* */ + (3 * 4));
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                             direct_code, direct_method, type);
-    LIR* st = nullptr;
-    {
-      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-      st = OpVstm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack);
-    }
-    st->u.m.def_mask = &kEncodeAll;
-    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                             direct_code, direct_method, type);
-  } else if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
-    int current_src_offset = start_offset;
-    int current_dest_offset = outs_offset;
-
-    // Only davik regs are accessed in this loop; no next_call_insn() calls.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    while (regs_left_to_pass_via_stack > 0) {
-      // This is based on the knowledge that the stack itself is 16-byte aligned.
-      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
-      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
-      size_t bytes_to_move;
-
-      /*
-       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a
-       * a 128-bit move because we won't get the chance to try to aligned. If there are more than
-       * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned.
-       * We do this because we could potentially do a smaller move to align.
-       */
-      if (regs_left_to_pass_via_stack == 4 ||
-          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
-        // Moving 128-bits via xmm register.
-        bytes_to_move = sizeof(uint32_t) * 4;
-
-        // Allocate a free xmm temp. Since we are working through the calling sequence,
-        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
-        // there are no free registers.
-        RegStorage temp = AllocTempDouble();
-
-        LIR* ld1 = nullptr;
-        LIR* ld2 = nullptr;
-        LIR* st1 = nullptr;
-        LIR* st2 = nullptr;
-
-        /*
-         * The logic is similar for both loads and stores. If we have 16-byte alignment,
-         * do an aligned move. If we have 8-byte alignment, then do the move in two
-         * parts. This approach prevents possible cache line splits. Finally, fall back
-         * to doing an unaligned move. In most cases we likely won't split the cache
-         * line but we cannot prove it and thus take a conservative approach.
-         */
-        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
-        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
-
-        if (src_is_16b_aligned) {
-          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP);
-        } else if (src_is_8b_aligned) {
-          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP);
-          ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1),
-                            kMovHi128FP);
-        } else {
-          ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP);
-        }
-
-        if (dest_is_16b_aligned) {
-          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP);
-        } else if (dest_is_8b_aligned) {
-          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP);
-          st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1),
-                            temp, kMovHi128FP);
-        } else {
-          st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP);
-        }
-
-        // TODO If we could keep track of aliasing information for memory accesses that are wider
-        // than 64-bit, we wouldn't need to set up a barrier.
-        if (ld1 != nullptr) {
-          if (ld2 != nullptr) {
-            // For 64-bit load we can actually set up the aliasing information.
-            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
-            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true,
-                                    true);
-          } else {
-            // Set barrier for 128-bit load.
-            ld1->u.m.def_mask = &kEncodeAll;
-          }
-        }
-        if (st1 != nullptr) {
-          if (st2 != nullptr) {
-            // For 64-bit store we can actually set up the aliasing information.
-            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
-            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false,
-                                    true);
-          } else {
-            // Set barrier for 128-bit store.
-            st1->u.m.def_mask = &kEncodeAll;
-          }
-        }
-
-        // Free the temporary used for the data movement.
-        FreeTemp(temp);
-      } else {
-        // Moving 32-bits via general purpose register.
-        bytes_to_move = sizeof(uint32_t);
-
-        // Instead of allocating a new temp, simply reuse one of the registers being used
-        // for argument passing.
-        RegStorage temp = TargetReg(kArg3, kNotWide);
-
-        // Now load the argument VR and store to the outs.
-        Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
-        Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
-      }
-
-      current_src_offset += bytes_to_move;
-      current_dest_offset += bytes_to_move;
-      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
-    }
-  } else {
-    // Generate memcpy
     OpRegRegImm(kOpAdd, TargetReg(kArg0, kRef), TargetPtrReg(kSp), outs_offset);
     OpRegRegImm(kOpAdd, TargetReg(kArg1, kRef), TargetPtrReg(kSp), start_offset);
     CallRuntimeHelperRegRegImm(kQuickMemcpy, TargetReg(kArg0, kRef), TargetReg(kArg1, kRef),
-                               (info->num_arg_words - 3) * 4, false);
+                               count * 4, false);
+    count = 0;
+  }
+  return count;
+}
+
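A standalone sketch of the default policy just defined; the threshold of 16 comes from the code above, everything else is a simplified illustration rather than ART code:

#include <cstdio>

// Returns how many VRs were left unhandled, per the documented contract.
int BulkCopyPolicy(int count) {
  if (count > 16) {
    // In the real code: flush promoted regs, then emit a kQuickMemcpy call
    // copying count * 4 bytes from the frame into the outs area.
    return 0;    // Everything handled; nothing left for the caller.
  }
  return count;  // Nothing handled; the caller copies VRs one at a time.
}

int main() {
  printf("%d %d\n", BulkCopyPolicy(8), BulkCopyPolicy(32));  // Prints: 8 0
  return 0;
}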
+int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
+                           LIR** pcrLabel, NextCallInsn next_call_insn,
+                           const MethodReference& target_method,
+                           uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
+                           InvokeType type, bool skip_this) {
+  // If no arguments, just return.
+  if (info->num_arg_words == 0)
+    return call_state;
+
+  const int start_index = skip_this ? 1 : 0;
+
+  // Get the architecture-dependent mapping between output VRs and physical
+  // registers based on the shorty of the method to call.
+  InToRegStorageMapping in_to_reg_storage_mapping(arena_);
+  {
+    const char* target_shorty = mir_graph_->GetShortyFromMethodReference(target_method);
+    ShortyIterator shorty_iterator(target_shorty, type == kStatic);
+    in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
   }
 
-  call_state = LoadArgRegs(info, call_state, next_call_insn,
-                           target_method, vtable_idx, direct_code, direct_method,
-                           type, skip_this);
+  int stack_map_start = std::max(in_to_reg_storage_mapping.GetMaxMappedIn() + 1, start_index);
+  if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) {
+    // It is possible that the last mapped reg is 32-bit while the arg is 64-bit.
+    // In that case the arg is handled together with its low part, which is
+    // mapped to a register.
+    stack_map_start++;
+  }
+  int regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start;
+
+  // In the range case we can try to copy the remaining VRs (those not mapped
+  // to physical registers) using a more optimal algorithm.
+  if (info->is_range && regs_left_to_pass_via_stack > 1) {
+    regs_left_to_pass_via_stack = GenDalvikArgsBulkCopy(info, stack_map_start,
+                                                        regs_left_to_pass_via_stack);
+  }
+
+  // Now handle any remaining VRs mapped to the stack.
+  if (in_to_reg_storage_mapping.HasArgumentsOnStack()) {
+    // Use two temps, but avoid kArg1: it may hold 'this', which we may skip.
+    // Keeping single and wide temps separate can give some advantage.
+    RegStorage regRef = TargetReg(kArg3, kRef);
+    RegStorage regSingle = TargetReg(kArg3, kNotWide);
+    RegStorage regWide = TargetReg(kArg2, kWide);
+    for (int i = start_index;
+         i < stack_map_start + regs_left_to_pass_via_stack; i++) {
+      RegLocation rl_arg = info->args[i];
+      rl_arg = UpdateRawLoc(rl_arg);
+      RegStorage reg = in_to_reg_storage_mapping.Get(i);
+      if (!reg.Valid()) {
+        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+        {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          if (rl_arg.wide) {
+            if (rl_arg.location == kLocPhysReg) {
+              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
+            } else {
+              LoadValueDirectWideFixed(rl_arg, regWide);
+              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
+            }
+          } else {
+            if (rl_arg.location == kLocPhysReg) {
+              if (rl_arg.ref) {
+                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
+              } else {
+                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
+              }
+            } else {
+              if (rl_arg.ref) {
+                LoadValueDirectFixed(rl_arg, regRef);
+                StoreRefDisp(TargetPtrReg(kSp), out_offset, regRef, kNotVolatile);
+              } else {
+                LoadValueDirectFixed(rl_arg, regSingle);
+                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
+              }
+            }
+          }
+        }
+        call_state = next_call_insn(cu_, info, call_state, target_method,
+                                    vtable_idx, direct_code, direct_method, type);
+      }
+      if (rl_arg.wide) {
+        i++;
+      }
+    }
+  }
+
+  // Finish with VRs mapped to physical registers.
+  for (int i = start_index; i < stack_map_start; i++) {
+    RegLocation rl_arg = info->args[i];
+    rl_arg = UpdateRawLoc(rl_arg);
+    RegStorage reg = in_to_reg_storage_mapping.Get(i);
+    if (reg.Valid()) {
+      if (rl_arg.wide) {
+        // If reg is not 64-bit (it holds only half of the 64-bit value), handle it separately.
+        if (!reg.Is64Bit()) {
+          // TODO: REVISIT: This adds a spill of the low part while we could just copy it.
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          if (rl_arg.location == kLocPhysReg) {
+            int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+            // Dump it to memory and then load only the low part.
+            StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
+            LoadBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
+          } else {
+            int out_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set);
+            // First, use the target reg for the high part.
+            LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + 1), reg, k32,
+                         kNotVolatile);
+            StoreBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile);
+            // Now load the target reg with the low part.
+            LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low), reg, k32, kNotVolatile);
+          }
+        } else {
+          LoadValueDirectWideFixed(rl_arg, reg);
+        }
+      } else {
+        LoadValueDirectFixed(rl_arg, reg);
+      }
+      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                               direct_code, direct_method, type);
+    }
+    if (rl_arg.wide) {
+      i++;
+    }
+  }
 
   call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                            direct_code, direct_method, type);
@@ -1094,18 +918,20 @@
       *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     } else {
       *pcrLabel = nullptr;
-      if (!(cu_->disable_opt & (1 << kNullCheckElimination)) &&
-          (info->opt_flags & MIR_IGNORE_NULL_CHECK)) {
-        return call_state;
-      }
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
       GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
     }
   }
   return call_state;
 }
 
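A worked example of the stack_map_start computation in GenDalvikArgs, with assumed values (whether a mapper can split a wide arg this way depends on the target; MIPS can, when it runs out of pair registers): a static (JJJJ)V call where only the low half of the second long got a register.

#include <algorithm>
#include <cstdio>

int main() {
  // Four wide args J J J J occupy VR pairs (0,1) (2,3) (4,5) (6,7).
  // Assume the mapper assigned registers up to VR 2 only, i.e. the second
  // long got just its low half in a 32-bit register.
  int max_mapped_in = 2;
  int start_index = 0;
  int num_arg_words = 8;
  int stack_map_start = std::max(max_mapped_in + 1, start_index);  // == 3
  bool starts_on_high_word = true;  // VR 3 is the high half of that long.
  if (stack_map_start < num_arg_words && starts_on_high_word) {
    // Process the split long together with its register-mapped low half.
    stack_map_start++;
  }
  printf("stack_map_start=%d, VRs via stack=%d\n",
         stack_map_start, num_arg_words - stack_map_start);  // 4 and 4
  return 0;
}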
+RegStorage Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  if (!in_to_reg_storage_mapping_.IsInitialized()) {
+    ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic);
+    in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
+  }
+  return in_to_reg_storage_mapping_.Get(arg_num);
+}
+
 RegLocation Mir2Lir::InlineTarget(CallInfo* info) {
   RegLocation res;
   if (info->result.location == kLocInvalid) {
@@ -1719,17 +1545,10 @@
     skip_this = fast_path;
   }
   MethodReference target_method = method_info.GetTargetMethod();
-  if (!info->is_range) {
-    call_state = GenDalvikArgsNoRange(info, call_state, p_null_ck,
-                                      next_call_insn, target_method, method_info.VTableIndex(),
-                                      method_info.DirectCode(), method_info.DirectMethod(),
-                                      original_type, skip_this);
-  } else {
-    call_state = GenDalvikArgsRange(info, call_state, p_null_ck,
-                                    next_call_insn, target_method, method_info.VTableIndex(),
-                                    method_info.DirectCode(), method_info.DirectMethod(),
-                                    original_type, skip_this);
-  }
+  call_state = GenDalvikArgs(info, call_state, p_null_ck,
+                             next_call_insn, target_method, method_info.VTableIndex(),
+                             method_info.DirectCode(), method_info.DirectMethod(),
+                             original_type, skip_this);
   // Finish up any of the call sequence not interleaved in arg loading
   while (call_state >= 0) {
     call_state = next_call_insn(cu_, info, call_state, target_method, method_info.VTableIndex(),
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index e08846c..8f976df 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -24,6 +24,26 @@
 namespace art {
 
 class MipsMir2Lir FINAL : public Mir2Lir {
+ protected:
+  class InToRegStorageMipsMapper : public InToRegStorageMapper {
+   public:
+    explicit InToRegStorageMipsMapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {}
+    virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+    }
+   protected:
+    Mir2Lir* m2l_;
+   private:
+    size_t cur_core_reg_;
+  };
+
+  InToRegStorageMipsMapper in_to_reg_storage_mips_mapper_;
+  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
+    in_to_reg_storage_mips_mapper_.Reset();
+    return &in_to_reg_storage_mips_mapper_;
+  }
+
   public:
     MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
@@ -56,7 +76,6 @@
     // Required for target - register utilities.
     RegStorage Solo64ToPair64(RegStorage reg);
     RegStorage TargetReg(SpecialTargetRegister reg);
-    RegStorage GetArgMappingToPhysicalReg(int arg_num);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 185112d..efa130c 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -122,18 +122,20 @@
   return res_reg;
 }
 
-RegStorage MipsMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
-  switch (arg_num) {
-    case 0:
-      return rs_rMIPS_ARG1;
-    case 1:
-      return rs_rMIPS_ARG2;
-    case 2:
-      return rs_rMIPS_ARG3;
-    default:
-      return RegStorage::InvalidReg();
+RegStorage MipsMir2Lir::InToRegStorageMipsMapper::GetNextReg(ShortyArg arg) {
+  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3};
+  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
+
+  RegStorage result = RegStorage::InvalidReg();
+  if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+    result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                             arg.IsRef() ? kRef : kNotWide);
+    if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = RegStorage::MakeRegPair(
+          result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
+    }
   }
+  return result;
 }
 
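A quick standalone illustration of the MIPS policy above, in plain C++ rather than ART code; the implicit 'this' is omitted for brevity, so read it as a static method. Core args consume kArg1..kArg3 in order, and a wide arg takes a register pair when a second register remains:

#include <cstddef>
#include <cstdio>

int main() {
  const char* regs[] = {"kArg1", "kArg2", "kArg3"};
  size_t cur = 0;
  for (const char* p = "IJL"; *p != 0; ++p) {  // int, long, Object
    if (cur >= 3) {
      printf("%c -> stack\n", *p);
      continue;
    }
    if (*p == 'J' || *p == 'D') {
      if (cur + 1 < 3) {
        // Wide: pair two consecutive core registers.
        printf("%c -> (%s,%s)\n", *p, regs[cur], regs[cur + 1]);
        cur += 2;
      } else {
        printf("%c -> %s (low half only)\n", *p, regs[cur++]);
      }
    } else {
      printf("%c -> %s\n", *p, regs[cur++]);
    }
  }
  return 0;  // Prints: I->kArg1, J->(kArg2,kArg3), L->stack.
}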
 /*
@@ -602,7 +604,7 @@
 }
 
 MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena) {
+    : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this) {
   for (int i = 0; i < kMipsLast; i++) {
     DCHECK_EQ(MipsMir2Lir::EncodingMap[i].opcode, i)
         << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 0aefc2d..144790e 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -276,6 +276,24 @@
   }
 }
 
+inline Mir2Lir::ShortyIterator::ShortyIterator(const char* shorty, bool is_static)
+    : cur_(shorty + 1), pending_this_(!is_static), initialized_(false) {
+  DCHECK(shorty != nullptr);
+  DCHECK_NE(*shorty, 0);
+}
+
+inline bool Mir2Lir::ShortyIterator::Next() {
+  if (!initialized_) {
+    initialized_ = true;
+  } else if (pending_this_) {
+    pending_this_ = false;
+  } else if (*cur_ != 0) {
+    cur_++;
+  }
+
+  return *cur_ != 0 || pending_this_;
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_
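ShortyIterator starts at shorty + 1 to skip the return-type char and synthesizes a leading reference for the implicit this of instance methods. A minimal standalone mirror of the traversal, assuming the standard shorty encoding (first char is the return type; an illustration, not the ART class):

#include <cstdio>

struct ShortyIter {
  const char* cur_;
  bool pending_this_;
  bool initialized_ = false;
  ShortyIter(const char* shorty, bool is_static)
      : cur_(shorty + 1), pending_this_(!is_static) {}
  bool Next() {
    if (!initialized_) {
      initialized_ = true;
    } else if (pending_this_) {
      pending_this_ = false;
    } else if (*cur_ != 0) {
      cur_++;
    }
    return *cur_ != 0 || pending_this_;
  }
  char Arg() const { return pending_this_ ? 'L' : *cur_; }
};

int main() {
  ShortyIter it("VDL", /*is_static=*/false);  // instance method (double, Object) -> void
  while (it.Next()) {
    printf("%c ", it.Arg());  // prints: L D L
  }
  printf("\n");
  return 0;
}

For the instance method the first yielded arg is the implicit this ('L'), followed by the declared parameters.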
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 320c0f4..524ee21 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -53,20 +53,14 @@
   return res;
 }
 
-void Mir2Lir::LockArg(int in_position, bool wide) {
-  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
-      RegStorage::InvalidReg();
+void Mir2Lir::LockArg(int in_position, bool) {
+  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
 
-  if (reg_arg_low.Valid()) {
-    LockTemp(reg_arg_low);
-  }
-  if (reg_arg_high.Valid() && reg_arg_low.NotExactlyEquals(reg_arg_high)) {
-    LockTemp(reg_arg_high);
+  if (reg_arg.Valid()) {
+    LockTemp(reg_arg);
   }
 }
 
-// TODO: simplify when 32-bit targets go hard-float.
 RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
@@ -87,81 +81,38 @@
     offset += sizeof(uint64_t);
   }
 
-  if (cu_->target64) {
-    RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
-    if (!reg_arg.Valid()) {
-      RegStorage new_reg =
-          wide ?  AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
-      LoadBaseDisp(TargetPtrReg(kSp), offset, new_reg, wide ? k64 : k32, kNotVolatile);
-      return new_reg;
-    } else {
-      // Check if we need to copy the arg to a different reg_class.
-      if (!RegClassMatches(reg_class, reg_arg)) {
-        if (wide) {
-          RegStorage new_reg = AllocTypedTempWide(false, reg_class);
-          OpRegCopyWide(new_reg, reg_arg);
-          reg_arg = new_reg;
-        } else {
-          RegStorage new_reg = AllocTypedTemp(false, reg_class);
-          OpRegCopy(new_reg, reg_arg);
-          reg_arg = new_reg;
-        }
+  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+
+  // TODO: REVISIT: This adds a spill of the low part while we could just copy it.
+  if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) {
+    // Only half of the wide argument arrived in a register;
+    // flush that half to memory.
+    StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile);
+    reg_arg = RegStorage::InvalidReg();
+  }
+
+  if (!reg_arg.Valid()) {
+    reg_arg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
+    LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, wide ? k64 : k32, kNotVolatile);
+  } else {
+    // Check if we need to copy the arg to a different reg_class.
+    if (!RegClassMatches(reg_class, reg_arg)) {
+      if (wide) {
+        RegStorage new_reg = AllocTypedTempWide(false, reg_class);
+        OpRegCopyWide(new_reg, reg_arg);
+        reg_arg = new_reg;
+      } else {
+        RegStorage new_reg = AllocTypedTemp(false, reg_class);
+        OpRegCopy(new_reg, reg_arg);
+        reg_arg = new_reg;
       }
     }
-    return reg_arg;
-  }
-
-  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
-      RegStorage::InvalidReg();
-
-  // If the VR is wide and there is no register for high part, we need to load it.
-  if (wide && !reg_arg_high.Valid()) {
-    // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
-    if (!reg_arg_low.Valid()) {
-      RegStorage new_regs = AllocTypedTempWide(false, reg_class);
-      LoadBaseDisp(TargetPtrReg(kSp), offset, new_regs, k64, kNotVolatile);
-      return new_regs;  // The reg_class is OK, we can return.
-    } else {
-      // Assume that no ABI allows splitting a wide fp reg between a narrow fp reg and memory,
-      // i.e. the low part is in a core reg. Load the second part in a core reg as well for now.
-      DCHECK(!reg_arg_low.IsFloat());
-      reg_arg_high = AllocTemp();
-      int offset_high = offset + sizeof(uint32_t);
-      Load32Disp(TargetPtrReg(kSp), offset_high, reg_arg_high);
-      // Continue below to check the reg_class.
-    }
-  }
-
-  // If the low part is not in a register yet, we need to load it.
-  if (!reg_arg_low.Valid()) {
-    // Assume that if the low part of a wide arg is passed in memory, so is the high part,
-    // thus we don't get here for wide args as it's handled above. Big-endian ABIs could
-    // conceivably break this assumption but Android supports only little-endian architectures.
-    DCHECK(!wide);
-    reg_arg_low = AllocTypedTemp(false, reg_class);
-    Load32Disp(TargetPtrReg(kSp), offset, reg_arg_low);
-    return reg_arg_low;  // The reg_class is OK, we can return.
-  }
-
-  RegStorage reg_arg = wide ? RegStorage::MakeRegPair(reg_arg_low, reg_arg_high) : reg_arg_low;
-  // Check if we need to copy the arg to a different reg_class.
-  if (!RegClassMatches(reg_class, reg_arg)) {
-    if (wide) {
-      RegStorage new_regs = AllocTypedTempWide(false, reg_class);
-      OpRegCopyWide(new_regs, reg_arg);
-      reg_arg = new_regs;
-    } else {
-      RegStorage new_reg = AllocTypedTemp(false, reg_class);
-      OpRegCopy(new_reg, reg_arg);
-      reg_arg = new_reg;
-    }
   }
   return reg_arg;
 }
 
-// TODO: simpilfy when 32-bit targets go hard float.
 void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
+  DCHECK_EQ(rl_dest.location, kLocPhysReg);
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
   if (cu_->instruction_set == kX86) {
@@ -180,48 +131,23 @@
     offset += sizeof(uint64_t);
   }
 
-  if (!rl_dest.wide) {
-    RegStorage reg = GetArgMappingToPhysicalReg(in_position);
-    if (reg.Valid()) {
-      OpRegCopy(rl_dest.reg, reg);
-    } else {
-      Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg);
-    }
+  RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+
+  // TODO: REVISIT: This adds a spill of the low part while we could just copy it.
+  if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) {
+    // Only half of the wide argument arrived in a register;
+    // flush that half to memory.
+    StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile);
+    reg_arg = RegStorage::InvalidReg();
+  }
+
+  if (!reg_arg.Valid()) {
+    LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, rl_dest.wide ? k64 : k32, kNotVolatile);
   } else {
-    if (cu_->target64) {
-      RegStorage reg = GetArgMappingToPhysicalReg(in_position);
-      if (reg.Valid()) {
-        OpRegCopy(rl_dest.reg, reg);
-      } else {
-        LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile);
-      }
-      return;
-    }
-
-    RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-    RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
-
-    if (cu_->instruction_set == kX86) {
-      // Can't handle double split between reg & memory.  Flush reg half to memory.
-      if (rl_dest.reg.IsDouble() && (reg_arg_low.Valid() != reg_arg_high.Valid())) {
-        DCHECK(reg_arg_low.Valid());
-        DCHECK(!reg_arg_high.Valid());
-        Store32Disp(TargetPtrReg(kSp), offset, reg_arg_low);
-        reg_arg_low = RegStorage::InvalidReg();
-      }
-    }
-
-    if (reg_arg_low.Valid() && reg_arg_high.Valid()) {
-      OpRegCopyWide(rl_dest.reg, RegStorage::MakeRegPair(reg_arg_low, reg_arg_high));
-    } else if (reg_arg_low.Valid() && !reg_arg_high.Valid()) {
-      OpRegCopy(rl_dest.reg, reg_arg_low);
-      int offset_high = offset + sizeof(uint32_t);
-      Load32Disp(TargetPtrReg(kSp), offset_high, rl_dest.reg.GetHigh());
-    } else if (!reg_arg_low.Valid() && reg_arg_high.Valid()) {
-      OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high);
-      Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg.GetLow());
+    if (rl_dest.wide) {
+      OpRegCopyWide(rl_dest.reg, reg_arg);
     } else {
-      LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile);
+      OpRegCopy(rl_dest.reg, reg_arg);
     }
   }
 }
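The unified LoadArg/LoadArgDirect path handles the case where only half of a wide value arrived in a register: the register half is flushed to its stack slot, then the whole 64-bit value is reloaded from memory. A toy little-endian model of that fallback (frame layout, values, and names are made up):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t frame[2] = {0, 0x00000002};  // slots of one wide VR: low, high
  uint32_t reg_low = 0x00000001;        // only the low half is in a register
  bool in_reg = true, reg_is_wide = false, vr_is_wide = true;

  if (in_reg && vr_is_wide && !reg_is_wide) {
    frame[0] = reg_low;  // flush the register half to its slot...
    in_reg = false;      // ...and fall back to a plain 64-bit memory load
  }
  uint64_t value = 0;
  if (!in_reg) {
    memcpy(&value, frame, sizeof(value));  // assumes little-endian, as Android does
  }
  printf("loaded 0x%016llx\n", (unsigned long long)value);  // 0x0000000200000001
  return 0;
}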
@@ -623,8 +549,7 @@
     case Instruction::GOTO:
     case Instruction::GOTO_16:
     case Instruction::GOTO_32:
-      if (mir_graph_->IsBackedge(bb, bb->taken) &&
-          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken))) {
+      if (mir_graph_->IsBackEdge(bb, bb->taken)) {
         GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]);
       } else {
         OpUnconditionalBranch(&label_list[bb->taken]);
@@ -656,12 +581,10 @@
     case Instruction::IF_GE:
     case Instruction::IF_GT:
     case Instruction::IF_LE: {
-      LIR* taken = &label_list[bb->taken];
-      if (mir_graph_->IsBackwardsBranch(bb) &&
-          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
-           !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
         GenSuspendTest(opt_flags);
       }
+      LIR* taken = &label_list[bb->taken];
       GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken);
       break;
     }
@@ -671,12 +594,10 @@
     case Instruction::IF_GEZ:
     case Instruction::IF_GTZ:
     case Instruction::IF_LEZ: {
-      LIR* taken = &label_list[bb->taken];
-      if (mir_graph_->IsBackwardsBranch(bb) &&
-          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
-           !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
         GenSuspendTest(opt_flags);
       }
+      LIR* taken = &label_list[bb->taken];
       GenCompareZeroAndBranch(opcode, rl_src[0], taken);
       break;
     }
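These hunks route the decision through the new IsBackEdge query: a suspend test is emitted exactly when the branch closes a loop, preserving the guarantee that a suspension request (for GC, debugging, etc.) is observed within a bounded number of iterations. Conceptually the emitted code behaves like this toy poll (an illustration, not ART code):

#include <atomic>
#include <cstdio>

std::atomic<bool> suspend_requested{false};

void CheckSuspend() {  // roughly what GenSuspendTest amounts to
  if (suspend_requested.load(std::memory_order_relaxed)) {
    printf("suspend point hit\n");  // a real runtime parks the thread here
    suspend_requested = false;
  }
}

int main() {
  long sum = 0;
  for (int i = 0; i < 1000000; ++i) {
    sum += i;
    if (i == 500000) {
      suspend_requested = true;  // e.g. GC asks all threads to stop
    }
    CheckSuspend();  // the poll placed on the loop's back edge
  }
  printf("%ld\n", sum);
  return 0;
}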
@@ -845,69 +766,37 @@
 
     case Instruction::INVOKE_STATIC_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true));
-      if (!kLeafOptimization) {
-        // If the invocation is not inlined, we can assume there is already a
-        // suspend check at the return site
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_STATIC:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::INVOKE_DIRECT:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_DIRECT_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::INVOKE_VIRTUAL:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_VIRTUAL_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::INVOKE_SUPER:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_SUPER_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::INVOKE_INTERFACE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_INTERFACE_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::NEG_INT:
@@ -1108,18 +997,33 @@
       break;
     }
     case kMirOpFusedCmplFloat:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedFPCmpBranch(bb, mir, false /*gt bias*/, false /*double*/);
       break;
     case kMirOpFusedCmpgFloat:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedFPCmpBranch(bb, mir, true /*gt bias*/, false /*double*/);
       break;
     case kMirOpFusedCmplDouble:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedFPCmpBranch(bb, mir, false /*gt bias*/, true /*double*/);
       break;
     case kMirOpFusedCmpgDouble:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedFPCmpBranch(bb, mir, true /*gt bias*/, true /*double*/);
       break;
     case kMirOpFusedCmpLong:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedLongCmpBranch(bb, mir);
       break;
     case kMirOpSelect:
@@ -1372,4 +1276,35 @@
   UNREACHABLE();
 }
 
+void Mir2Lir::InToRegStorageMapping::Initialize(ShortyIterator* shorty,
+                                                InToRegStorageMapper* mapper) {
+  DCHECK(mapper != nullptr);
+  DCHECK(shorty != nullptr);
+  max_mapped_in_ = -1;
+  has_arguments_on_stack_ = false;
+  while (shorty->Next()) {
+    ShortyArg arg = shorty->GetArg();
+    RegStorage reg = mapper->GetNextReg(arg);
+    if (reg.Valid()) {
+      mapping_.Put(count_, reg);
+      max_mapped_in_ = count_;
+      // If the VR is wide and was mapped as wide then account for it.
+      if (arg.IsWide() && reg.Is64Bit()) {
+        max_mapped_in_++;
+      }
+    } else {
+      has_arguments_on_stack_ = true;
+    }
+    count_ += arg.IsWide() ? 2 : 1;
+  }
+  initialized_ = true;
+}
+
+RegStorage Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+  DCHECK(IsInitialized());
+  DCHECK_LT(in_position, count_);
+  auto res = mapping_.find(in_position);
+  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+}
+
 }  // namespace art
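Initialize walks the shorty once, recording a register per mapped in-VR, the highest register-mapped VR index, and whether anything ends up on the stack. A standalone trace of that bookkeeping for a hypothetical three-register ABI and an instance method taking (long, int) — toy code, not the ART types:

#include <cstdio>

int main() {
  const int kNumRegs = 3;  // toy ABI: three 32-bit argument registers
  int next_reg = 0;

  struct Arg { char type; bool wide; };
  Arg args[] = {{'L', false}, {'J', true}, {'I', false}};  // this, long, int

  int count = 0, max_mapped_in = -1;
  bool has_args_on_stack = false;
  for (Arg a : args) {
    if (next_reg < kNumRegs) {
      bool mapped_wide = a.wide && next_reg + 1 < kNumRegs;
      next_reg += mapped_wide ? 2 : 1;
      max_mapped_in = count;
      if (mapped_wide) {
        max_mapped_in++;  // a wide arg mapped as wide covers two VRs
      }
    } else {
      has_args_on_stack = true;
    }
    count += a.wide ? 2 : 1;
  }
  printf("max_mapped_in=%d stack=%d\n", max_mapped_in, has_args_on_stack);
  return 0;
}

It prints max_mapped_in=2 stack=1: VRs 0..2 (this plus the long's pair) live in registers and the trailing int is passed in memory.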
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 5d78a6e..a2b85ff 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -843,8 +843,8 @@
     virtual void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                 RegLocation rl_src1, RegLocation rl_src2, int flags);
     void GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest, RegLocation rl_src);
-    virtual void GenSuspendTest(int opt_flags);
-    virtual void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+    void GenSuspendTest(int opt_flags);
+    void GenSuspendTestAndBranch(int opt_flags, LIR* target);
 
     // This will be overridden by x86 implementation.
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
@@ -905,19 +905,14 @@
     virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info);
 
     virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
-    virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                             NextCallInsn next_call_insn,
-                             const MethodReference& target_method,
-                             uint32_t vtable_idx,
-                             uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                             bool skip_this);
-    virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                           NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx,
-                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this);
-
+    virtual int GenDalvikArgs(CallInfo* info, int call_state, LIR** pcrLabel,
+                              NextCallInsn next_call_insn,
+                              const MethodReference& target_method,
+                              uint32_t vtable_idx,
+                              uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                              bool skip_this);
+    virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count);
+    virtual void GenDalvikArgsFlushPromoted(CallInfo* info, int start);
     /**
      * @brief Used to determine the register location of destination.
      * @details This is needed during generation of inline intrinsics because it finds destination
@@ -958,12 +953,6 @@
     bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile);
     bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object,
                              bool is_volatile, bool is_ordered);
-    virtual int LoadArgRegs(CallInfo* info, int call_state,
-                    NextCallInsn next_call_insn,
-                    const MethodReference& target_method,
-                    uint32_t vtable_idx,
-                    uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                    bool skip_this);
 
     // Shared by all targets - implemented in gen_loadstore.cc.
     RegLocation LoadCurrMethod();
@@ -1228,7 +1217,7 @@
       }
     }
 
-    virtual RegStorage GetArgMappingToPhysicalReg(int arg_num) = 0;
+    RegStorage GetArgMappingToPhysicalReg(int arg_num);
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
     virtual RegLocation LocCReturn() = 0;
@@ -1780,6 +1769,63 @@
     // to deduplicate the masks.
     ResourceMaskCache mask_cache_;
 
+  protected:
+    // ABI support
+    class ShortyArg {
+      public:
+        explicit ShortyArg(char type) : type_(type) { }
+        bool IsFP() { return type_ == 'F' || type_ == 'D'; }
+        bool IsWide() { return type_ == 'J' || type_ == 'D'; }
+        bool IsRef() { return type_ == 'L'; }
+        char GetType() { return type_; }
+      private:
+        char type_;
+    };
+
+    class ShortyIterator {
+      public:
+        ShortyIterator(const char* shorty, bool is_static);
+        bool Next();
+        ShortyArg GetArg() { return ShortyArg(pending_this_ ? 'L' : *cur_); }
+      private:
+        const char* cur_;
+        bool pending_this_;
+        bool initialized_;
+    };
+
+    class InToRegStorageMapper {
+     public:
+      virtual RegStorage GetNextReg(ShortyArg arg) = 0;
+      virtual ~InToRegStorageMapper() {}
+      virtual void Reset() = 0;
+    };
+
+    class InToRegStorageMapping {
+     public:
+      explicit InToRegStorageMapping(ArenaAllocator* arena)
+          : mapping_(std::less<int>(), arena->Adapter()), count_(0),
+            max_mapped_in_(0), has_arguments_on_stack_(false), initialized_(false) {}
+      void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper);
+      /**
+       * @return the index of the last VR mapped to a physical register; any VR
+       * with a higher index is passed in memory.
+       */
+      int GetMaxMappedIn() { return max_mapped_in_; }
+      bool HasArgumentsOnStack() { return has_arguments_on_stack_; }
+      RegStorage Get(int in_position);
+      bool IsInitialized() { return initialized_; }
+     private:
+      ArenaSafeMap<int, RegStorage> mapping_;
+      int count_;
+      int max_mapped_in_;
+      bool has_arguments_on_stack_;
+      bool initialized_;
+    };
+
+    // Cached mapping of method input to reg storage according to the ABI.
+    InToRegStorageMapping in_to_reg_storage_mapping_;
+    virtual InToRegStorageMapper* GetResetedInToRegStorageMapper() = 0;
+
   private:
     static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type);
 };  // Class Mir2Lir
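The header now keeps a single shared InToRegStorageMapping cache in Mir2Lir and obtains a freshly reset mapper from each backend through one virtual hook, replacing the per-target GetArgMappingToPhysicalReg overrides. The shape of the pattern, reduced to a compilable sketch with invented names:

#include <cstdio>

struct Mapper {
  virtual ~Mapper() {}
  virtual int GetNextReg() = 0;  // -1 means "passed on the stack"
  virtual void Reset() = 0;
};

struct ThreeRegMapper : Mapper {
  int next = 0;
  int GetNextReg() override { return next < 3 ? next++ : -1; }
  void Reset() override { next = 0; }
};

struct Backend {
  ThreeRegMapper mapper_;      // cached member, like in_to_reg_storage_*_mapper_
  Mapper* GetResetMapper() {   // plays the role of GetResetedInToRegStorageMapper()
    mapper_.Reset();
    return &mapper_;
  }
};

int main() {
  Backend b;
  Mapper* m = b.GetResetMapper();
  for (int i = 0; i < 4; ++i) {
    printf("%d ", m->GetNextReg());  // 0 1 2 -1
  }
  printf("\n");
  return 0;
}

Caching the mapper in the backend avoids allocating one per call, while the Reset() hook keeps repeated Initialize runs independent.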
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 9cb0bf5..c7d83dd 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -28,40 +28,48 @@
 
 class X86Mir2Lir : public Mir2Lir {
  protected:
-  class InToRegStorageMapper {
-   public:
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0;
-    virtual ~InToRegStorageMapper() {}
-  };
-
   class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
    public:
-    explicit InToRegStorageX86_64Mapper(Mir2Lir* ml) : ml_(ml), cur_core_reg_(0), cur_fp_reg_(0) {}
-    virtual ~InToRegStorageX86_64Mapper() {}
-    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref);
+    explicit InToRegStorageX86_64Mapper(Mir2Lir* m2l)
+        : m2l_(m2l), cur_core_reg_(0), cur_fp_reg_(0) {}
+    virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+      cur_fp_reg_ = 0;
+    }
    protected:
-    Mir2Lir* ml_;
+    Mir2Lir* m2l_;
    private:
-    int cur_core_reg_;
-    int cur_fp_reg_;
+    size_t cur_core_reg_;
+    size_t cur_fp_reg_;
   };
 
-  class InToRegStorageMapping {
+  class InToRegStorageX86Mapper : public InToRegStorageMapper {
    public:
-    InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false),
-    initialized_(false) {}
-    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
-    int GetMaxMappedIn() { return max_mapped_in_; }
-    bool IsThereStackMapped() { return is_there_stack_mapped_; }
-    RegStorage Get(int in_position);
-    bool IsInitialized() { return initialized_; }
+    explicit InToRegStorageX86Mapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {}
+    virtual RegStorage GetNextReg(ShortyArg arg);
+    virtual void Reset() OVERRIDE {
+      cur_core_reg_ = 0;
+    }
+   protected:
+    Mir2Lir* m2l_;
    private:
-    std::map<int, RegStorage> mapping_;
-    int max_mapped_in_;
-    bool is_there_stack_mapped_;
-    bool initialized_;
+    size_t cur_core_reg_;
   };
 
+  InToRegStorageX86_64Mapper in_to_reg_storage_x86_64_mapper_;
+  InToRegStorageX86Mapper in_to_reg_storage_x86_mapper_;
+  InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE {
+    InToRegStorageMapper* res;
+    if (cu_->target64) {
+      res = &in_to_reg_storage_x86_64_mapper_;
+    } else {
+      res = &in_to_reg_storage_x86_mapper_;
+    }
+    res->Reset();
+    return res;
+  }
+
   class ExplicitTempRegisterLock {
   public:
     ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir, int n_regs, ...);
@@ -71,6 +79,8 @@
     X86Mir2Lir* const mir_to_lir_;
   };
 
+  virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
+
  public:
   X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
@@ -125,8 +135,6 @@
     return TargetReg(symbolic_reg, cu_->target64 ? kWide : kNotWide);
   }
 
-  RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE;
-
   RegLocation GetReturnAlt() OVERRIDE;
   RegLocation GetReturnWideAlt() OVERRIDE;
   RegLocation LocCReturn() OVERRIDE;
@@ -350,22 +358,7 @@
   void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
                      SpecialTargetRegister symbolic_reg) OVERRIDE;
 
-  void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
-
   NextCallInsn GetNextSDCallInsn() OVERRIDE;
-  int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                           NextCallInsn next_call_insn,
-                           const MethodReference& target_method,
-                           uint32_t vtable_idx,
-                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                           bool skip_this) OVERRIDE;
-
-  int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
-                         NextCallInsn next_call_insn,
-                         const MethodReference& target_method,
-                         uint32_t vtable_idx,
-                         uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
-                         bool skip_this) OVERRIDE;
 
   /*
    * @brief Generate a relative call to the method that will be patched at link time.
@@ -439,8 +432,6 @@
   LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement,
                             RegStorage r_src, OpSize size, int opt_flags = 0);
 
-  RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num) const;
-
   int AssignInsnOffsets();
   void AssignOffsets();
   AssemblerStatus AssembleInstructions(CodeOffset start_addr);
@@ -1000,8 +991,6 @@
    */
   static void DumpRegLocation(RegLocation loc);
 
-  InToRegStorageMapping in_to_reg_storage_mapping_;
-
  private:
   void SwapBits(RegStorage result_reg, int shift, int32_t value);
   void SwapBits64(RegStorage result_reg, int shift, int64_t value);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index ae80e9f..5f6cdda 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -814,6 +814,7 @@
 
 X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena),
+      in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this),
       base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
       method_address_insns_(arena->Adapter()),
       class_type_address_insns_(arena->Adapter()),
@@ -2407,452 +2408,45 @@
 }
 
 // ------------ ABI support: mapping of args to physical registers -------------
-RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide,
-                                                              bool is_ref) {
+RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(ShortyArg arg) {
   const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3, kArg4, kArg5};
-  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) /
-      sizeof(SpecialTargetRegister);
+  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
   const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3,
                                                              kFArg4, kFArg5, kFArg6, kFArg7};
-  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) /
-      sizeof(SpecialTargetRegister);
+  const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
 
-  if (is_double_or_float) {
+  if (arg.IsFP()) {
     if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
-      return ml_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], is_wide ? kWide : kNotWide);
+      return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++],
+                             arg.IsWide() ? kWide : kNotWide);
     }
   } else {
     if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      return ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
-                            is_ref ? kRef : (is_wide ? kWide : kNotWide));
+      return m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                             arg.IsRef() ? kRef : (arg.IsWide() ? kWide : kNotWide));
     }
   }
   return RegStorage::InvalidReg();
 }
 
-RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
-  DCHECK(IsInitialized());
-  auto res = mapping_.find(in_position);
-  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
-}
+RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) {
+  const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3};
+  const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
 
-void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
-                                                   InToRegStorageMapper* mapper) {
-  DCHECK(mapper != nullptr);
-  max_mapped_in_ = -1;
-  is_there_stack_mapped_ = false;
-  for (int in_position = 0; in_position < count; in_position++) {
-     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
-             arg_locs[in_position].wide, arg_locs[in_position].ref);
-     if (reg.Valid()) {
-       mapping_[in_position] = reg;
-       max_mapped_in_ = std::max(max_mapped_in_, in_position);
-       if (arg_locs[in_position].wide) {
-         // We covered 2 args, so skip the next one
-         in_position++;
-       }
-     } else {
-       is_there_stack_mapped_ = true;
-     }
+  RegStorage result = RegStorage::InvalidReg();
+  if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+    result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                             arg.IsRef() ? kRef : kNotWide);
+    if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = RegStorage::MakeRegPair(
+          result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
+    }
   }
-  initialized_ = true;
-}
-
-RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  if (!cu_->target64) {
-    return GetCoreArgMappingToPhysicalReg(arg_num);
-  }
-
-  if (!in_to_reg_storage_mapping_.IsInitialized()) {
-    int start_vreg = cu_->mir_graph->GetFirstInVR();
-    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
-
-    InToRegStorageX86_64Mapper mapper(this);
-    in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper);
-  }
-  return in_to_reg_storage_mapping_.Get(arg_num);
-}
-
-RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) const {
-  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
-  // Not used for 64-bit, TODO: Move X86_32 to the same framework
-  switch (core_arg_num) {
-    case 0: return TargetReg32(kArg1);
-    case 1: return TargetReg32(kArg2);
-    case 2: return TargetReg32(kArg3);
-    default: return RegStorage::InvalidReg();
-  }
+  return result;
 }
 
 // ---------End of ABI support: mapping of args to physical registers -------------
 
-/*
- * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.  Perform initial
- * assignment of promoted arguments.
- *
- * ArgLocs is an array of location records describing the incoming arguments
- * with one location record per word of argument.
- */
-void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
-  if (!cu_->target64) return Mir2Lir::FlushIns(ArgLocs, rl_method);
-  /*
-   * Dummy up a RegLocation for the incoming Method*
-   * It will attempt to keep kArg0 live (or copy it to home location
-   * if promoted).
-   */
-
-  RegLocation rl_src = rl_method;
-  rl_src.location = kLocPhysReg;
-  rl_src.reg = TargetReg(kArg0, kRef);
-  rl_src.home = false;
-  MarkLive(rl_src);
-  StoreValue(rl_method, rl_src);
-  // If Method* has been promoted, explicitly flush
-  if (rl_method.location == kLocPhysReg) {
-    const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-    StoreRefDisp(rs_rSP, 0, As32BitReg(TargetReg(kArg0, kRef)), kNotVolatile);
-  }
-
-  if (mir_graph_->GetNumOfInVRs() == 0) {
-    return;
-  }
-
-  int start_vreg = cu_->mir_graph->GetFirstInVR();
-  /*
-   * Copy incoming arguments to their proper home locations.
-   * NOTE: an older version of dx had an issue in which
-   * it would reuse static method argument registers.
-   * This could result in the same Dalvik virtual register
-   * being promoted to both core and fp regs. To account for this,
-   * we only copy to the corresponding promoted physical register
-   * if it matches the type of the SSA name for the incoming
-   * argument.  It is also possible that long and double arguments
-   * end up half-promoted.  In those cases, we must flush the promoted
-   * half to memory as well.
-   */
-  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-  for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) {
-    // get reg corresponding to input
-    RegStorage reg = GetArgMappingToPhysicalReg(i);
-
-    RegLocation* t_loc = &ArgLocs[i];
-    if (reg.Valid()) {
-      // If arriving in register.
-
-      // We have already updated the arg location with promoted info
-      // so we can be based on it.
-      if (t_loc->location == kLocPhysReg) {
-        // Just copy it.
-        OpRegCopy(t_loc->reg, reg);
-      } else {
-        // Needs flush.
-        if (t_loc->ref) {
-          StoreRefDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), reg, kNotVolatile);
-        } else {
-          StoreBaseDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
-                        kNotVolatile);
-        }
-      }
-    } else {
-      // If arriving in frame & promoted.
-      if (t_loc->location == kLocPhysReg) {
-        if (t_loc->ref) {
-          LoadRefDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
-        } else {
-          LoadBaseDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), t_loc->reg,
-                       t_loc->wide ? k64 : k32, kNotVolatile);
-        }
-      }
-    }
-    if (t_loc->wide) {
-      // Increment i to skip the next one.
-      i++;
-    }
-  }
-}
-
-/*
- * Load up to 5 arguments, the first three of which will be in
- * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
- * and as part of the load sequence, it must be replaced with
- * the target method pointer.  Note, this may also be called
- * for "range" variants if the number of arguments is 5 or fewer.
- */
-int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
-                                  int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
-                                  const MethodReference& target_method,
-                                  uint32_t vtable_idx, uintptr_t direct_code,
-                                  uintptr_t direct_method, InvokeType type, bool skip_this) {
-  if (!cu_->target64) {
-    return Mir2Lir::GenDalvikArgsNoRange(info,
-                                         call_state, pcrLabel, next_call_insn,
-                                         target_method,
-                                         vtable_idx, direct_code,
-                                         direct_method, type, skip_this);
-  }
-  return GenDalvikArgsRange(info,
-                            call_state, pcrLabel, next_call_insn,
-                            target_method,
-                            vtable_idx, direct_code,
-                            direct_method, type, skip_this);
-}
-
-/*
- * May have 0+ arguments (also used for jumbo).  Note that
- * source virtual registers may be in physical registers, so may
- * need to be flushed to home location before copying.  This
- * applies to arg3 and above (see below).
- *
- * Two general strategies:
- *    If < 20 arguments
- *       Pass args 3-18 using vldm/vstm block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *    If 20+ arguments
- *       Pass args arg19+ using memcpy block copy
- *       Pass arg0, arg1 & arg2 in kArg1-kArg3
- *
- */
-int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
-                                LIR** pcrLabel, NextCallInsn next_call_insn,
-                                const MethodReference& target_method,
-                                uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
-                                InvokeType type, bool skip_this) {
-  if (!cu_->target64) {
-    return Mir2Lir::GenDalvikArgsRange(info, call_state,
-                                pcrLabel, next_call_insn,
-                                target_method,
-                                vtable_idx, direct_code, direct_method,
-                                type, skip_this);
-  }
-
-  /* If no arguments, just return */
-  if (info->num_arg_words == 0)
-    return call_state;
-
-  const int start_index = skip_this ? 1 : 0;
-
-  InToRegStorageX86_64Mapper mapper(this);
-  InToRegStorageMapping in_to_reg_storage_mapping;
-  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
-  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
-  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
-          info->args[last_mapped_in].wide ? 2 : 1;
-  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
-
-  // Fisrt of all, check whether it make sense to use bulk copying
-  // Optimization is aplicable only for range case
-  // TODO: make a constant instead of 2
-  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
-    // Scan the rest of the args - if in phys_reg flush to memory
-    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
-      RegLocation loc = info->args[next_arg];
-      if (loc.wide) {
-        loc = UpdateLocWide(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(rs_rX86_SP_64, SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
-        }
-        next_arg += 2;
-      } else {
-        loc = UpdateLoc(loc);
-        if (loc.location == kLocPhysReg) {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          StoreBaseDisp(rs_rX86_SP_64, SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
-        }
-        next_arg++;
-      }
-    }
-
-    // The rest can be copied together
-    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
-    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped,
-                                                   cu_->instruction_set);
-
-    int current_src_offset = start_offset;
-    int current_dest_offset = outs_offset;
-
-    // Only davik regs are accessed in this loop; no next_call_insn() calls.
-    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-    while (regs_left_to_pass_via_stack > 0) {
-      // This is based on the knowledge that the stack itself is 16-byte aligned.
-      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
-      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
-      size_t bytes_to_move;
-
-      /*
-       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a
-       * a 128-bit move because we won't get the chance to try to aligned. If there are more than
-       * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned.
-       * We do this because we could potentially do a smaller move to align.
-       */
-      if (regs_left_to_pass_via_stack == 4 ||
-          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
-        // Moving 128-bits via xmm register.
-        bytes_to_move = sizeof(uint32_t) * 4;
-
-        // Allocate a free xmm temp. Since we are working through the calling sequence,
-        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
-        // there are no free registers.
-        RegStorage temp = AllocTempDouble();
-
-        LIR* ld1 = nullptr;
-        LIR* ld2 = nullptr;
-        LIR* st1 = nullptr;
-        LIR* st2 = nullptr;
-
-        /*
-         * The logic is similar for both loads and stores. If we have 16-byte alignment,
-         * do an aligned move. If we have 8-byte alignment, then do the move in two
-         * parts. This approach prevents possible cache line splits. Finally, fall back
-         * to doing an unaligned move. In most cases we likely won't split the cache
-         * line but we cannot prove it and thus take a conservative approach.
-         */
-        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
-        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
-
-        ScopedMemRefType mem_ref_type2(this, ResourceMask::kDalvikReg);
-        if (src_is_16b_aligned) {
-          ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovA128FP);
-        } else if (src_is_8b_aligned) {
-          ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovLo128FP);
-          ld2 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset + (bytes_to_move >> 1),
-                            kMovHi128FP);
-        } else {
-          ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovU128FP);
-        }
-
-        if (dest_is_16b_aligned) {
-          st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovA128FP);
-        } else if (dest_is_8b_aligned) {
-          st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovLo128FP);
-          st2 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset + (bytes_to_move >> 1),
-                            temp, kMovHi128FP);
-        } else {
-          st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovU128FP);
-        }
-
-        // TODO If we could keep track of aliasing information for memory accesses that are wider
-        // than 64-bit, we wouldn't need to set up a barrier.
-        if (ld1 != nullptr) {
-          if (ld2 != nullptr) {
-            // For 64-bit load we can actually set up the aliasing information.
-            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
-            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
-          } else {
-            // Set barrier for 128-bit load.
-            ld1->u.m.def_mask = &kEncodeAll;
-          }
-        }
-        if (st1 != nullptr) {
-          if (st2 != nullptr) {
-            // For 64-bit store we can actually set up the aliasing information.
-            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
-            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
-          } else {
-            // Set barrier for 128-bit store.
-            st1->u.m.def_mask = &kEncodeAll;
-          }
-        }
-
-        // Free the temporary used for the data movement.
-        FreeTemp(temp);
-      } else {
-        // Moving 32-bits via general purpose register.
-        bytes_to_move = sizeof(uint32_t);
-
-        // Instead of allocating a new temp, simply reuse one of the registers being used
-        // for argument passing.
-        RegStorage temp = TargetReg(kArg3, kNotWide);
-
-        // Now load the argument VR and store to the outs.
-        Load32Disp(rs_rX86_SP_64, current_src_offset, temp);
-        Store32Disp(rs_rX86_SP_64, current_dest_offset, temp);
-      }
-
-      current_src_offset += bytes_to_move;
-      current_dest_offset += bytes_to_move;
-      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
-    }
-    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
-  }
-
-  // Now handle rest not registers if they are
-  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
-    RegStorage regSingle = TargetReg(kArg2, kNotWide);
-    RegStorage regWide = TargetReg(kArg3, kWide);
-    for (int i = start_index;
-         i < last_mapped_in + size_of_the_last_mapped + regs_left_to_pass_via_stack; i++) {
-      RegLocation rl_arg = info->args[i];
-      rl_arg = UpdateRawLoc(rl_arg);
-      RegStorage reg = in_to_reg_storage_mapping.Get(i);
-      if (!reg.Valid()) {
-        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
-
-        {
-          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-          if (rl_arg.wide) {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(rs_rX86_SP_64, out_offset, rl_arg.reg, k64, kNotVolatile);
-            } else {
-              LoadValueDirectWideFixed(rl_arg, regWide);
-              StoreBaseDisp(rs_rX86_SP_64, out_offset, regWide, k64, kNotVolatile);
-            }
-          } else {
-            if (rl_arg.location == kLocPhysReg) {
-              StoreBaseDisp(rs_rX86_SP_64, out_offset, rl_arg.reg, k32, kNotVolatile);
-            } else {
-              LoadValueDirectFixed(rl_arg, regSingle);
-              StoreBaseDisp(rs_rX86_SP_64, out_offset, regSingle, k32, kNotVolatile);
-            }
-          }
-        }
-        call_state = next_call_insn(cu_, info, call_state, target_method,
-                                    vtable_idx, direct_code, direct_method, type);
-      }
-      if (rl_arg.wide) {
-        i++;
-      }
-    }
-  }
-
-  // Finish with mapped registers
-  for (int i = start_index; i <= last_mapped_in; i++) {
-    RegLocation rl_arg = info->args[i];
-    rl_arg = UpdateRawLoc(rl_arg);
-    RegStorage reg = in_to_reg_storage_mapping.Get(i);
-    if (reg.Valid()) {
-      if (rl_arg.wide) {
-        LoadValueDirectWideFixed(rl_arg, reg);
-      } else {
-        LoadValueDirectFixed(rl_arg, reg);
-      }
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-    }
-    if (rl_arg.wide) {
-      i++;
-    }
-  }
-
-  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                           direct_code, direct_method, type);
-  if (pcrLabel) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
-      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
-    } else {
-      *pcrLabel = nullptr;
-      // In lieu of generating a check for kArg1 being null, we need to
-      // perform a load when doing implicit checks.
-      RegStorage tmp = AllocTemp();
-      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
-      MarkPossibleNullPointerException(info->opt_flags);
-      FreeTemp(tmp);
-    }
-  }
-  return call_state;
-}
-
 bool X86Mir2Lir::GenInlinedCharAt(CallInfo* info) {
   // Location of reference to data array
   int value_offset = mirror::String::ValueOffset().Int32Value();
@@ -2980,4 +2574,122 @@
   }
 }
 
+int X86Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) {
+  if (count < 4) {
+    // It does not make sense to use this utility if we have no chance to use
+    // a 128-bit move.
+    return count;
+  }
+  GenDalvikArgsFlushPromoted(info, first);
+
+  // The rest can be copied together
+  int current_src_offset = SRegOffset(info->args[first].s_reg_low);
+  int current_dest_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set);
+
+  // Only dalvik regs are accessed in this loop; no next_call_insn() calls.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+  while (count > 0) {
+    // This is based on the knowledge that the stack itself is 16-byte aligned.
+    bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+    bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+    size_t bytes_to_move;
+
+    /*
+     * The amount to move defaults to 32-bit. If there are 4 registers left to move, do a
+     * 128-bit move, because we won't get another chance to align. If there are more than
+     * 4 registers left to move, consider a 128-bit move only if either src or dest is
+     * aligned, because a smaller move may be all we need to reach alignment.
+     */
+    if (count == 4 || (count > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+      // Moving 128-bits via xmm register.
+      bytes_to_move = sizeof(uint32_t) * 4;
+
+      // Allocate a free xmm temp. Since we are working through the calling sequence,
+      // we expect to have an xmm temporary available. AllocTempDouble will abort if
+      // there are no free registers.
+      RegStorage temp = AllocTempDouble();
+
+      LIR* ld1 = nullptr;
+      LIR* ld2 = nullptr;
+      LIR* st1 = nullptr;
+      LIR* st2 = nullptr;
+
+      /*
+       * The logic is similar for both loads and stores. If we have 16-byte alignment,
+       * do an aligned move. If we have 8-byte alignment, then do the move in two
+       * parts. This approach prevents possible cache line splits. Finally, fall back
+       * to doing an unaligned move. In most cases we likely won't split the cache
+       * line but we cannot prove it and thus take a conservative approach.
+       */
+      bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+      bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+      if (src_is_16b_aligned) {
+        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP);
+      } else if (src_is_8b_aligned) {
+        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP);
+        ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1),
+                          kMovHi128FP);
+      } else {
+        ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP);
+      }
+
+      if (dest_is_16b_aligned) {
+        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP);
+      } else if (dest_is_8b_aligned) {
+        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP);
+        st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1),
+                          temp, kMovHi128FP);
+      } else {
+        st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP);
+      }
+
+      // TODO If we could keep track of aliasing information for memory accesses that are wider
+      // than 64-bit, we wouldn't need to set up a barrier.
+      if (ld1 != nullptr) {
+        if (ld2 != nullptr) {
+          // For 64-bit load we can actually set up the aliasing information.
+          AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+          AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true,
+                                  true);
+        } else {
+          // Set barrier for 128-bit load.
+          ld1->u.m.def_mask = &kEncodeAll;
+        }
+      }
+      if (st1 != nullptr) {
+        if (st2 != nullptr) {
+          // For 64-bit store we can actually set up the aliasing information.
+          AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+          AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false,
+                                  true);
+        } else {
+          // Set barrier for 128-bit store.
+          st1->u.m.def_mask = &kEncodeAll;
+        }
+      }
+
+      // Free the temporary used for the data movement.
+      FreeTemp(temp);
+    } else {
+      // Moving 32-bits via general purpose register.
+      bytes_to_move = sizeof(uint32_t);
+
+      // Instead of allocating a new temp, simply reuse one of the registers being used
+      // for argument passing.
+      RegStorage temp = TargetReg(kArg3, kNotWide);
+
+      // Now load the argument VR and store to the outs.
+      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
+      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
+    }
+
+    current_src_offset += bytes_to_move;
+    current_dest_offset += bytes_to_move;
+    count -= (bytes_to_move >> 2);
+  }
+  DCHECK_EQ(count, 0);
+  return count;
+}
+
 }  // namespace art
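The relocated bulk-copy loop chooses a 128-bit XMM move whenever exactly four 32-bit slots remain, or more than four remain and at least one side is already 16-byte aligned; otherwise it does a 32-bit GPR move, which may be exactly the small move that reaches alignment. The decision logic in isolation (offsets and count are made up; this only prints the schedule):

#include <cstdio>

int main() {
  int src = 4, dest = 0;  // byte offsets; the stack itself is 16-byte aligned
  int count = 7;          // 32-bit slots left to copy
  while (count > 0) {
    bool src16 = (src & 0xF) == 0;
    bool dest16 = (dest & 0xF) == 0;
    int bytes = 4;  // default: one 32-bit GPR move
    if (count == 4 || (count > 4 && (src16 || dest16))) {
      bytes = 16;   // one 128-bit XMM move
    }
    printf("move %2d bytes: src=%d dest=%d\n", bytes, src, dest);
    src += bytes;
    dest += bytes;
    count -= bytes >> 2;
  }
  return 0;
}

With these inputs it schedules one 16-byte move (the destination is aligned) followed by three 4-byte moves.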
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index ed33882..7cd431e 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -104,11 +104,11 @@
   num_reachable_blocks_ = dfs_order_.size();
 
   if (num_reachable_blocks_ != num_blocks_) {
-    // Hide all unreachable blocks.
+    // Kill all unreachable blocks.
     AllNodesIterator iter(this);
     for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
       if (!bb->visited) {
-        bb->Hide(this);
+        bb->Kill(this);
       }
     }
   }
@@ -197,12 +197,6 @@
         dom_post_order_traversal_.push_back(curr_bb->id);
       }
       work_stack.pop_back();
-
-      /* hacky loop detection */
-      if ((curr_bb->taken != NullBasicBlockId) && curr_bb->dominators->IsBitSet(curr_bb->taken)) {
-        curr_bb->nesting_depth++;
-        attributes_ |= METHOD_HAS_LOOP;
-      }
     }
   }
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8a0c2de..4757235 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2391,7 +2391,7 @@
           __ shrl(first_reg, second_reg);
         }
       } else {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
         if (op->IsShl()) {
           __ shll(first_reg, imm);
         } else if (op->IsShr()) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 233f4a4..f8651f6 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2223,7 +2223,7 @@
           __ shrl(first_reg, second_reg);
         }
       } else {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
         if (op->IsShl()) {
           __ shll(first_reg, imm);
         } else if (op->IsShr()) {
@@ -2245,7 +2245,7 @@
           __ shrq(first_reg, second_reg);
         }
       } else {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
         if (op->IsShl()) {
           __ shlq(first_reg, imm);
         } else if (op->IsShr()) {
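Both fixes mask constant shift distances before emitting the immediate: Java defines shifts on int as taking the distance mod 32 and on long mod 64, which also matches what x86 shl/shr do with a register count, so an out-of-range constant from bytecode must not reach the encoder unmasked. Assuming kMaxIntShiftValue and kMaxLongShiftValue are 31 and 63, the intended semantics are:

#include <cstdint>
#include <cstdio>

int main() {
  const int32_t kMaxIntShiftValue = 31;   // assumed values of the ART constants
  const int32_t kMaxLongShiftValue = 63;

  int32_t x = 1;
  int64_t y = 1;
  // Java's 1 << 33 on an int is 1 << (33 & 31) == 2; same idea for long.
  printf("%d\n", x << (33 & kMaxIntShiftValue));
  printf("%lld\n", (long long)(y << (65 & kMaxLongShiftValue)));
  return 0;
}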
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index bdc4cf6..b781d60 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -35,6 +35,8 @@
 
 LogVerbosity gLogVerbosity;
 
+unsigned int gAborting = 0;
+
 static LogSeverity gMinimumLogSeverity = INFO;
 static std::unique_ptr<std::string> gCmdLine;
 static std::unique_ptr<std::string> gProgramInvocationName;
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index a9cc99b..ae83e33 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -55,6 +55,11 @@
 // Global log verbosity setting, initialized by InitLogging.
 extern LogVerbosity gLogVerbosity;
 
+// 0 if no abort is in progress, non-zero otherwise. Used on fatal exit to prevent recursive
+// aborts. The global declaration allows us to disable some error checking to ensure fatal
+// shutdown makes forward progress.
+extern unsigned int gAborting;
+
 // Configure logging based on ANDROID_LOG_TAGS environment variable.
 // We need to parse a string that looks like
 //
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 0206341..cb69817 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -97,7 +97,9 @@
         }
       }
     }
-    CHECK(!bad_mutexes_held);
+    if (gAborting == 0) {  // Avoid recursive aborts.
+      CHECK(!bad_mutexes_held);
+    }
   }
   // Don't record monitors as they are outside the scope of analysis. They may be inspected off of
   // the monitor list.
@@ -112,7 +114,7 @@
     return;
   }
   if (level_ != kMonitorLock) {
-    if (kDebugLocking) {
+    if (kDebugLocking && gAborting == 0) {  // Avoid recursive aborts.
       CHECK(self->GetHeldMutex(level_) == this) << "Unlocking on unacquired mutex: " << name_;
     }
     self->SetHeldMutex(level_, NULL);
@@ -176,7 +178,7 @@
   bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
   if (kDebugLocking) {
     // Sanity debug check that if we think it is locked we have it in our held mutexes.
-    if (result && self != NULL && level_ != kMonitorLock) {
+    if (result && self != NULL && level_ != kMonitorLock && !gAborting) {
       CHECK_EQ(self->GetHeldMutex(level_), this);
     }
   }
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 4957988..aa2aefc 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -209,7 +209,9 @@
         }
       }
     }
-    CHECK(!bad_mutexes_held);
+    if (gAborting == 0) {  // Avoid recursive aborts.
+      CHECK(!bad_mutexes_held);
+    }
   }
 }
 
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 41b5f12..9c93cc6 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -220,7 +220,7 @@
 
   // Assert that the Mutex is exclusively held by the current thread.
   void AssertExclusiveHeld(const Thread* self) {
-    if (kDebugLocking) {
+    if (kDebugLocking && (gAborting == 0)) {
       CHECK(IsExclusiveHeld(self)) << *this;
     }
   }
@@ -228,7 +228,7 @@
 
   // Assert that the Mutex is not held by the current thread.
   void AssertNotHeldExclusive(const Thread* self) {
-    if (kDebugLocking) {
+    if (kDebugLocking && (gAborting == 0)) {
       CHECK(!IsExclusiveHeld(self)) << *this;
     }
   }
@@ -318,7 +318,7 @@
 
   // Assert the current thread has exclusive access to the ReaderWriterMutex.
   void AssertExclusiveHeld(const Thread* self) {
-    if (kDebugLocking) {
+    if (kDebugLocking && (gAborting == 0)) {
       CHECK(IsExclusiveHeld(self)) << *this;
     }
   }
@@ -326,7 +326,7 @@
 
   // Assert the current thread doesn't have exclusive access to the ReaderWriterMutex.
   void AssertNotExclusiveHeld(const Thread* self) {
-    if (kDebugLocking) {
+    if (kDebugLocking && (gAborting == 0)) {
       CHECK(!IsExclusiveHeld(self)) << *this;
     }
   }
@@ -337,7 +337,7 @@
 
   // Assert the current thread has shared access to the ReaderWriterMutex.
   void AssertSharedHeld(const Thread* self) {
-    if (kDebugLocking) {
+    if (kDebugLocking && (gAborting == 0)) {
       // TODO: we can only assert this well when self != NULL.
       CHECK(IsSharedHeld(self) || self == NULL) << *this;
     }
@@ -347,7 +347,7 @@
   // Assert the current thread doesn't hold this ReaderWriterMutex either in shared or exclusive
   // mode.
   void AssertNotHeld(const Thread* self) {
-    if (kDebugLocking) {
+    if (kDebugLocking && (gAborting == 0)) {
       CHECK(!IsSharedHeld(self)) << *this;
     }
   }
diff --git a/runtime/globals.h b/runtime/globals.h
index 3104229..beabf55 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -101,7 +101,11 @@
 static constexpr bool kUseBakerOrBrooksReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier;
 
 // If true, references within the heap are poisoned (negated).
+#ifdef ART_HEAP_POISONING
+static constexpr bool kPoisonHeapReferences = true;
+#else
 static constexpr bool kPoisonHeapReferences = false;
+#endif
 
 // Kinds of tracing clocks.
 enum TraceClockSource {
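
Wiring the -DART_HEAP_POISONING flag into a constexpr (rather than a runtime option) means code can branch on kPoisonHeapReferences freely: in the default build the compiler folds the poisoning paths away entirely. A sketch of how such a flag is typically consumed; the Poison/Unpoison helpers here are illustrative, not the ART API:

    #include <cstdint>

    #ifdef ART_HEAP_POISONING
    static constexpr bool kPoisonSketch = true;
    #else
    static constexpr bool kPoisonSketch = false;
    #endif

    // Poisoning stores the negated reference, so code that forgets to
    // unpoison before dereferencing faults quickly.
    inline uint32_t PoisonRefSketch(uint32_t ref) {
      return kPoisonSketch ? 0u - ref : ref;
    }

    inline uint32_t UnpoisonRefSketch(uint32_t stored) {
      return kPoisonSketch ? 0u - stored : stored;  // negation is its own inverse
    }
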
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index be34bd3..a39a7b7 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -1345,8 +1345,10 @@
       }
       break;
     default:
-      LOG(WARNING) << "GLITCH: unsupported modKind=" << mod.modKind;
-      break;
+      LOG(WARNING) << "Unsupported modifier " << mod.modKind << " for event " << pEvent->eventKind;
+      // Free the allocated event to avoid a leak before returning.
+      EventFree(pEvent);
+      return JDWP::ERR_NOT_IMPLEMENTED;
     }
   }
 
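
The JDWP fix above replaces a warn-and-continue with a real error path; since the event was already allocated before the modifier loop, it must be released on the new early return or it leaks. A sketch of the shape of that fix, with stand-in names and error codes (the real code frees via EventFree):

    struct EventSketch { int mod_count = 0; };
    constexpr int kLastSupportedModKind = 12;  // hypothetical bound

    int RegisterEventSketch(int mod_kind) {
      EventSketch* ev = new EventSketch();
      // ... populate modifiers from the request ...
      if (mod_kind > kLastSupportedModKind) {
        delete ev;   // free the half-built event before bailing out
        return -1;   // stand-in for JDWP::ERR_NOT_IMPLEMENTED
      }
      // ... hand ev off to the event registry ...
      return 0;
    }
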
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 01c5070..c917d84 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -72,39 +72,29 @@
 }
 
 struct ObjectComparator {
-  bool operator()(GcRoot<mirror::Object> root1, GcRoot<mirror::Object> root2)
+  bool operator()(GcRoot<mirror::Object> root1, GcRoot<mirror::Object> root2) const
     // TODO: enable analysis when analysis can work with the STL.
       NO_THREAD_SAFETY_ANALYSIS {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
     mirror::Object* obj1 = root1.Read<kWithoutReadBarrier>();
     mirror::Object* obj2 = root2.Read<kWithoutReadBarrier>();
-    // Ensure null references and cleared jweaks appear at the end.
-    if (obj1 == NULL) {
-      return true;
-    } else if (obj2 == NULL) {
-      return false;
-    }
+    DCHECK(obj1 != nullptr);
+    DCHECK(obj2 != nullptr);
     Runtime* runtime = Runtime::Current();
-    if (runtime->IsClearedJniWeakGlobal(obj1)) {
-      return true;
-    } else if (runtime->IsClearedJniWeakGlobal(obj2)) {
-      return false;
-    }
-
+    DCHECK(!runtime->IsClearedJniWeakGlobal(obj1));
+    DCHECK(!runtime->IsClearedJniWeakGlobal(obj2));
     // Sort by class...
     if (obj1->GetClass() != obj2->GetClass()) {
-      return obj1->GetClass()->IdentityHashCode() < obj2->IdentityHashCode();
-    } else {
-      // ...then by size...
-      size_t count1 = obj1->SizeOf();
-      size_t count2 = obj2->SizeOf();
-      if (count1 != count2) {
-        return count1 < count2;
-      } else {
-        // ...and finally by identity hash code.
-        return obj1->IdentityHashCode() < obj2->IdentityHashCode();
-      }
+      return obj1->GetClass()->IdentityHashCode() < obj2->GetClass()->IdentityHashCode();
     }
+    // ...then by size...
+    const size_t size1 = obj1->SizeOf();
+    const size_t size2 = obj2->SizeOf();
+    if (size1 != size2) {
+      return size1 < size2;
+    }
+    // ...and finally by identity hash code.
+    return obj1->IdentityHashCode() < obj2->IdentityHashCode();
   }
 };
 
@@ -166,16 +156,17 @@
     first = 0;
   }
   os << "  Last " << (count - first) << " entries (of " << count << "):\n";
+  Runtime* runtime = Runtime::Current();
   for (int idx = count - 1; idx >= first; --idx) {
     mirror::Object* ref = entries[idx].Read();
-    if (ref == NULL) {
+    if (ref == nullptr) {
       continue;
     }
-    if (Runtime::Current()->IsClearedJniWeakGlobal(ref)) {
+    if (runtime->IsClearedJniWeakGlobal(ref)) {
       os << StringPrintf("    %5d: cleared jweak\n", idx);
       continue;
     }
-    if (ref->GetClass() == NULL) {
+    if (ref->GetClass() == nullptr) {
       // should only be possible right after a plain dvmMalloc().
       size_t size = ref->SizeOf();
       os << StringPrintf("    %5d: %p (raw) (%zd bytes)\n", idx, ref, size);
@@ -189,7 +180,7 @@
     if (element_count != 0) {
       StringAppendF(&extras, " (%zd elements)", element_count);
     } else if (ref->GetClass()->IsStringClass()) {
-      mirror::String* s = const_cast<mirror::Object*>(ref)->AsString();
+      mirror::String* s = ref->AsString();
       std::string utf8(s->ToModifiedUtf8());
       if (s->GetLength() <= 16) {
         StringAppendF(&extras, " \"%s\"", utf8.c_str());
@@ -200,51 +191,45 @@
     os << StringPrintf("    %5d: ", idx) << ref << " " << className << extras << "\n";
   }
 
-  // Make a copy of the table and sort it.
+  // Make a copy of the table and sort it, skipping null references and cleared jweaks.
   Table sorted_entries;
-  for (size_t i = 0; i < entries.size(); ++i) {
-    mirror::Object* entry = entries[i].Read();
-    sorted_entries.push_back(GcRoot<mirror::Object>(entry));
-  }
-  std::sort(sorted_entries.begin(), sorted_entries.end(), ObjectComparator());
-
-  // Remove any uninteresting stuff from the list. The sort moved them all to the end.
-  while (!sorted_entries.empty() && sorted_entries.back().IsNull()) {
-    sorted_entries.pop_back();
-  }
-  while (!sorted_entries.empty() &&
-         Runtime::Current()->IsClearedJniWeakGlobal(
-             sorted_entries.back().Read<kWithoutReadBarrier>())) {
-    sorted_entries.pop_back();
+  for (GcRoot<mirror::Object>& root : entries) {
+    if (!root.IsNull() && !runtime->IsClearedJniWeakGlobal(root.Read())) {
+      sorted_entries.push_back(root);
+    }
   }
   if (sorted_entries.empty()) {
     return;
   }
+  std::sort(sorted_entries.begin(), sorted_entries.end(), ObjectComparator());
 
   // Dump a summary of the whole table.
   os << "  Summary:\n";
   size_t equiv = 0;
   size_t identical = 0;
-  for (size_t idx = 1; idx < count; idx++) {
-    mirror::Object* prev = sorted_entries[idx-1].Read<kWithoutReadBarrier>();
-    mirror::Object* current = sorted_entries[idx].Read<kWithoutReadBarrier>();
-    size_t element_count = GetElementCount(prev);
-    if (current == prev) {
-      // Same reference, added more than once.
-      identical++;
-    } else if (current->GetClass() == prev->GetClass() && GetElementCount(current) == element_count) {
-      // Same class / element count, different object.
-      equiv++;
-    } else {
-      // Different class.
-      DumpSummaryLine(os, prev, element_count, identical, equiv);
-      equiv = identical = 0;
+  mirror::Object* prev = nullptr;
+  for (GcRoot<mirror::Object>& root : sorted_entries) {
+    mirror::Object* current = root.Read<kWithoutReadBarrier>();
+    if (prev != nullptr) {
+      const size_t element_count = GetElementCount(prev);
+      if (current == prev) {
+        // Same reference, added more than once.
+        ++identical;
+      } else if (current->GetClass() == prev->GetClass() &&
+          GetElementCount(current) == element_count) {
+        // Same class / element count, different object.
+        ++equiv;
+      } else {
+        // Different class.
+        DumpSummaryLine(os, prev, element_count, identical, equiv);
+        equiv = 0;
+        identical = 0;
+      }
     }
+    prev = current;
   }
   // Handle the last entry.
-  DumpSummaryLine(os, sorted_entries.back().Read<kWithoutReadBarrier>(),
-                  GetElementCount(sorted_entries.back().Read<kWithoutReadBarrier>()),
-                  identical, equiv);
+  DumpSummaryLine(os, prev, GetElementCount(prev), identical, equiv);
 }
 
 void ReferenceTable::VisitRoots(RootCallback* visitor, void* arg, uint32_t tid,
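
Two separate fixes are folded into this rewrite. First, the old comparator returned true whenever its first argument was null or a cleared jweak, so with two such entries both comp(a, b) and comp(b, a) held, violating the strict weak ordering std::sort requires (undefined behavior); filtering those entries out before sorting lets the comparator simply assume live objects. Second, it fixes a real bug: the class comparison pitted obj1->GetClass()->IdentityHashCode() against obj2->IdentityHashCode(), i.e. the object's hash rather than its class's. A minimal filter-then-sort sketch:

    #include <algorithm>
    #include <vector>

    using RefSketch = const void*;  // stand-in for GcRoot<mirror::Object>

    bool IsLiveSketch(RefSketch r) {
      return r != nullptr;  // stand-in for the null / cleared-jweak tests
    }

    std::vector<RefSketch> SortedLiveEntries(const std::vector<RefSketch>& entries) {
      std::vector<RefSketch> sorted;
      for (RefSketch r : entries) {
        if (IsLiveSketch(r)) {
          sorted.push_back(r);
        }
      }
      // With the dead entries gone, the comparator can be a plain
      // strict weak ordering over live objects, as std::sort demands.
      std::sort(sorted.begin(), sorted.end());
      return sorted;
    }
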
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index e792031..078e7d2 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -126,8 +126,6 @@
 static constexpr bool kEnableJavaStackTraceHandler = false;
 Runtime* Runtime::instance_ = nullptr;
 
-volatile unsigned int gAborting = 0;
-
 Runtime::Runtime()
     : instruction_set_(kNone),
       compiler_callbacks_(nullptr),
@@ -238,8 +236,13 @@
 
 struct AbortState {
   void Dump(std::ostream& os) const {
+    if (gAborting > 1) {
+      os << "Runtime aborting --- recursively, so no thread-specific detail!\n";
+      return;
+    }
+    gAborting++;
     os << "Runtime aborting...\n";
-    if (Runtime::Current() == nullptr) {
+    if (Runtime::Current() == NULL) {
       os << "(Runtime does not yet exist!)\n";
       return;
     }
@@ -297,18 +300,13 @@
 
 void Runtime::Abort() {
   gAborting++;  // set before taking any locks
-  if (gAborting > 1) {
-    LogMessage::LogLine(__FILE__, __LINE__, INTERNAL_FATAL,
-                        "Runtime aborting --- recursively, so no thread-specific detail!\n");
-    return;
-  }
 
   // Ensure that we don't have multiple threads trying to abort at once,
   // which would result in significantly worse diagnostics.
   MutexLock mu(Thread::Current(), *Locks::abort_lock_);
 
   // Get any pending output out of the way.
-  fflush(nullptr);
+  fflush(NULL);
 
   // Many people have difficulty distinguishing aborts from crashes,
   // so be explicit.
@@ -316,7 +314,7 @@
   LOG(INTERNAL_FATAL) << Dumpable<AbortState>(state);
 
   // Call the abort hook if we have one.
-  if (Runtime::Current() != nullptr && Runtime::Current()->abort_ != nullptr) {
+  if (Runtime::Current() != NULL && Runtime::Current()->abort_ != NULL) {
     LOG(INTERNAL_FATAL) << "Calling abort hook...";
     Runtime::Current()->abort_();
     // notreached
@@ -344,7 +342,7 @@
 }
 
 void Runtime::CallExitHook(jint status) {
-  if (exit_ != nullptr) {
+  if (exit_ != NULL) {
     ScopedThreadStateChange tsc(Thread::Current(), kNative);
     exit_(status);
     LOG(WARNING) << "Exit hook returned instead of exiting!";
@@ -359,14 +357,14 @@
 
 bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) {
   // TODO: acquire a static mutex on Runtime to avoid racing.
-  if (Runtime::instance_ != nullptr) {
+  if (Runtime::instance_ != NULL) {
     return false;
   }
-  InitLogging(nullptr);  // Calls Locks::Init() as a side effect.
+  InitLogging(NULL);  // Calls Locks::Init() as a side effect.
   instance_ = new Runtime;
   if (!instance_->Init(options, ignore_unrecognized)) {
     delete instance_;
-    instance_ = nullptr;
+    instance_ = NULL;
     return false;
   }
   return true;
@@ -374,7 +372,7 @@
 
 static jobject CreateSystemClassLoader() {
   if (Runtime::Current()->UseCompileTimeClassPath()) {
-    return nullptr;
+    return NULL;
   }
 
   ScopedObjectAccess soa(Thread::Current());
@@ -387,7 +385,7 @@
 
   mirror::ArtMethod* getSystemClassLoader =
       class_loader_class->FindDirectMethod("getSystemClassLoader", "()Ljava/lang/ClassLoader;");
-  CHECK(getSystemClassLoader != nullptr);
+  CHECK(getSystemClassLoader != NULL);
 
   JValue result = InvokeWithJValues(soa, nullptr, soa.EncodeMethod(getSystemClassLoader), nullptr);
   JNIEnv* env = soa.Self()->GetJniEnv();
@@ -403,7 +401,7 @@
 
   mirror::ArtField* contextClassLoader =
       thread_class->FindDeclaredInstanceField("contextClassLoader", "Ljava/lang/ClassLoader;");
-  CHECK(contextClassLoader != nullptr);
+  CHECK(contextClassLoader != NULL);
 
   // We can't run in a transaction yet.
   contextClassLoader->SetObject<false>(soa.Self()->GetPeer(),
@@ -529,7 +527,7 @@
 
   // Mark rootfs as being a slave so that changes from default
   // namespace only flow into our children.
-  if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) {
+  if (mount("rootfs", "/", NULL, (MS_SLAVE | MS_REC), NULL) == -1) {
     PLOG(WARNING) << "Failed to mount() rootfs as MS_SLAVE";
     return false;
   }
@@ -538,7 +536,7 @@
   // bind mount storage into their respective private namespaces, which
   // are isolated from each other.
   const char* target_base = getenv("EMULATED_STORAGE_TARGET");
-  if (target_base != nullptr) {
+  if (target_base != NULL) {
     if (mount("tmpfs", target_base, "tmpfs", MS_NOSUID | MS_NODEV,
               "uid=0,gid=1028,mode=0751") == -1) {
       LOG(WARNING) << "Failed to mount tmpfs to " << target_base;
@@ -895,14 +893,14 @@
   self->ThrowNewException(ThrowLocation(), "Ljava/lang/OutOfMemoryError;",
                           "OutOfMemoryError thrown while trying to throw OutOfMemoryError; "
                           "no stack trace available");
-  pre_allocated_OutOfMemoryError_ = GcRoot<mirror::Throwable>(self->GetException(nullptr));
+  pre_allocated_OutOfMemoryError_ = GcRoot<mirror::Throwable>(self->GetException(NULL));
   self->ClearException();
 
   // Pre-allocate a NoClassDefFoundError for the common case of failing to find a system class
   // ahead of checking the application's class loader.
   self->ThrowNewException(ThrowLocation(), "Ljava/lang/NoClassDefFoundError;",
                           "Class not found using the boot class loader; no stack trace available");
-  pre_allocated_NoClassDefFoundError_ = GcRoot<mirror::Throwable>(self->GetException(nullptr));
+  pre_allocated_NoClassDefFoundError_ = GcRoot<mirror::Throwable>(self->GetException(NULL));
   self->ClearException();
 
   // Look for a native bridge.
@@ -978,26 +976,26 @@
       env->NewGlobalRef(env->GetStaticObjectField(
           WellKnownClasses::java_lang_ThreadGroup,
           WellKnownClasses::java_lang_ThreadGroup_mainThreadGroup));
-  CHECK(main_thread_group_ != nullptr || IsCompiler());
+  CHECK(main_thread_group_ != NULL || IsCompiler());
   system_thread_group_ =
       env->NewGlobalRef(env->GetStaticObjectField(
           WellKnownClasses::java_lang_ThreadGroup,
           WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup));
-  CHECK(system_thread_group_ != nullptr || IsCompiler());
+  CHECK(system_thread_group_ != NULL || IsCompiler());
 }
 
 jobject Runtime::GetMainThreadGroup() const {
-  CHECK(main_thread_group_ != nullptr || IsCompiler());
+  CHECK(main_thread_group_ != NULL || IsCompiler());
   return main_thread_group_;
 }
 
 jobject Runtime::GetSystemThreadGroup() const {
-  CHECK(system_thread_group_ != nullptr || IsCompiler());
+  CHECK(system_thread_group_ != NULL || IsCompiler());
   return system_thread_group_;
 }
 
 jobject Runtime::GetSystemClassLoader() const {
-  CHECK(system_class_loader_ != nullptr || IsCompiler());
+  CHECK(system_class_loader_ != NULL || IsCompiler());
   return system_class_loader_;
 }
 
@@ -1123,12 +1121,12 @@
 
 bool Runtime::AttachCurrentThread(const char* thread_name, bool as_daemon, jobject thread_group,
                                   bool create_peer) {
-  return Thread::Attach(thread_name, as_daemon, thread_group, create_peer) != nullptr;
+  return Thread::Attach(thread_name, as_daemon, thread_group, create_peer) != NULL;
 }
 
 void Runtime::DetachCurrentThread() {
   Thread* self = Thread::Current();
-  if (self == nullptr) {
+  if (self == NULL) {
     LOG(FATAL) << "attempting to detach thread that is not attached";
   }
   if (self->HasManagedStack()) {
@@ -1353,7 +1351,7 @@
 }
 
 const std::vector<const DexFile*>& Runtime::GetCompileTimeClassPath(jobject class_loader) {
-  if (class_loader == nullptr) {
+  if (class_loader == NULL) {
     return GetClassLinker()->GetBootClassPath();
   }
   CHECK(UseCompileTimeClassPath());
diff --git a/runtime/runtime.h b/runtime/runtime.h
index e334764..39fd910 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -71,11 +71,6 @@
 class Trace;
 class Transaction;
 
-// 0 if not abort, non-zero if an abort is in progress. Used on fatal exit to prevents recursive
-// aborts. Global declaration allows us to disable some error checking to ensure fatal shutdown
-// makes forward progress.
-extern volatile unsigned int gAborting;
-
 typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
 
 // Not all combinations of flags are valid. You may not visit all roots as well as the new roots
@@ -180,9 +175,9 @@
     return instance_;
   }
 
-  // Aborts semi-cleanly. Used in the implementation of LOG(FATAL), which most callers should
-  // prefer. Not [[noreturn]] due to returning early in the case of recursive aborts.
-  static void Abort() LOCKS_EXCLUDED(Locks::abort_lock_);
+  // Aborts semi-cleanly. Used in the implementation of LOG(FATAL), which most
+  // callers should prefer.
+  [[noreturn]] static void Abort() LOCKS_EXCLUDED(Locks::abort_lock_);
 
   // Returns the "main" ThreadGroup, used when attaching user threads.
   jobject GetMainThreadGroup() const;
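
With the recursion check moved out of Runtime::Abort and into AbortState::Dump, Abort no longer has an early-return path: even a recursive abort now falls through to the abort hook or abort(3). That makes the [[noreturn]] annotation accurate, so callers such as LOG(FATAL) can be treated as terminating. A sketch of the resulting control flow:

    #include <cstdlib>

    unsigned int g_aborting_flow = 0;  // stand-in for gAborting

    void DumpAbortStateSketch() {
      if (g_aborting_flow > 1) {
        return;  // recursive abort: skip thread-specific detail only
      }
      g_aborting_flow++;
      // ... dump runtime and thread state ...
    }

    [[noreturn]] void AbortSketch() {
      g_aborting_flow++;       // set before taking any locks
      DumpAbortStateSketch();  // may return early; Abort itself never does
      std::abort();            // stand-in for the abort hook / abort(3)
    }
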
diff --git a/runtime/runtime_android.cc b/runtime/runtime_android.cc
index 33641ed..33600dd 100644
--- a/runtime/runtime_android.cc
+++ b/runtime/runtime_android.cc
@@ -38,6 +38,7 @@
     _exit(1);
   }
   handling_unexpected_signal = true;
+  gAborting++;  // set before taking any locks
   MutexLock mu(Thread::Current(), *Locks::unexpected_signal_lock_);
 
   Runtime* runtime = Runtime::Current();
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 9273091..1de035c 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -284,6 +284,7 @@
   }
   handlingUnexpectedSignal = true;
 
+  gAborting++;  // set before taking any locks
   MutexLock mu(Thread::Current(), *Locks::unexpected_signal_lock_);
 
   bool has_address = (signal_number == SIGILL || signal_number == SIGBUS ||
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 49b7be9..7aed8b0 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -83,7 +83,9 @@
 
 inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
   if (kIsDebugBuild) {
-    CHECK_EQ(0u, tls32_.no_thread_suspension) << tlsPtr_.last_no_thread_suspension_cause;
+    if (gAborting == 0) {
+      CHECK_EQ(0u, tls32_.no_thread_suspension) << tlsPtr_.last_no_thread_suspension_cause;
+    }
     if (check_locks) {
       bool bad_mutexes_held = false;
       for (int i = kLockLevelCount - 1; i >= 0; --i) {
@@ -97,7 +99,9 @@
           }
         }
       }
-      CHECK(!bad_mutexes_held);
+      if (gAborting == 0) {
+        CHECK(!bad_mutexes_held);
+      }
     }
   }
 }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index a6e5b0c..f7c7106 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -742,7 +742,7 @@
 
   // Don't do this if we are aborting since the GC may have all the threads suspended. This will
   // cause ScopedObjectAccessUnchecked to deadlock.
-  if (self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
+  if (gAborting == 0 && self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
     ScopedObjectAccessUnchecked soa(self);
     priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)
         ->GetInt(thread->tlsPtr_.opeer);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 71325a5..beafcda 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -168,7 +168,9 @@
     const uint32_t kWaitTimeoutMs = 10000;
     bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kWaitTimeoutMs);
     if (timed_out) {
-      LOG(kIsDebugBuild ? FATAL : ERROR) << "Unexpected time out during dump checkpoint.";
+      // Avoid a recursive abort.
+      LOG((kIsDebugBuild && (gAborting == 0)) ? FATAL : ERROR)
+          << "Unexpected time out during dump checkpoint.";
     }
   }
 
@@ -241,7 +243,7 @@
   Locks::mutator_lock_->AssertNotExclusiveHeld(self);
   Locks::thread_list_lock_->AssertNotHeld(self);
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
-  if (kDebugLocking) {
+  if (kDebugLocking && gAborting == 0) {
     CHECK_NE(self->GetState(), kRunnable);
   }
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 1b3cc8f..c206b94 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -725,7 +725,16 @@
     /* Flag instructions that are garbage collection points */
     // All invoke points are marked as "Throw" points already.
     // We are relying on this to also count all the invokes as interesting.
-    if (inst->IsBranch() || inst->IsSwitch() || inst->IsThrow()) {
+    if (inst->IsBranch()) {
+      insn_flags_[dex_pc].SetCompileTimeInfoPoint();
+      // The compiler also needs safepoints for fall-through to loop heads.
+      // Such a loop head must be a target of a branch.
+      int32_t offset = 0;
+      bool cond, self_ok;
+      bool target_ok = GetBranchOffset(dex_pc, &offset, &cond, &self_ok);
+      DCHECK(target_ok);
+      insn_flags_[dex_pc + offset].SetCompileTimeInfoPoint();
+    } else if (inst->IsSwitch() || inst->IsThrow()) {
       insn_flags_[dex_pc].SetCompileTimeInfoPoint();
     } else if (inst->IsReturn()) {
       insn_flags_[dex_pc].SetCompileTimeInfoPointAndReturn();
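
The verifier change above widens what gets flagged as a compile-time info point: a branch now flags its target in addition to itself, so a loop head that is entered by fall-through (and would otherwise never be flagged) still receives a safepoint via the branch that jumps back to it. A sketch of the flagging step with illustrative types:

    #include <cstdint>
    #include <vector>

    struct InsnFlagsSketch {
      bool compile_time_info_point = false;
    };

    // For a branch at dex_pc with the given relative offset, flag both
    // the branch and its target; the target may be a loop head that is
    // otherwise reached only by fall-through.
    void FlagBranchSketch(std::vector<InsnFlagsSketch>& flags,
                          uint32_t dex_pc, int32_t offset) {
      flags[dex_pc].compile_time_info_point = true;
      flags[dex_pc + offset].compile_time_info_point = true;
    }
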
diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
index 631c4be..40be56c 100644
--- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc
@@ -52,11 +52,11 @@
       // v2 is added because of the instruction at DexPC 0024. Object merges with 0 is Object. See:
       //   0024: move-object v3, v2
       //   0025: goto 0013
-      // Detaled dex instructions for ReferenceMap.java are at the end of this function.
+      // Detailed dex instructions for ReferenceMap.java are at the end of this function.
       // CHECK_REGS_CONTAIN_REFS(8, 3, 2, 1);  // v8: this, v3: y, v2: y, v1: x
       // We eliminate the non-live registers at a return, so only v3 is live.
       // Note that it is OK for a compiler to not have a dex map at this dex PC because
-      // a return is not a safepoint.
+      // a return is not necessarily a safepoint.
       CHECK_REGS_CONTAIN_REFS(0x13U, false);  // v3: y
       CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1, 0);  // v8: this, v2: y, v1: x, v0: ex
       CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1, 0);  // v8: this, v5: x[1], v2: y, v1: x, v0: ex
@@ -68,8 +68,10 @@
       CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
       CHECK_REGS_CONTAIN_REFS(0x2cU, true, 8, 4, 2, 1);  // v8: this, v4: ex, v2: y, v1: x
-      CHECK_REGS_CONTAIN_REFS(0x2fU, true, 8, 4, 3, 2, 1);  // v8: this, v4: ex, v3: y, v2: y, v1: x
-      CHECK_REGS_CONTAIN_REFS(0x32U, true, 8, 3, 2, 1, 0);  // v8: this, v3: y, v2: y, v1: x, v0: ex
+      // Note that it is OK for a compiler to not have a dex map at these two dex PCs because
+      // a goto is not necessarily a safepoint.
+      CHECK_REGS_CONTAIN_REFS(0x2fU, false, 8, 4, 3, 2, 1);  // v8: this, v4: ex, v3: y, v2: y, v1: x
+      CHECK_REGS_CONTAIN_REFS(0x32U, false, 8, 3, 2, 1, 0);  // v8: this, v3: y, v2: y, v1: x, v0: ex
     }
 
     return true;
diff --git a/test/109-suspend-check/src/Main.java b/test/109-suspend-check/src/Main.java
index ae10576..cd5130d 100644
--- a/test/109-suspend-check/src/Main.java
+++ b/test/109-suspend-check/src/Main.java
@@ -21,10 +21,15 @@
         System.out.println("Running (" + TEST_TIME + " seconds) ...");
         InfiniteForLoop forLoop = new InfiniteForLoop();
         InfiniteWhileLoop whileLoop = new InfiniteWhileLoop();
+        InfiniteWhileLoopWithIntrinsic whileLoopWithIntrinsic =
+            new InfiniteWhileLoopWithIntrinsic();
+        InfiniteDoWhileLoopWithLong doWhileLoopWithLong = new InfiniteDoWhileLoopWithLong();
         InfiniteDoWhileLoop doWhileLoop = new InfiniteDoWhileLoop();
         MakeGarbage garbage = new MakeGarbage();
         forLoop.start();
         whileLoop.start();
+        whileLoopWithIntrinsic.start();
+        doWhileLoopWithLong.start();
         doWhileLoop.start();
         garbage.start();
         for (int i = 0; i < TEST_TIME; i++) {
@@ -34,6 +39,8 @@
         }
         forLoop.stopNow();
         whileLoop.stopNow();
+        whileLoopWithIntrinsic.stopNow();
+        doWhileLoopWithLong.stopNow();
         doWhileLoop.stopNow();
         garbage.stopNow();
         System.out.println("Done.");
@@ -48,6 +55,35 @@
     }
 }
 
+class InfiniteWhileLoopWithIntrinsic extends Thread {
+  volatile private boolean keepGoing = true;
+  private String[] strings = { "a", "b", "c", "d" };
+  private int sum = 0;
+  public void run() {
+    int i = 0;
+    while (keepGoing) {
+      i++;
+      sum += strings[i & 3].length();
+    }
+  }
+  public void stopNow() {
+    keepGoing = false;
+  }
+}
+
+class InfiniteDoWhileLoopWithLong extends Thread {
+  volatile private long keepGoing = 7L;
+  public void run() {
+    int i = 0;
+    do {
+      i++;
+    } while (keepGoing >= 4L);
+  }
+  public void stopNow() {
+    keepGoing = 1L;
+  }
+}
+
 class InfiniteWhileLoop extends Thread {
   volatile private boolean keepGoing = true;
   public void run() {
diff --git a/test/436-shift-constant/expected.txt b/test/436-shift-constant/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/436-shift-constant/expected.txt
diff --git a/test/436-shift-constant/info.txt b/test/436-shift-constant/info.txt
new file mode 100644
index 0000000..dc20646
--- /dev/null
+++ b/test/436-shift-constant/info.txt
@@ -0,0 +1 @@
+Regression tests for shift instructions with constants larger than 8 bits.
diff --git a/test/436-shift-constant/src/Main.java b/test/436-shift-constant/src/Main.java
new file mode 100644
index 0000000..e69f64b
--- /dev/null
+++ b/test/436-shift-constant/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    assertEquals(0x80000000, doShiftInt(1));
+    assertEquals(0x8000000000000000L, doShiftLong(1L));
+  }
+
+  public static int doShiftInt(int value) {
+    return value << 0xFFFF;
+  }
+
+  public static long doShiftLong(long value) {
+    return value << 0xFFFF;
+  }
+
+  public static void assertEquals(int a, int b) {
+    if (a != b) {
+      throw new Error("Expected " + a + ", got " + b);
+    }
+  }
+
+  public static void assertEquals(long a, long b) {
+    if (a != b) {
+      throw new Error("Expected " + a + ", got " + b);
+    }
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 0f7001f..5f86f1e 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -8,4 +8,5 @@
 invoke-super abstract
 BadCaseInOpRegRegReg
 CmpLong
+FloatIntConstPassing
 Done!
diff --git a/test/800-smali/smali/FloatIntConstPassing.smali b/test/800-smali/smali/FloatIntConstPassing.smali
new file mode 100644
index 0000000..a2916c5
--- /dev/null
+++ b/test/800-smali/smali/FloatIntConstPassing.smali
@@ -0,0 +1,29 @@
+.class public LFloatIntConstPassing;
+
+.super Ljava/lang/Object;
+
+.method public static getInt(I)I
+  .registers 2
+  const/4 v0, 1
+  add-int/2addr v0, p0
+  return v0
+.end method
+
+.method public static getFloat(F)F
+  .registers 2
+  const/4 v0, 0
+  mul-float/2addr v0, p0
+  return v0
+.end method
+
+.method public static run()I
+  .registers 3
+  const/4 v0, 1
+  invoke-static {v0}, LFloatIntConstPassing;->getInt(I)I
+  move-result v1
+  invoke-static {v0}, LFloatIntConstPassing;->getFloat(F)F
+  move-result v2
+  float-to-int v2, v2
+  add-int/2addr v1, v2
+  return v1
+.end method
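
Why this test expects 2 (see the TestCase added in Main.java below): const/4 yields an untyped 32-bit constant, and the same register is passed once as an int and once as a float. As an int it is 1, so getInt returns 2; as a float it is the bit pattern 0x00000001, a tiny subnormal, which getFloat multiplies by 0.0f, so float-to-int produces 0 and run() returns 2 + 0. A sketch of that arithmetic under the same bit-reinterpretation:

    #include <cassert>
    #include <cstring>

    int main() {
      const int raw = 1;  // const/4 v0, 1 -- an untyped 32-bit value
      float as_float;
      std::memcpy(&as_float, &raw, sizeof(raw));  // the float view of v0
      const int from_int = 1 + raw;                              // getInt(I)I
      const int from_float = static_cast<int>(0.0f * as_float);  // getFloat(F)F, then float-to-int
      assert(from_int + from_float == 2);  // run()'s return value
      return 0;
    }
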
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index f2c1ab5..a2db051 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -64,6 +64,7 @@
             new Object[]{0}, new AbstractMethodError(), null));
         testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2));
         testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0));
+        testCases.add(new TestCase("FloatIntConstPassing", "FloatIntConstPassing", "run", null, null, 2));
     }
 
     public void runTests() {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c3fec5d..d2a0f4c 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -303,6 +303,7 @@
   005-annotations \
   009-instanceof \
   010-instance \
+  012-math \
   023-many-interfaces \
   044-proxy \
   045-reflect-array \