Quick: Redefine the notion of back-edges.

Redefine a back-edge to mean an edge to a loop head, determined
from the topological sort order, instead of comparing instruction
offsets. Also generate suspend checks on fall-through edges to a
loop head; insert an extra GOTO for these edges.
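
For reference, the new definition reduces to a comparison of
topological sort indexes. A minimal standalone sketch (the
hypothetical IsBackEdgeSketch helper mirrors MIRGraph::IsBackEdge()
in the patch below):

  #include <cstdint>
  #include <vector>

  // An edge is a back-edge iff its target does not come strictly
  // later in the topological sort order; with the loop-aware
  // topological sort, such targets are exactly the loop heads.
  bool IsBackEdgeSketch(const std::vector<uint16_t>& topo_order_indexes,
                        uint16_t branch_bb_id, uint16_t target_bb_id) {
    return topo_order_indexes[target_bb_id] <=
           topo_order_indexes[branch_bb_id];
  }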

Add suspend checks to fused cmp instructions.

Rewrite suspend check elimination to track whether there is an
invoke on every path from the loop head to a given back-edge,
instead of using dominance information to look for a basic block
with an invoke that must lie on each such path. Ignore invokes
to intrinsics and move the optimization into its own pass.
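
The tracking is a per-block bitmask dataflow: bit (d - 1) of
suspend_checks_in_loops_[bb->id] is set iff every path from the
depth-d enclosing loop head to bb contains a suspend point (a
non-intrinsic invoke). A standalone sketch of the final decision
(hypothetical helper name; mirrors IsSuspendCheckEdge() in the
patch below):

  #include <cstdint>

  // A back-edge to a loop head at nesting depth target_depth still
  // needs an explicit suspend check iff the corresponding bit in
  // the branching block's mask is clear.
  bool BackEdgeNeedsSuspendCheck(uint32_t checks_in_loops,
                                 uint16_t target_depth) {
    return (checks_in_loops & (1u << (target_depth - 1u))) == 0u;
  }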

The new loops in test 109-suspend-check should guard against
regressions related to intrinsics and fused cmp instructions.

Bug: 18522004
Change-Id: I96ac818f76ccf9419a6e70e9ec00555f9d487a9e
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 764a4cf..0407e32 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -297,6 +297,41 @@
   void Start(PassDataHolder* data) const;
 };
 
+/**
+ * @class SuspendCheckElimination
+ * @brief Eliminates redundant suspend checks on loop back-edges.
+ */
+class SuspendCheckElimination : public PassME {
+ public:
+  SuspendCheckElimination()
+    : PassME("SuspendCheckElimination", kTopologicalSortTraversal, "6_post_sce_cfg") {
+  }
+
+  bool Gate(const PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    return c_unit->mir_graph->EliminateSuspendChecksGate();
+  }
+
+  bool Worker(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
+    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
+    DCHECK(c_unit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
+    return c_unit->mir_graph->EliminateSuspendChecks(bb);
+  }
+
+  void End(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    c_unit->mir_graph->EliminateSuspendChecksEnd();
+  }
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 3f6231c..fb5dc9c 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -47,6 +47,7 @@
   // (1 << kTrackLiveTemps) |
   // (1 << kSafeOptimizations) |
   // (1 << kBBOpt) |
+  // (1 << kSuspendCheckElimination) |
   // (1 << kMatch) |
   // (1 << kPromoteCompilerTemps) |
   // (1 << kSuppressExceptionEdges) |
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index bed3b97..1145818 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -46,6 +46,7 @@
   kTrackLiveTemps,
   kSafeOptimizations,
   kBBOpt,
+  kSuspendCheckElimination,
   kMatch,
   kPromoteCompilerTemps,
   kBranchFusing,
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 5b7ac3c..64895d8 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1343,7 +1343,7 @@
  * counts explicitly used s_regs.  A later phase will add implicit
  * counts for things such as Method*, null-checked references, etc.
  */
-void MIRGraph::CountUses(class BasicBlock* bb) {
+void MIRGraph::CountUses(BasicBlock* bb) {
   if (bb->block_type != kDalvikByteCode) {
     return;
   }
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 023abca..046cfdc 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -127,7 +127,7 @@
       ifield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
       sfield_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
       method_lowering_infos_(arena->Adapter(kArenaAllocLoweringInfo)),
-      gen_suspend_test_list_(arena->Adapter()) {
+      suspend_checks_in_loops_(nullptr) {
   memset(&temp_, 0, sizeof(temp_));
   use_counts_.reserve(256);
   raw_use_counts_.reserve(256);
@@ -1941,6 +1941,7 @@
     DCHECK_EQ(bb->hidden, false);
     DCHECK_EQ(bb->visited, false);
     bb->visited = true;
+    bb->nesting_depth = loop_head_stack.size();
 
     // Now add the basic block.
     uint16_t idx = static_cast<uint16_t>(topological_order_.size());
@@ -1982,24 +1983,6 @@
   return false;
 }
 
-bool MIRGraph::HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id) {
-  BasicBlock* target = GetBasicBlock(target_id);
-
-  if (source == nullptr || target == nullptr)
-    return false;
-
-  int idx;
-  for (idx = gen_suspend_test_list_.size() - 1; idx >= 0; idx--) {
-    BasicBlock* bb = gen_suspend_test_list_[idx];
-    if (bb == source)
-      return true;  // The block has been inserted by a suspend check before.
-    if (source->dominators->IsBitSet(bb->id) && bb->dominators->IsBitSet(target_id))
-      return true;
-  }
-
-  return false;
-}
-
 ChildBlockIterator::ChildBlockIterator(BasicBlock* bb, MIRGraph* mir_graph)
     : basic_block_(bb), mir_graph_(mir_graph), visited_fallthrough_(false),
       visited_taken_(false), have_successors_(false) {
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 1a18841..208cbdf 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -34,6 +34,7 @@
 
 namespace art {
 
+class DexFileMethodInliner;
 class GlobalValueNumbering;
 
 enum DataFlowAttributePos {
@@ -131,11 +132,9 @@
 
 enum OatMethodAttributes {
   kIsLeaf,            // Method is leaf.
-  kHasLoop,           // Method contains simple loop.
 };
 
 #define METHOD_IS_LEAF          (1 << kIsLeaf)
-#define METHOD_HAS_LOOP         (1 << kHasLoop)
 
 // Minimum field size to contain Dalvik v_reg number.
 #define VREG_NUM_WIDTH 16
@@ -731,6 +730,10 @@
     return max_nested_loops_;
   }
 
+  bool IsLoopHead(BasicBlockId bb_id) {
+    return topological_order_loop_ends_[topological_order_indexes_[bb_id]] != 0u;
+  }
+
   bool IsConst(int32_t s_reg) const {
     return is_constant_v_->IsBitSet(s_reg);
   }
@@ -969,13 +972,23 @@
     return reg_location_[method_sreg_];
   }
 
-  bool IsBackedge(BasicBlock* branch_bb, BasicBlockId target_bb_id) {
-    return ((target_bb_id != NullBasicBlockId) &&
-            (GetBasicBlock(target_bb_id)->start_offset <= branch_bb->start_offset));
+  bool IsBackEdge(BasicBlock* branch_bb, BasicBlockId target_bb_id) {
+    DCHECK_NE(target_bb_id, NullBasicBlockId);
+    DCHECK_LT(target_bb_id, topological_order_indexes_.size());
+    DCHECK_LT(branch_bb->id, topological_order_indexes_.size());
+    return topological_order_indexes_[target_bb_id] <= topological_order_indexes_[branch_bb->id];
   }
 
-  bool IsBackwardsBranch(BasicBlock* branch_bb) {
-    return IsBackedge(branch_bb, branch_bb->taken) || IsBackedge(branch_bb, branch_bb->fall_through);
+  bool IsSuspendCheckEdge(BasicBlock* branch_bb, BasicBlockId target_bb_id) {
+    if (!IsBackEdge(branch_bb, target_bb_id)) {
+      return false;
+    }
+    if (suspend_checks_in_loops_ == nullptr) {
+      // We didn't run suspend check elimination.
+      return true;
+    }
+    uint16_t target_depth = GetBasicBlock(target_bb_id)->nesting_depth;
+    return (suspend_checks_in_loops_[branch_bb->id] & (1u << (target_depth - 1u))) == 0;
   }
 
   void CountBranch(DexOffset target_offset) {
@@ -1055,6 +1068,9 @@
   bool ApplyGlobalValueNumberingGate();
   bool ApplyGlobalValueNumbering(BasicBlock* bb);
   void ApplyGlobalValueNumberingEnd();
+  bool EliminateSuspendChecksGate();
+  bool EliminateSuspendChecks(BasicBlock* bb);
+  void EliminateSuspendChecksEnd();
 
   uint16_t GetGvnIFieldId(MIR* mir) const {
     DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
@@ -1165,7 +1181,7 @@
    * @brief Count the uses in the BasicBlock
    * @param bb the BasicBlock
    */
-  void CountUses(class BasicBlock* bb);
+  void CountUses(BasicBlock* bb);
 
   static uint64_t GetDataFlowAttributes(Instruction::Code opcode);
   static uint64_t GetDataFlowAttributes(MIR* mir);
@@ -1209,20 +1225,6 @@
   void HandleSSADef(int* defs, int dalvik_reg, int reg_index);
   bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed);
 
-  // Used for removing redudant suspend tests
-  void AppendGenSuspendTestList(BasicBlock* bb) {
-    if (gen_suspend_test_list_.size() == 0 ||
-        gen_suspend_test_list_.back() != bb) {
-      gen_suspend_test_list_.push_back(bb);
-    }
-  }
-
-  /* This is used to check if there is already a method call dominating the
-   * source basic block of a backedge and being dominated by the target basic
-   * block of the backedge.
-   */
-  bool HasSuspendTestBetween(BasicBlock* source, BasicBlockId target_id);
-
  protected:
   int FindCommonParent(int block1, int block2);
   void ComputeSuccLineIn(ArenaBitVector* dest, const ArenaBitVector* src1,
@@ -1338,6 +1340,10 @@
       uint16_t* ifield_ids_;  // Part of GVN/LVN but cached here for LVN to avoid recalculation.
       uint16_t* sfield_ids_;  // Ditto.
     } gvn;
+    // Suspend check elimination.
+    struct {
+      DexFileMethodInliner* inliner;
+    } sce;
   } temp_;
   static const int kInvalidEntry = -1;
   ArenaVector<BasicBlock*> block_list_;
@@ -1373,11 +1379,19 @@
   ArenaVector<MirIFieldLoweringInfo> ifield_lowering_infos_;
   ArenaVector<MirSFieldLoweringInfo> sfield_lowering_infos_;
   ArenaVector<MirMethodLoweringInfo> method_lowering_infos_;
+
+  // In the suspend check elimination pass, we determine for each basic block and each
+  // enclosing loop whether there's guaranteed to be a suspend check on every path from
+  // the loop head to this block. If so, we can eliminate the back-edge suspend check.
+  // The bb->id is the index into suspend_checks_in_loops_ and the loop head's nesting
+  // depth minus one is the bit index within suspend_checks_in_loops_[bb->id].
+  uint32_t* suspend_checks_in_loops_;
+
   static const uint64_t oat_data_flow_attributes_[kMirOpLast];
-  ArenaVector<BasicBlock*> gen_suspend_test_list_;  // List of blocks containing suspend tests
 
   friend class MirOptimizationTest;
   friend class ClassInitCheckEliminationTest;
+  friend class SuspendCheckEliminationTest;
   friend class NullCheckEliminationTest;
   friend class GlobalValueNumberingTest;
   friend class LocalValueNumberingTest;
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 55f2abc..5c4790a 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -539,28 +539,6 @@
             }
           }
           break;
-        case Instruction::RETURN_VOID:
-        case Instruction::RETURN:
-        case Instruction::RETURN_WIDE:
-        case Instruction::RETURN_OBJECT:
-          if (bb->GetFirstNonPhiInsn() == mir) {
-            // This is a simple return BB. Eliminate suspend checks on predecessor back-edges.
-            for (BasicBlockId pred_id : bb->predecessors) {
-              BasicBlock* pred_bb = GetBasicBlock(pred_id);
-              DCHECK(pred_bb != nullptr);
-              if (IsBackedge(pred_bb, bb->id) && pred_bb->last_mir_insn != nullptr &&
-                  (IsInstructionIfCc(pred_bb->last_mir_insn->dalvikInsn.opcode) ||
-                   IsInstructionIfCcZ(pred_bb->last_mir_insn->dalvikInsn.opcode) ||
-                   IsInstructionGoto(pred_bb->last_mir_insn->dalvikInsn.opcode))) {
-                pred_bb->last_mir_insn->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
-                if (cu_->verbose) {
-                  LOG(INFO) << "Suppressed suspend check on branch to return at 0x" << std::hex
-                            << pred_bb->last_mir_insn->offset;
-                }
-              }
-            }
-          }
-          break;
         default:
           break;
       }
@@ -589,12 +567,8 @@
         if ((tk_ft == NULL) && (ft_tk == NULL) && (tk_tk == ft_ft) &&
             (Predecessors(tk) == 1) && (Predecessors(ft) == 1)) {
           /*
-           * Okay - we have the basic diamond shape.  At the very least, we can eliminate the
-           * suspend check on the taken-taken branch back to the join point.
+           * Okay - we have the basic diamond shape.
            */
-          if (SelectKind(tk->last_mir_insn) == kSelectGoto) {
-              tk->last_mir_insn->optimization_flags |= (MIR_IGNORE_SUSPEND_CHECK);
-          }
 
           // TODO: Add logic for LONG.
           // Are the block bodies something we can handle?
@@ -1637,4 +1611,103 @@
   temp_scoped_alloc_.reset();
 }
 
+bool MIRGraph::EliminateSuspendChecksGate() {
+  if ((cu_->disable_opt & (1 << kSuspendCheckElimination)) != 0 ||  // Disabled.
+      GetMaxNestedLoops() == 0u ||   // Nothing to do.
+      GetMaxNestedLoops() >= 32u ||  // Only 32 bits in suspend_checks_in_loops_[.].
+                                     // Exclude 32 as well to keep bit shifts well-defined.
+      !HasInvokes()) {               // No invokes to actually eliminate any suspend checks.
+    return false;
+  }
+  if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) {
+    temp_.sce.inliner =
+        cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
+  }
+  suspend_checks_in_loops_ = static_cast<uint32_t*>(
+      arena_->Alloc(GetNumBlocks() * sizeof(*suspend_checks_in_loops_), kArenaAllocMisc));
+  return true;
+}
+
+bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) {
+  if (bb->block_type != kDalvikByteCode) {
+    return false;
+  }
+  DCHECK_EQ(GetTopologicalSortOrderLoopHeadStack()->size(), bb->nesting_depth);
+  if (bb->nesting_depth == 0u) {
+    // Out of loops.
+    DCHECK_EQ(suspend_checks_in_loops_[bb->id], 0u);  // The array was zero-initialized.
+    return false;
+  }
+  uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u;  // Start with all loop heads.
+  bool found_invoke = false;
+  for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+    if (IsInstructionInvoke(mir->dalvikInsn.opcode) &&
+        (temp_.sce.inliner == nullptr ||
+         !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) {
+      // Non-intrinsic invoke, rely on a suspend point in the invoked method.
+      found_invoke = true;
+      break;
+    }
+  }
+  if (!found_invoke) {
+    // Intersect suspend checks from predecessors.
+    uint16_t bb_topo_idx = topological_order_indexes_[bb->id];
+    uint32_t pred_mask_union = 0u;
+    for (BasicBlockId pred_id : bb->predecessors) {
+      uint16_t pred_topo_idx = topological_order_indexes_[pred_id];
+      if (pred_topo_idx < bb_topo_idx) {
+        // Determine the loop depth of the predecessors relative to this block.
+        size_t pred_loop_depth = topological_order_loop_head_stack_.size();
+        while (pred_loop_depth != 0u &&
+            pred_topo_idx < topological_order_loop_head_stack_[pred_loop_depth - 1].first) {
+          --pred_loop_depth;
+        }
+        DCHECK_LE(pred_loop_depth, GetBasicBlock(pred_id)->nesting_depth);
+        uint32_t pred_mask = (1u << pred_loop_depth) - 1u;
+        // Intersect pred_mask bits in suspend_checks_in_loops with
+        // suspend_checks_in_loops_[pred_id].
+        uint32_t pred_loops_without_checks = pred_mask & ~suspend_checks_in_loops_[pred_id];
+        suspend_checks_in_loops = suspend_checks_in_loops & ~pred_loops_without_checks;
+        pred_mask_union |= pred_mask;
+      }
+    }
+  DCHECK_EQ(((1u << (IsLoopHead(bb->id) ? bb->nesting_depth - 1u : bb->nesting_depth)) - 1u),
+              pred_mask_union);
+    suspend_checks_in_loops &= pred_mask_union;
+  }
+  suspend_checks_in_loops_[bb->id] = suspend_checks_in_loops;
+  if (suspend_checks_in_loops == 0u) {
+    return false;
+  }
+  // Apply MIR_IGNORE_SUSPEND_CHECK if appropriate.
+  if (bb->taken != NullBasicBlockId) {
+    DCHECK(bb->last_mir_insn != nullptr);
+    DCHECK(IsInstructionIfCc(bb->last_mir_insn->dalvikInsn.opcode) ||
+           IsInstructionIfCcZ(bb->last_mir_insn->dalvikInsn.opcode) ||
+           IsInstructionGoto(bb->last_mir_insn->dalvikInsn.opcode) ||
+           (static_cast<int>(bb->last_mir_insn->dalvikInsn.opcode) >= kMirOpFusedCmplFloat &&
+            static_cast<int>(bb->last_mir_insn->dalvikInsn.opcode) <= kMirOpFusedCmpLong));
+    if (!IsSuspendCheckEdge(bb, bb->taken) &&
+        (bb->fall_through == NullBasicBlockId || !IsSuspendCheckEdge(bb, bb->fall_through))) {
+      bb->last_mir_insn->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
+    }
+  } else if (bb->fall_through != NullBasicBlockId && IsSuspendCheckEdge(bb, bb->fall_through)) {
+    // We've got a fall-through suspend edge. Add an artificial GOTO to force suspend check.
+    MIR* mir = NewMIR();
+    mir->dalvikInsn.opcode = Instruction::GOTO;
+    mir->dalvikInsn.vA = 0;  // Branch offset.
+    mir->offset = GetBasicBlock(bb->fall_through)->start_offset;
+    mir->m_unit_index = current_method_;
+    mir->ssa_rep = reinterpret_cast<SSARepresentation*>(
+        arena_->Alloc(sizeof(SSARepresentation), kArenaAllocDFInfo));  // Zero-initialized.
+    bb->AppendMIR(mir);
+    std::swap(bb->fall_through, bb->taken);  // The fall-through has become taken.
+  }
+  return true;
+}
+
+void MIRGraph::EliminateSuspendChecksEnd() {
+  temp_.sce.inliner = nullptr;
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index c794cc6..6c2e9c0 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -88,6 +88,8 @@
     { bb, opcode, 0u, vA, vB, vC }
 #define DEF_INVOKE(bb, opcode, vC, method_info) \
     { bb, opcode, method_info, 0u, 0u, vC }
+#define DEF_OTHER0(bb, opcode) \
+    { bb, opcode, 0u, 0u, 0u, 0u }
 #define DEF_OTHER1(bb, opcode, vA) \
     { bb, opcode, 0u, vA, 0u, 0u }
 #define DEF_OTHER2(bb, opcode, vA, vB) \
@@ -175,6 +177,56 @@
     PrepareBasicBlocks(bbs);
   }
 
+  void PrepareNestedLoopsWhile_While() {
+    static const BBDef bbs[] = {
+        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(8)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 8), DEF_PRED2(3, 7)),  // Outer while loop head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),  // Inner while loop head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),        // "taken" loops to inner head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(5)),        // "taken" loops to outer head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+    };
+    PrepareBasicBlocks(bbs);
+  }
+
+  void PrepareNestedLoopsWhile_WhileWhile() {
+    static const BBDef bbs[] = {
+        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 10), DEF_PRED2(3, 9)),   // Outer while loop head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),    // Inner while loop head 1.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED1(5)),          // Loops to inner head 1.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED2(5, 8)),    // Inner while loop head 2.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED1(7)),          // Loops to inner head 2.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(7)),          // Loops to outer head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+    };
+    PrepareBasicBlocks(bbs);
+  }
+
+  void PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge() {
+    // Extra edge from the first inner loop body to second inner loop body (6u->8u).
+    static const BBDef bbs[] = {
+        DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+        DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+        DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 10), DEF_PRED2(3, 9)),   // Outer while loop head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(6, 7), DEF_PRED2(4, 6)),    // Inner while loop head 1.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 8), DEF_PRED1(5)),       // Loops to inner head 1.
+        DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 9), DEF_PRED2(5, 8)),    // Inner while loop head 2.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(7), DEF_PRED2(7, 6)),       // Loops to inner head 2.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(7)),          // Loops to outer head.
+        DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+    };
+    PrepareBasicBlocks(bbs);
+  }
+
   void PrepareCatch() {
     static const BBDef bbs[] = {
         DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
@@ -397,6 +449,43 @@
   }
 };
 
+class SuspendCheckEliminationTest : public MirOptimizationTest {
+ protected:
+  bool IsBackEdge(BasicBlockId branch_bb, BasicBlockId target_bb) {
+    BasicBlock* branch = cu_.mir_graph->GetBasicBlock(branch_bb);
+    return target_bb != NullBasicBlockId && cu_.mir_graph->IsBackEdge(branch, target_bb);
+  }
+
+  bool IsSuspendCheckEdge(BasicBlockId branch_bb, BasicBlockId target_bb) {
+    BasicBlock* branch = cu_.mir_graph->GetBasicBlock(branch_bb);
+    return cu_.mir_graph->IsSuspendCheckEdge(branch, target_bb);
+  }
+
+  void PerformSuspendCheckElimination() {
+    cu_.mir_graph->SSATransformationStart();
+    cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->ComputeDominators();
+    cu_.mir_graph->ComputeTopologicalSortOrder();
+    cu_.mir_graph->SSATransformationEnd();
+    bool gate_result = cu_.mir_graph->EliminateSuspendChecksGate();
+    ASSERT_TRUE(gate_result);
+    TopologicalSortIterator iterator(cu_.mir_graph.get());
+    bool change = false;
+    for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
+      change = cu_.mir_graph->EliminateSuspendChecks(bb);
+    }
+    cu_.mir_graph->EliminateSuspendChecksEnd();
+  }
+
+  SuspendCheckEliminationTest()
+      : MirOptimizationTest() {
+    static const MethodDef methods[] = {
+        { 0u, 1u, 0u, 0u, kDirect, kDirect, false, false },  // Dummy.
+    };
+    PrepareMethods(methods);
+  }
+};
+
 TEST_F(ClassInitCheckEliminationTest, SingleBlock) {
   static const SFieldDef sfields[] = {
       { 0u, 1u, 0u, 0u, kDexMemAccessWord },
@@ -882,7 +971,208 @@
   }
 }
 
-// Undefine MIR_DEF for null check elimination.
-#undef MIR_DEF
+TEST_F(SuspendCheckEliminationTest, LoopNoElimination) {
+  static const MIRDef mirs[] = {
+    DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u, 0u),  // Force the pass to run.
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge back.
+  };
+
+  PrepareLoop();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(4u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(4u, 4u));  // Suspend point on loop to self.
+}
+
+TEST_F(SuspendCheckEliminationTest, LoopElimination) {
+  static const MIRDef mirs[] = {
+    DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in the loop.
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge back.
+  };
+
+  PrepareLoop();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(4u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(4u, 4u));  // No suspend point on loop to self.
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_NoElimination) {
+  static const MIRDef mirs[] = {
+    DEF_INVOKE(3u, Instruction::INVOKE_STATIC, 0u, 0u),  // Force the pass to run.
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_InvokeInOuterLoopHead) {
+  static const MIRDef mirs[] = {
+    DEF_INVOKE(4u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in outer loop head.
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_InvokeInOuterLoopBody) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_INVOKE(7u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in outer loop body.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_InvokeInInnerLoopHead) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in inner loop head.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_While_InvokeInInnerLoopBody) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop.
+    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in inner loop body.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop head.
+    DEF_OTHER0(7u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_While();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(7u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(7u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_WhileWhile_InvokeInFirstInnerLoopHead) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_INVOKE(5u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop head.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop 1 head.
+    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
+    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
+    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_WhileWhile();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(8u, 7u));
+  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));
+  ASSERT_TRUE(IsBackEdge(9u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(9u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_WhileWhile_InvokeInFirstInnerLoopBody) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
+    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop body.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop 1 head.
+    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
+    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
+    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_WhileWhile();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(8u, 7u));
+  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));
+  ASSERT_TRUE(IsBackEdge(9u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(9u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_WhileWhile_WithExtraEdge_InvokeInFirstInnerLoopBody) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
+    DEF_INVOKE(6u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in first inner loop body.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop 1 head.
+    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
+    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
+    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_FALSE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(8u, 7u));
+  EXPECT_TRUE(IsSuspendCheckEdge(8u, 7u));  // Unaffected by the extra edge.
+  ASSERT_TRUE(IsBackEdge(9u, 4u));
+  EXPECT_TRUE(IsSuspendCheckEdge(9u, 4u));
+}
+
+TEST_F(SuspendCheckEliminationTest, While_WhileWhile_WithExtraEdge_InvokeInSecondInnerLoopHead) {
+  static const MIRDef mirs[] = {
+    DEF_OTHER1(4u, Instruction::IF_NEZ, 1u),             // Edge out of outer loop.
+    DEF_OTHER1(5u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 1.
+    DEF_OTHER0(6u, Instruction::GOTO),                   // Edge back to inner loop 1 head.
+    DEF_INVOKE(7u, Instruction::INVOKE_STATIC, 0u, 0u),  // Invoke in second inner loop head.
+    DEF_OTHER1(7u, Instruction::IF_NEZ, 2u),             // Edge out of inner loop 2.
+    DEF_OTHER0(8u, Instruction::GOTO),                   // Edge back to inner loop 2 head.
+    DEF_OTHER0(9u, Instruction::GOTO),                   // Edge back to outer loop head.
+  };
+
+  PrepareNestedLoopsWhile_WhileWhile_WithExtraEdge();
+  PrepareMIRs(mirs);
+  PerformSuspendCheckElimination();
+  ASSERT_TRUE(IsBackEdge(6u, 5u));
+  EXPECT_TRUE(IsSuspendCheckEdge(6u, 5u));
+  ASSERT_TRUE(IsBackEdge(8u, 7u));
+  EXPECT_FALSE(IsSuspendCheckEdge(8u, 7u));  // Unaffected by the extra edge.
+  ASSERT_TRUE(IsBackEdge(9u, 4u));
+  EXPECT_FALSE(IsSuspendCheckEdge(9u, 4u));
+}
 
 }  // namespace art
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index a2bf8b4..c476b2a 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -46,6 +46,7 @@
   GetPassInstance<TypeInference>(),
   GetPassInstance<GlobalValueNumberingPass>(),
   GetPassInstance<BBOptimizations>(),
+  GetPassInstance<SuspendCheckElimination>(),
 };
 
 // The number of the passes in the initial list of Passes (g_passes).
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 774176e..50014b0 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -2163,18 +2163,15 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTest(int opt_flags) {
+  if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK) != 0) {
+    return;
+  }
   if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitSuspendChecks()) {
-    if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
-      return;
-    }
     FlushAllRegs();
     LIR* branch = OpTestSuspend(NULL);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
     AddSlowPath(new (arena_) SuspendCheckSlowPath(this, branch, cont));
   } else {
-    if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
-      return;
-    }
     FlushAllRegs();     // TODO: needed?
     LIR* inst = CheckSuspendUsingLoad();
     MarkSafepointPC(inst);
@@ -2183,11 +2180,11 @@
 
 /* Check if we need to check for pending suspend request */
 void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) {
+  if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK) != 0) {
+    OpUnconditionalBranch(target);
+    return;
+  }
   if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitSuspendChecks()) {
-    if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
-      OpUnconditionalBranch(target);
-      return;
-    }
     OpTestSuspend(target);
     FlushAllRegs();
     LIR* branch = OpUnconditionalBranch(nullptr);
@@ -2195,10 +2192,6 @@
   } else {
     // For the implicit suspend check, just perform the trigger
     // load and branch to the target.
-    if (NO_SUSPEND || (opt_flags & MIR_IGNORE_SUSPEND_CHECK)) {
-      OpUnconditionalBranch(target);
-      return;
-    }
     FlushAllRegs();
     LIR* inst = CheckSuspendUsingLoad();
     MarkSafepointPC(inst);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 320c0f4..d398830 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -623,8 +623,7 @@
     case Instruction::GOTO:
     case Instruction::GOTO_16:
     case Instruction::GOTO_32:
-      if (mir_graph_->IsBackedge(bb, bb->taken) &&
-          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken))) {
+      if (mir_graph_->IsBackEdge(bb, bb->taken)) {
         GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]);
       } else {
         OpUnconditionalBranch(&label_list[bb->taken]);
@@ -656,12 +655,10 @@
     case Instruction::IF_GE:
     case Instruction::IF_GT:
     case Instruction::IF_LE: {
-      LIR* taken = &label_list[bb->taken];
-      if (mir_graph_->IsBackwardsBranch(bb) &&
-          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
-           !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
         GenSuspendTest(opt_flags);
       }
+      LIR* taken = &label_list[bb->taken];
       GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken);
       break;
     }
@@ -671,12 +668,10 @@
     case Instruction::IF_GEZ:
     case Instruction::IF_GTZ:
     case Instruction::IF_LEZ: {
-      LIR* taken = &label_list[bb->taken];
-      if (mir_graph_->IsBackwardsBranch(bb) &&
-          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
-           !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
         GenSuspendTest(opt_flags);
       }
+      LIR* taken = &label_list[bb->taken];
       GenCompareZeroAndBranch(opcode, rl_src[0], taken);
       break;
     }
@@ -845,69 +840,37 @@
 
     case Instruction::INVOKE_STATIC_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true));
-      if (!kLeafOptimization) {
-        // If the invocation is not inlined, we can assume there is already a
-        // suspend check at the return site
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_STATIC:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::INVOKE_DIRECT:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_DIRECT_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::INVOKE_VIRTUAL:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_VIRTUAL_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::INVOKE_SUPER:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_SUPER_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::INVOKE_INTERFACE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
     case Instruction::INVOKE_INTERFACE_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true));
-      if (!kLeafOptimization) {
-        mir_graph_->AppendGenSuspendTestList(bb);
-      }
       break;
 
     case Instruction::NEG_INT:
@@ -1108,18 +1071,33 @@
       break;
     }
     case kMirOpFusedCmplFloat:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedFPCmpBranch(bb, mir, false /*gt bias*/, false /*double*/);
       break;
     case kMirOpFusedCmpgFloat:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedFPCmpBranch(bb, mir, true /*gt bias*/, false /*double*/);
       break;
     case kMirOpFusedCmplDouble:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedFPCmpBranch(bb, mir, false /*gt bias*/, true /*double*/);
       break;
     case kMirOpFusedCmpgDouble:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedFPCmpBranch(bb, mir, true /*gt bias*/, true /*double*/);
       break;
     case kMirOpFusedCmpLong:
+      if (mir_graph_->IsBackEdge(bb, bb->taken) || mir_graph_->IsBackEdge(bb, bb->fall_through)) {
+        GenSuspendTest(mir->optimization_flags);
+      }
       GenFusedLongCmpBranch(bb, mir);
       break;
     case kMirOpSelect:
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 5d78a6e..156d6fd 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -843,8 +843,8 @@
     virtual void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                 RegLocation rl_src1, RegLocation rl_src2, int flags);
     void GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest, RegLocation rl_src);
-    virtual void GenSuspendTest(int opt_flags);
-    virtual void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+    void GenSuspendTest(int opt_flags);
+    void GenSuspendTestAndBranch(int opt_flags, LIR* target);
 
     // This will be overridden by x86 implementation.
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index ed33882..19a1afd 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -197,12 +197,6 @@
         dom_post_order_traversal_.push_back(curr_bb->id);
       }
       work_stack.pop_back();
-
-      /* hacky loop detection */
-      if ((curr_bb->taken != NullBasicBlockId) && curr_bb->dominators->IsBitSet(curr_bb->taken)) {
-        curr_bb->nesting_depth++;
-        attributes_ |= METHOD_HAS_LOOP;
-      }
     }
   }
 }