Do not select EhPad BB in MachineBlockPlacement when there is regular BB to schedule

Summary:
EHPad BB are not entered the classic way and therefor do not need to be placed after their predecessors. This patch make sure EHPad BB are not chosen amongst successors to form chains, and are selected as last resort when selecting the best candidate.

EHPad are scheduled in reverse probability order in order to have them flow into each others naturally.

Reviewers: chandlerc, majnemer, rafael, MatzeB, escha, silvas

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D17625

llvm-svn: 265726
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 69c0a53..af78d8a 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -262,6 +262,7 @@
 
   void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
                            SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+                           SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
                            const BlockFilterSet *BlockFilter = nullptr);
   MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
                                          BlockChain &Chain,
@@ -282,9 +283,11 @@
   void fillWorkLists(MachineBasicBlock *MBB,
                      SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
                      SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+                     SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
                      const BlockFilterSet *BlockFilter);
   void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
                   SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+                  SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
                   const BlockFilterSet *BlockFilter = nullptr);
   MachineBasicBlock *findBestLoopTop(MachineLoop &L,
                                      const BlockFilterSet &LoopBlockSet);
@@ -350,6 +353,7 @@
 void MachineBlockPlacement::markChainSuccessors(
     BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
     SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+    SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
     const BlockFilterSet *BlockFilter) {
   // Walk all the blocks in this chain, marking their successors as having
   // a predecessor placed.
@@ -368,8 +372,15 @@
 
       // This is a cross-chain edge that is within the loop, so decrement the
       // loop predecessor count of the destination chain.
-      if (SuccChain.UnscheduledPredecessors > 0 && --SuccChain.UnscheduledPredecessors == 0)
-        BlockWorkList.push_back(*SuccChain.begin());
+      if (SuccChain.UnscheduledPredecessors == 0 ||
+          --SuccChain.UnscheduledPredecessors > 0)
+        continue;
+
+      auto *MBB = *SuccChain.begin();
+      if (MBB->isEHPad())
+        EHPadWorkList.push_back(MBB);
+      else
+        BlockWorkList.push_back(MBB);
     }
   }
 }
@@ -412,7 +423,7 @@
   SmallVector<MachineBasicBlock *, 4> Successors;
   for (MachineBasicBlock *Succ : BB->successors()) {
     bool SkipSucc = false;
-    if (BlockFilter && !BlockFilter->count(Succ)) {
+    if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
       SkipSucc = true;
     } else {
       BlockChain *SuccChain = BlockToChain[Succ];
@@ -532,9 +543,16 @@
                                 }),
                  WorkList.end());
 
+  if (WorkList.empty())
+    return nullptr;
+
+  bool IsEHPad = WorkList[0]->isEHPad();
+
   MachineBasicBlock *BestBlock = nullptr;
   BlockFrequency BestFreq;
   for (MachineBasicBlock *MBB : WorkList) {
+    assert(MBB->isEHPad() == IsEHPad);
+
     BlockChain &SuccChain = *BlockToChain[MBB];
     if (&SuccChain == &Chain)
       continue;
@@ -544,11 +562,32 @@
     BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
     DEBUG(dbgs() << "    " << getBlockName(MBB) << " -> ";
           MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
-    if (BestBlock && BestFreq >= CandidateFreq)
+
+    // For ehpad, we layout the least probable first as to avoid jumping back
+    // from least probable landingpads to more probable ones.
+    //
+    // FIXME: Using probability is probably (!) not the best way to achieve
+    // this. We should probably have a more principled approach to layout
+    // cleanup code.
+    //
+    // The goal is to get:
+    //
+    //                 +--------------------------+
+    //                 |                          V
+    // InnerLp -> InnerCleanup    OuterLp -> OuterCleanup -> Resume
+    //
+    // Rather than:
+    //
+    //                 +-------------------------------------+
+    //                 V                                     |
+    // OuterLp -> OuterCleanup -> Resume     InnerLp -> InnerCleanup
+    if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
       continue;
+
     BestBlock = MBB;
     BestFreq = CandidateFreq;
   }
+
   return BestBlock;
 }
 
@@ -582,6 +621,7 @@
     MachineBasicBlock *MBB,
     SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
     SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+    SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
     const BlockFilterSet *BlockFilter = nullptr) {
   BlockChain &Chain = *BlockToChain[MBB];
   if (!UpdatedPreds.insert(&Chain).second)
@@ -599,13 +639,20 @@
     }
   }
 
-  if (Chain.UnscheduledPredecessors == 0)
-    BlockWorkList.push_back(*Chain.begin());
+  if (Chain.UnscheduledPredecessors != 0)
+    return;
+
+  MBB = *Chain.begin();
+  if (MBB->isEHPad())
+    EHPadWorkList.push_back(MBB);
+  else
+    BlockWorkList.push_back(MBB);
 }
 
 void MachineBlockPlacement::buildChain(
     MachineBasicBlock *BB, BlockChain &Chain,
     SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+    SmallVectorImpl<MachineBasicBlock *> &EHPadWorkList,
     const BlockFilterSet *BlockFilter) {
   assert(BB);
   assert(BlockToChain[BB] == &Chain);
@@ -613,7 +660,8 @@
   MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
 
   MachineBasicBlock *LoopHeaderBB = BB;
-  markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+  markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, EHPadWorkList,
+                      BlockFilter);
   BB = *std::prev(Chain.end());
   for (;;) {
     assert(BB);
@@ -629,6 +677,8 @@
     // this point. This won't be a fallthrough, but it will increase locality.
     if (!BestSucc)
       BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
+    if (!BestSucc)
+      BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
 
     if (!BestSucc) {
       BestSucc =
@@ -647,7 +697,8 @@
     SuccChain.UnscheduledPredecessors = 0;
     DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
                  << getBlockName(BestSucc) << "\n");
-    markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+    markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, EHPadWorkList,
+                        BlockFilter);
     Chain.merge(BestSucc, &SuccChain);
     BB = *std::prev(Chain.end());
   }
@@ -1067,6 +1118,7 @@
     buildLoopChains(F, *InnerLoop);
 
   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+  SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
   BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L);
 
   // Check if we have profile data for this function. If yes, we will rotate
@@ -1100,9 +1152,10 @@
   UpdatedPreds.insert(&LoopChain);
 
   for (MachineBasicBlock *LoopBB : LoopBlockSet)
-    fillWorkLists(LoopBB, UpdatedPreds, BlockWorkList, &LoopBlockSet);
+    fillWorkLists(LoopBB, UpdatedPreds, BlockWorkList, EHPadWorkList,
+                  &LoopBlockSet);
 
-  buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
+  buildChain(LoopTop, LoopChain, BlockWorkList, EHPadWorkList, &LoopBlockSet);
 
   if (RotateLoopWithProfile)
     rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
@@ -1199,13 +1252,14 @@
     buildLoopChains(F, *L);
 
   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+  SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
 
   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
   for (MachineBasicBlock &MBB : F)
-    fillWorkLists(&MBB, UpdatedPreds, BlockWorkList);
+    fillWorkLists(&MBB, UpdatedPreds, BlockWorkList, EHPadWorkList);
 
   BlockChain &FunctionChain = *BlockToChain[&F.front()];
-  buildChain(&F.front(), FunctionChain, BlockWorkList);
+  buildChain(&F.front(), FunctionChain, BlockWorkList, EHPadWorkList);
 
 #ifndef NDEBUG
   typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;