Update branch coalescing to be a PowerPC specific pass
Implement branch coalescing as a PowerPC-specific pass. Branch coalescing uses
the analyzeBranch method, which currently does not report any implicit operands.
This is not an issue on PPC but must be handled on other targets.
The pass is currently off by default; enable it with -enable-ppc-branch-coalesce.
Differential Revision: https://reviews.llvm.org/D32776
llvm-svn: 313061
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 96cfce5..1377a6d 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -409,9 +409,6 @@
/// This pass frees the memory occupied by the MachineFunction.
FunctionPass *createFreeMachineFunctionPass();
- /// This pass combine basic blocks guarded by the same branch.
- extern char &BranchCoalescingID;
-
/// This pass performs outlining on machine instructions directly before
/// printing assembly.
ModulePass *createMachineOutlinerPass();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 8ee6a05..bf54b64 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -76,7 +76,6 @@
void initializeBlockExtractorPassPass(PassRegistry&);
void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&);
void initializeBoundsCheckingPass(PassRegistry&);
-void initializeBranchCoalescingPass(PassRegistry&);
void initializeBranchFolderPassPass(PassRegistry&);
void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&);
void initializeBranchRelaxationPass(PassRegistry&);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 7f3c6da..7ec7fda 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -4,7 +4,6 @@
Analysis.cpp
AtomicExpandPass.cpp
BasicTargetTransformInfo.cpp
- BranchCoalescing.cpp
BranchFolding.cpp
BranchRelaxation.cpp
BuiltinGCs.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 75f62ba..f4ccb48 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -21,7 +21,6 @@
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
- initializeBranchCoalescingPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCodeGenPreparePass(Registry);
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 3297687..4584f65 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -927,9 +927,6 @@
addPass(&MachineLICMID, false);
addPass(&MachineCSEID, false);
- // Coalesce basic blocks with the same branch condition
- addPass(&BranchCoalescingID);
-
addPass(&MachineSinkingID);
addPass(&PeepholeOptimizerID);
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 7ca4c19..4aa6dfa 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -16,6 +16,7 @@
PPCBoolRetToInt.cpp
PPCAsmPrinter.cpp
PPCBranchSelector.cpp
+ PPCBranchCoalescing.cpp
PPCCCState.cpp
PPCCTRLoops.cpp
PPCHazardRecognizers.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index ad92ac8..4079001 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -41,6 +41,7 @@
FunctionPass *createPPCVSXSwapRemovalPass();
FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCBranchCoalescingPass();
FunctionPass *createPPCQPXLoadSplatPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL);
FunctionPass *createPPCTLSDynamicCallPass();
diff --git a/llvm/lib/CodeGen/BranchCoalescing.cpp b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
similarity index 88%
rename from llvm/lib/CodeGen/BranchCoalescing.cpp
rename to llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 2c41b59..33085a4 100644
--- a/llvm/lib/CodeGen/BranchCoalescing.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -13,6 +13,7 @@
///
//===----------------------------------------------------------------------===//
+#include "PPC.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -27,18 +28,18 @@
using namespace llvm;
-#define DEBUG_TYPE "branch-coalescing"
-
-static cl::opt<cl::boolOrDefault>
- EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
- cl::desc("enable coalescing of duplicate branches"));
+#define DEBUG_TYPE "ppc-branch-coalescing"
STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
+namespace llvm {
+ void initializePPCBranchCoalescingPass(PassRegistry&);
+}
+
//===----------------------------------------------------------------------===//
-// BranchCoalescing
+// PPCBranchCoalescing
//===----------------------------------------------------------------------===//
///
/// Improve scheduling by coalescing branches that depend on the same condition.
@@ -46,13 +47,17 @@
/// and attempts to merge the blocks together. Such opportunities arise from
/// the expansion of select statements in the IR.
///
-/// For example, consider the following LLVM IR:
+/// This pass does not handle implicit operands on branch statements. In order
+/// to run on targets that use implicit operands, changes need to be made in the
+/// canCoalesceBranch and canMerge methods.
///
-/// %test = icmp eq i32 %x 0
-/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
-/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
+/// Example: the following LLVM IR
///
-/// This IR expands to the following machine code on PowerPC:
+/// %test = icmp eq i32 %x 0
+/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
+/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
+///
+/// expands to the following machine code:
///
/// BB#0: derived from LLVM BB %entry
/// Live Ins: %F1 %F3 %X6
@@ -132,7 +137,7 @@
namespace {
-class BranchCoalescing : public MachineFunctionPass {
+class PPCBranchCoalescing : public MachineFunctionPass {
struct CoalescingCandidateInfo {
MachineBasicBlock *BranchBlock; // Block containing the branch
MachineBasicBlock *BranchTargetBlock; // Block branched to
@@ -157,15 +162,11 @@
bool validateCandidates(CoalescingCandidateInfo &SourceRegion,
CoalescingCandidateInfo &TargetRegion) const;
- static bool isBranchCoalescingEnabled() {
- return EnableBranchCoalescing == cl::BOU_TRUE;
- }
-
public:
static char ID;
- BranchCoalescing() : MachineFunctionPass(ID) {
- initializeBranchCoalescingPass(*PassRegistry::getPassRegistry());
+ PPCBranchCoalescing() : MachineFunctionPass(ID) {
+ initializePPCBranchCoalescingPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -190,21 +191,25 @@
};
} // End anonymous namespace.
-char BranchCoalescing::ID = 0;
-char &llvm::BranchCoalescingID = BranchCoalescing::ID;
+char PPCBranchCoalescing::ID = 0;
+/// createPPCBranchCoalescingPass - returns an instance of the Branch Coalescing
+/// Pass
+FunctionPass *llvm::createPPCBranchCoalescingPass() {
+ return new PPCBranchCoalescing();
+}
-INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(PPCBranchCoalescing, DEBUG_TYPE,
"Branch Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
-INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing",
+INITIALIZE_PASS_END(PPCBranchCoalescing, DEBUG_TYPE, "Branch Coalescing",
false, false)
-BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
+PPCBranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
: BranchBlock(nullptr), BranchTargetBlock(nullptr),
FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {}
-void BranchCoalescing::CoalescingCandidateInfo::clear() {
+void PPCBranchCoalescing::CoalescingCandidateInfo::clear() {
BranchBlock = nullptr;
BranchTargetBlock = nullptr;
FallThroughBlock = nullptr;
@@ -213,7 +218,7 @@
MustMoveUp = false;
}
-void BranchCoalescing::initialize(MachineFunction &MF) {
+void PPCBranchCoalescing::initialize(MachineFunction &MF) {
MDT = &getAnalysis<MachineDominatorTree>();
MPDT = &getAnalysis<MachinePostDominatorTree>();
TII = MF.getSubtarget().getInstrInfo();
@@ -230,7 +235,7 @@
///\param[in,out] Cand The coalescing candidate to analyze
///\return true if and only if the branch can be coalesced, false otherwise
///
-bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
+bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber()
<< " can be coalesced:");
MachineBasicBlock *FalseMBB = nullptr;
@@ -246,6 +251,19 @@
if (!I.isBranch())
continue;
+ // The analyzeBranch method does not include any implicit operands.
+ // This is not an issue on PPC but must be handled on other targets.
+ // For this pass to be made target-independent, the analyzeBranch API
+ // needs to be updated to support implicit operands and there would
+ // need to be a way to verify that any implicit operands would not be
+ // clobbered by merging blocks. This would include identifying the
+ // implicit operands as well as the basic block they are defined in.
+ // This could be done by changing the analyzeBranch API to have it also
+ // record and return the implicit operands and the blocks where they are
+ // defined. Alternatively, the BranchCoalescing code would need to be
+ // extended to identify the implicit operands. The analysis in canMerge
+ // must then be extended to prove that none of the implicit operands are
+ // changed in the blocks that are combined during coalescing.
if (I.getNumOperands() != I.getNumExplicitOperands()) {
DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
<< "\n");
@@ -309,7 +327,7 @@
/// \param[in] OpList2 operand list
/// \return true if and only if the operands lists are identical
///
-bool BranchCoalescing::identicalOperands(
+bool PPCBranchCoalescing::identicalOperands(
ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const {
if (OpList1.size() != OpList2.size()) {
@@ -325,6 +343,14 @@
<< "Op2: " << Op2 << "\n");
if (Op1.isIdenticalTo(Op2)) {
+ // filter out instructions with physical-register uses
+ if (Op1.isReg() && TargetRegisterInfo::isPhysicalRegister(Op1.getReg())
+ // If the physical register is constant then we can assume the value
+ // has not changed between uses.
+ && !(Op1.isUse() && MRI->isConstantPhysReg(Op1.getReg()))) {
+ DEBUG(dbgs() << "The operands are not provably identical.\n");
+ return false;
+ }
DEBUG(dbgs() << "Op1 and Op2 are identical!\n");
continue;
}
@@ -349,6 +375,7 @@
return false;
}
}
+
return true;
}
@@ -361,7 +388,7 @@
/// \param[in] SourceMBB block to move PHI instructions from
/// \param[in] TargetMBB block to move PHI instructions to
///
-void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
+void PPCBranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
MachineBasicBlock *TargetMBB) {
MachineBasicBlock::iterator MI = SourceMBB->begin();
@@ -394,7 +421,7 @@
/// \return true if it is safe to move MI to beginning of TargetMBB,
/// false otherwise.
///
-bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
+bool PPCBranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
const MachineBasicBlock &TargetMBB
) const {
@@ -425,7 +452,7 @@
/// \return true if it is safe to move MI to end of TargetMBB,
/// false otherwise.
///
-bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI,
+bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI,
const MachineBasicBlock &TargetMBB
) const {
@@ -457,7 +484,7 @@
/// \return true if all instructions in SourceRegion.BranchBlock can be merged
/// into a block in TargetRegion; false otherwise.
///
-bool BranchCoalescing::validateCandidates(
+bool PPCBranchCoalescing::validateCandidates(
CoalescingCandidateInfo &SourceRegion,
CoalescingCandidateInfo &TargetRegion) const {
@@ -500,7 +527,7 @@
/// \return true if all instructions in SourceRegion.BranchBlock can be merged
/// into a block in TargetRegion, false otherwise.
///
-bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
+bool PPCBranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
CoalescingCandidateInfo &TargetRegion) const {
if (!validateCandidates(SourceRegion, TargetRegion))
return false;
@@ -605,7 +632,7 @@
/// \param[in] SourceRegion The candidate to move blocks from
/// \param[in] TargetRegion The candidate to move blocks to
///
-bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
+bool PPCBranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
CoalescingCandidateInfo &TargetRegion) {
if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) {
@@ -685,10 +712,9 @@
return true;
}
-bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
+bool PPCBranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()) || MF.empty() ||
- !isBranchCoalescingEnabled())
+ if (skipFunction(*MF.getFunction()) || MF.empty())
return false;
bool didSomething = false;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index bc5d32b..9e14678 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -40,6 +40,10 @@
using namespace llvm;
+
+static cl::opt<bool>
+ EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden,
+ cl::desc("enable coalescing of duplicate branches for PPC"));
static cl::
opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
cl::desc("Disable CTR loops for PPC"));
@@ -378,6 +382,10 @@
}
void PPCPassConfig::addMachineSSAOptimization() {
+ // PPCBranchCoalescingPass needs to be done before machine sinking
+ // since it merges empty blocks.
+ if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCBranchCoalescingPass());
TargetPassConfig::addMachineSSAOptimization();
// For little endian, remove where possible the vector swap instructions
// introduced at code generation to normalize vector element order.
diff --git a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
index deb6d89..007eef2 100644
--- a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
+++ b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll
@@ -1,9 +1,54 @@
-; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s
-; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -enable-ppc-branch-coalesce < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -enable-ppc-branch-coalesce < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s
; Function Attrs: nounwind
define double @testBranchCoal(double %a, double %b, double %c, i32 %x) {
-entry:
+
+; CHECK-LABEL: @testBranchCoal
+; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0
+; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]]
+; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha
+; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha
+; CHECK-DAG: xxlxor 2, 2, 2
+; CHECK-NOT: beq
+; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]]
+; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]]
+; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]]
+; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]]
+; CHECK: .LBB[[LAB1]]
+; CHECK: xsadddp 0, 1, 2
+; CHECK: xsadddp 1, 0, 3
+; CHECK: blr
+
+; CHECK-NOCOALESCE-LABEL: testBranchCoal:
+; CHECK-NOCOALESCE: # BB#0: # %entry
+; CHECK-NOCOALESCE-NEXT: cmplwi 0, 6, 0
+; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_5
+; CHECK-NOCOALESCE-NEXT: # BB#1: # %entry
+; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_6
+; CHECK-NOCOALESCE-NEXT: .LBB0_2: # %entry
+; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_4
+; CHECK-NOCOALESCE-NEXT: .LBB0_3: # %entry
+; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
+; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_1@toc@l
+; CHECK-NOCOALESCE-NEXT: lxsdx 3, 0, 3
+; CHECK-NOCOALESCE-NEXT: .LBB0_4: # %entry
+; CHECK-NOCOALESCE-NEXT: xsadddp 0, 1, 2
+; CHECK-NOCOALESCE-NEXT: xsadddp 1, 0, 3
+; CHECK-NOCOALESCE-NEXT: blr
+; CHECK-NOCOALESCE-NEXT: .LBB0_5: # %entry
+; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_0@toc@l
+; CHECK-NOCOALESCE-NEXT: lxsdx 1, 0, 3
+; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_2
+; CHECK-NOCOALESCE-NEXT: .LBB0_6: # %entry
+; CHECK-NOCOALESCE-NEXT: xxlxor 2, 2, 2
+; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_3
+; CHECK-NOCOALESCE-NEXT: b .LBB0_4
+ entry:
+
%test = icmp eq i32 %x, 0
%tmp1 = select i1 %test, double %a, double 2.000000e-03
%tmp2 = select i1 %test, double %b, double 0.000000e+00
@@ -12,20 +57,4 @@
%res1 = fadd double %tmp1, %tmp2
%result = fadd double %res1, %tmp3
ret double %result
-
-; CHECK-LABEL: @testBranchCoal
-; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0
-; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]]
-; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha
-; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha
-; CHECK-DAG: xxlxor 2, 2, 2
-; CHECK-NOT: beq
-; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]]
-; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]]
-; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]]
-; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]]
-; CHECK: .LBB[[LAB1]]
-; CHECK: xsadddp 0, 1, 2
-; CHECK: xsadddp 1, 0, 3
-; CHECK: blr
}