Pass Divergence Analysis data to Selection DAG to drive divergence
dependent instruction selection.
Differential revision: https://reviews.llvm.org/D35267
llvm-svn: 326703
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 192d4b0..90cd4ff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -27,6 +27,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -83,6 +84,7 @@
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AMDGPUArgumentUsageInfo>();
+ AU.addRequired<DivergenceAnalysis>();
SelectionDAGISel::getAnalysisUsage(AU);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 78bd0da..f142ef6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -25,6 +25,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -748,6 +749,101 @@
return true;
}
+bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode * N) const {
+ switch (N->getOpcode()) {
+ default:
+ return false;
+ case ISD::EntryToken:
+ case ISD::TokenFactor:
+ return true;
+ case ISD::INTRINSIC_WO_CHAIN:
+ {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntrID) {
+ default:
+ return false;
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane:
+ return true;
+ }
+ }
+ break;
+ case ISD::LOAD:
+ {
+ const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
+ if (L->getMemOperand()->getAddrSpace()
+ == Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT)
+ return true;
+ return false;
+ }
+ break;
+ }
+}
+
+bool AMDGPUTargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
+ FunctionLoweringInfo * FLI, DivergenceAnalysis * DA) const
+{
+ switch (N->getOpcode()) {
+ case ISD::Register:
+ case ISD::CopyFromReg:
+ {
+ const RegisterSDNode *R = nullptr;
+ if (N->getOpcode() == ISD::Register) {
+ R = dyn_cast<RegisterSDNode>(N);
+ }
+ else {
+ R = dyn_cast<RegisterSDNode>(N->getOperand(1));
+ }
+ if (R)
+ {
+ const MachineFunction * MF = FLI->MF;
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SIRegisterInfo &TRI = ST.getInstrInfo()->getRegisterInfo();
+ unsigned Reg = R->getReg();
+ if (TRI.isPhysicalRegister(Reg))
+ return TRI.isVGPR(MRI, Reg);
+
+ if (MRI.isLiveIn(Reg)) {
+ // workitem.id.x workitem.id.y workitem.id.z
+ if ((MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_X) ||
+ (MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_Y) ||
+ (MRI.getLiveInPhysReg(Reg) == AMDGPU::T0_Z)||
+ (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR0) ||
+ (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR1) ||
+ (MRI.getLiveInPhysReg(Reg) == AMDGPU::VGPR2))
+ return true;
+ // Formal arguments of non-entry functions
+ // are conservatively considered divergent
+ else if (!AMDGPU::isEntryFunctionCC(FLI->Fn->getCallingConv()))
+ return true;
+ }
+ return !DA || DA->isDivergent(FLI->getValueFromVirtualReg(Reg));
+ }
+ }
+ break;
+ case ISD::LOAD: {
+ const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
+ if (L->getMemOperand()->getAddrSpace() ==
+ Subtarget->getAMDGPUAS().PRIVATE_ADDRESS)
+ return true;
+ } break;
+ case ISD::CALLSEQ_END:
+ return true;
+ break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ {
+
+ }
+ return AMDGPU::isIntrinsicSourceOfDivergence(
+ cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+ case ISD::INTRINSIC_W_CHAIN:
+ return AMDGPU::isIntrinsicSourceOfDivergence(
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ }
+ return false;
+}
+
//===---------------------------------------------------------------------===//
// Target Properties
//===---------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c99540f..afb85a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -168,6 +168,9 @@
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool isSDNodeSourceOfDivergence(const SDNode * N,
+ FunctionLoweringInfo * FLI, DivergenceAnalysis * DA) const;
+ bool isSDNodeAlwaysUniform(const SDNode * N) const;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 00ff030..6d6ab08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -17,6 +17,7 @@
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -464,55 +465,7 @@
}
}
-static bool isIntrinsicSourceOfDivergence(const IntrinsicInst *I) {
- switch (I->getIntrinsicID()) {
- case Intrinsic::amdgcn_workitem_id_x:
- case Intrinsic::amdgcn_workitem_id_y:
- case Intrinsic::amdgcn_workitem_id_z:
- case Intrinsic::amdgcn_interp_mov:
- case Intrinsic::amdgcn_interp_p1:
- case Intrinsic::amdgcn_interp_p2:
- case Intrinsic::amdgcn_mbcnt_hi:
- case Intrinsic::amdgcn_mbcnt_lo:
- case Intrinsic::r600_read_tidig_x:
- case Intrinsic::r600_read_tidig_y:
- case Intrinsic::r600_read_tidig_z:
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
- case Intrinsic::amdgcn_ds_fadd:
- case Intrinsic::amdgcn_ds_fmin:
- case Intrinsic::amdgcn_ds_fmax:
- case Intrinsic::amdgcn_image_atomic_swap:
- case Intrinsic::amdgcn_image_atomic_add:
- case Intrinsic::amdgcn_image_atomic_sub:
- case Intrinsic::amdgcn_image_atomic_smin:
- case Intrinsic::amdgcn_image_atomic_umin:
- case Intrinsic::amdgcn_image_atomic_smax:
- case Intrinsic::amdgcn_image_atomic_umax:
- case Intrinsic::amdgcn_image_atomic_and:
- case Intrinsic::amdgcn_image_atomic_or:
- case Intrinsic::amdgcn_image_atomic_xor:
- case Intrinsic::amdgcn_image_atomic_inc:
- case Intrinsic::amdgcn_image_atomic_dec:
- case Intrinsic::amdgcn_image_atomic_cmpswap:
- case Intrinsic::amdgcn_buffer_atomic_swap:
- case Intrinsic::amdgcn_buffer_atomic_add:
- case Intrinsic::amdgcn_buffer_atomic_sub:
- case Intrinsic::amdgcn_buffer_atomic_smin:
- case Intrinsic::amdgcn_buffer_atomic_umin:
- case Intrinsic::amdgcn_buffer_atomic_smax:
- case Intrinsic::amdgcn_buffer_atomic_umax:
- case Intrinsic::amdgcn_buffer_atomic_and:
- case Intrinsic::amdgcn_buffer_atomic_or:
- case Intrinsic::amdgcn_buffer_atomic_xor:
- case Intrinsic::amdgcn_buffer_atomic_cmpswap:
- case Intrinsic::amdgcn_ps_live:
- case Intrinsic::amdgcn_ds_swizzle:
- return true;
- default:
- return false;
- }
-}
+
static bool isArgPassedInSGPR(const Argument *A) {
const Function *F = A->getParent();
@@ -563,7 +516,7 @@
return true;
if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
- return isIntrinsicSourceOfDivergence(Intrinsic);
+ return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
// Assume all function calls are a source of divergence.
if (isa<CallInst>(V) || isa<InvokeInst>(V))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c4d7ab9..94d1e3a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5372,7 +5372,7 @@
unsigned NumElements = MemVT.getVectorNumElements();
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) {
- if (isMemOpUniform(Load))
+ if (!Op->isDivergent())
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -5382,7 +5382,7 @@
if (AS == AMDGPUASI.CONSTANT_ADDRESS ||
AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUASI.GLOBAL_ADDRESS) {
- if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
+ if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
!Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 7ee0af0..6bb5abd 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -223,12 +223,9 @@
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
auto Ld = cast<LoadSDNode>(N);
return Ld->getAlignment() >= 4 &&
- (((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
- static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
+ ((((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) || (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT)) && !N->isDivergent()) ||
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
- !Ld->isVolatile() &&
- static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
+ !Ld->isVolatile() && !N->isDivergent() &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 08ecf4f..7848a29 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
+#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
@@ -938,5 +939,55 @@
AMDGPUAS getAMDGPUAS(const Module &M) {
return getAMDGPUAS(Triple(M.getTargetTriple()));
}
+
+bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
+ switch (IntrID) {
+ case Intrinsic::amdgcn_workitem_id_x:
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::amdgcn_interp_mov:
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_mbcnt_hi:
+ case Intrinsic::amdgcn_mbcnt_lo:
+ case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_fadd:
+ case Intrinsic::amdgcn_ds_fmin:
+ case Intrinsic::amdgcn_ds_fmax:
+ case Intrinsic::amdgcn_image_atomic_swap:
+ case Intrinsic::amdgcn_image_atomic_add:
+ case Intrinsic::amdgcn_image_atomic_sub:
+ case Intrinsic::amdgcn_image_atomic_smin:
+ case Intrinsic::amdgcn_image_atomic_umin:
+ case Intrinsic::amdgcn_image_atomic_smax:
+ case Intrinsic::amdgcn_image_atomic_umax:
+ case Intrinsic::amdgcn_image_atomic_and:
+ case Intrinsic::amdgcn_image_atomic_or:
+ case Intrinsic::amdgcn_image_atomic_xor:
+ case Intrinsic::amdgcn_image_atomic_inc:
+ case Intrinsic::amdgcn_image_atomic_dec:
+ case Intrinsic::amdgcn_image_atomic_cmpswap:
+ case Intrinsic::amdgcn_buffer_atomic_swap:
+ case Intrinsic::amdgcn_buffer_atomic_add:
+ case Intrinsic::amdgcn_buffer_atomic_sub:
+ case Intrinsic::amdgcn_buffer_atomic_smin:
+ case Intrinsic::amdgcn_buffer_atomic_umin:
+ case Intrinsic::amdgcn_buffer_atomic_smax:
+ case Intrinsic::amdgcn_buffer_atomic_umax:
+ case Intrinsic::amdgcn_buffer_atomic_and:
+ case Intrinsic::amdgcn_buffer_atomic_or:
+ case Intrinsic::amdgcn_buffer_atomic_xor:
+ case Intrinsic::amdgcn_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_ps_live:
+ case Intrinsic::amdgcn_ds_swizzle:
+ return true;
+ default:
+ return false;
+ }
+}
} // namespace AMDGPU
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 650abf9..1fb8153 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -382,6 +382,9 @@
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+/// \returns true if the intrinsic is divergent
+bool isIntrinsicSourceOfDivergence(unsigned IntrID);
+
} // end namespace AMDGPU
} // end namespace llvm