AMDGPU: Select MIMG instructions manually in SITargetLowering

Summary:
The TableGen patterns for image intrinsics are hitting their limits:
for D16 we already have to manually pre-lower the packing of data
values, and we will have to do the same for A16 eventually.

Since there is already some custom C++ code anyway, it is arguably easier
to just do everything in C++, now that we can use the beefed-up generic
tables backend of TableGen to provide all the required metadata and map
intrinsics to corresponding opcodes. With this approach, all image
intrinsic lowering happens in SITargetLowering::lowerImage. That code is
dense due to all the cases that it handles, but it should still be easier
to follow than what we had before, by virtue of it all being done in a
single location, and by virtue of not relying on the TableGen pattern
magic that very few people really understand.
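
Roughly, the new flow (a condensed sketch of lowerImage and the generated
helpers added in this patch, not a literal excerpt) is:

  // Look up the generated per-intrinsic metadata, build the operand list
  // from it, and pick the concrete MIMG opcode from the generated table.
  if (const AMDGPU::ImageDimIntrinsicInfo *Intr =
          AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) {
    const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
        AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
    const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
    // ... assemble vdata, vaddr and flag operands using BaseOpcode/DimInfo ...
    int Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
                                       NumVDataDwords, NumVAddrDwords);
    MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
    return SDValue(NewNode, 0);
  }
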
This means that we will have MachineSDNodes with MIMG instructions
during DAG combining, but that seems alright: previously we had
intrinsic nodes instead, which are similarly opaque to the generic
CodeGen infrastructure, and the final pattern matching just did a 1:1
translation to machine instructions anyway. If anything, the fact that
we now merge the address words into a vector before DAG combine should
be an advantage.
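
As an illustration (a hand-written sketch based on getBuildDwordsVector
below; S, T and Clamp are hypothetical address operands of a 2D sample),
the address words are bitcast to f32 and padded with undef up to the next
supported vaddr size, so DAG combines see a single vector operand:

  // Three address dwords round up to a v4f32 vaddr operand.
  SDValue VAddr = DAG.getBuildVector(
      MVT::v4f32, DL, {S, T, Clamp, DAG.getUNDEF(MVT::f32)});
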
Change-Id: I417f26bd88f54ce9781c1668acc01f3f99774de6
Reviewers: arsenm, rampitec, rtaylor, tstellar
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D48017
llvm-svn: 335228
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 81beb1a..94d6d31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -28,10 +28,9 @@
namespace llvm {
namespace AMDGPU {
-#define GET_RsrcIntrinsics_IMPL
-#include "AMDGPUGenSearchableTables.inc"
-
#define GET_D16ImageDimIntrinsics_IMPL
+#define GET_ImageDimIntrinsicTable_IMPL
+#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 057b674..48f84a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -69,6 +69,13 @@
};
const D16ImageDimIntrinsic *lookupD16ImageDimIntrinsic(unsigned Intr);
+struct ImageDimIntrinsicInfo {
+ unsigned Intr;
+ unsigned BaseOpcode;
+ MIMGDim Dim;
+};
+const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
+
} // end AMDGPU namespace
} // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index df6c81f..e62b684 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -28,8 +28,6 @@
foreach intr = !listconcat(AMDGPUBufferIntrinsics,
AMDGPUImageIntrinsics,
AMDGPUImageDimIntrinsics,
- AMDGPUImageDimGatherIntrinsics,
- AMDGPUImageDimGetResInfoIntrinsics,
AMDGPUImageDimAtomicIntrinsics) in {
def : RsrcIntrinsic<!cast<AMDGPURsrcIntrinsic>(intr)>;
}
@@ -91,22 +89,3 @@
foreach intr = AMDGPUImageDimAtomicIntrinsics in
def : SourceOfDivergence<intr>;
-
-class D16ImageDimIntrinsic<AMDGPUImageDimIntrinsic intr> {
- Intrinsic Intr = intr;
- code D16HelperIntr =
- !cast<code>("AMDGPUIntrinsic::SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name);
-}
-
-def D16ImageDimIntrinsics : GenericTable {
- let FilterClass = "D16ImageDimIntrinsic";
- let Fields = ["Intr", "D16HelperIntr"];
-
- let PrimaryKey = ["Intr"];
- let PrimaryKeyName = "lookupD16ImageDimIntrinsic";
-}
-
-foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
- AMDGPUImageDimGatherIntrinsics) in {
- def : D16ImageDimIntrinsic<intr>;
-}
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index c584fe2..3410dc3 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -27,6 +27,10 @@
// vdata/vaddr size.
class MIMGBaseOpcode {
MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(NAME);
+ bit Store = 0;
+ bit Atomic = 0;
+ bit AtomicX2 = 0; // (f)cmpswap
+ bit Sampler = 0;
bits<8> NumExtraArgs = 0;
bit Gradients = 0;
bit Coordinates = 1;
@@ -41,14 +45,29 @@
def MIMGBaseOpcodesTable : GenericTable {
let FilterClass = "MIMGBaseOpcode";
let CppTypeName = "MIMGBaseOpcodeInfo";
- let Fields = ["BaseOpcode", "NumExtraArgs", "Gradients", "Coordinates",
- "LodOrClampOrMip", "HasD16"];
+ let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
+ "NumExtraArgs", "Gradients", "Coordinates", "LodOrClampOrMip",
+ "HasD16"];
GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
let PrimaryKey = ["BaseOpcode"];
let PrimaryKeyName = "getMIMGBaseOpcodeInfo";
}
+def MIMGDim : GenericEnum {
+ let FilterClass = "AMDGPUDimProps";
+}
+
+def MIMGDimInfoTable : GenericTable {
+ let FilterClass = "AMDGPUDimProps";
+ let CppTypeName = "MIMGDimInfo";
+ let Fields = ["Dim", "NumCoords", "NumGradients", "DA"];
+ GenericEnum TypeOf_Dim = MIMGDim;
+
+ let PrimaryKey = ["Dim"];
+ let PrimaryKeyName = "getMIMGDimInfo";
+}
+
class mimg <bits<7> si, bits<7> vi = si> {
field bits<7> SI = si;
field bits<7> VI = vi;
@@ -188,6 +207,7 @@
multiclass MIMG_Store <bits<7> op, string asm, bit has_d16, bit mip = 0> {
def "" : MIMGBaseOpcode {
+ let Store = 1;
let LodOrClampOrMip = mip;
let HasD16 = has_d16;
}
@@ -263,7 +283,10 @@
}
multiclass MIMG_Atomic <mimg op, string asm, bit isCmpSwap = 0> { // 64-bit atomics
- def "" : MIMGBaseOpcode;
+ def "" : MIMGBaseOpcode {
+ let Atomic = 1;
+ let AtomicX2 = isCmpSwap;
+ }
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
// _V* variants have different dst size, but the size is encoded implicitly,
@@ -309,6 +332,7 @@
class MIMG_Sampler_BaseOpcode<AMDGPUSampleVariant sample>
: MIMGBaseOpcode {
+ let Sampler = 1;
let NumExtraArgs = !size(sample.ExtraAddrArgs);
let Gradients = sample.Gradients;
let LodOrClampOrMip = !ne(sample.LodOrClamp, "");
@@ -458,188 +482,30 @@
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
-/********** ============================== **********/
-/********** Dimension-aware image patterns **********/
-/********** ============================== **********/
+/********** ========================================= **********/
+/********** Table of dimension-aware image intrinsics **********/
+/********** ========================================= **********/
-class getDwordsType<int dwords> {
- int NumDwords = dwords;
- string suffix = !if(!lt(dwords, 1), ?,
- !if(!eq(dwords, 1), "_V1",
- !if(!eq(dwords, 2), "_V2",
- !if(!le(dwords, 4), "_V4",
- !if(!le(dwords, 8), "_V8",
- !if(!le(dwords, 16), "_V16", ?))))));
- ValueType VT = !if(!lt(dwords, 1), ?,
- !if(!eq(dwords, 1), f32,
- !if(!eq(dwords, 2), v2f32,
- !if(!le(dwords, 4), v4f32,
- !if(!le(dwords, 8), v8f32,
- !if(!le(dwords, 16), v16f32, ?))))));
- RegisterClass VReg = !if(!lt(dwords, 1), ?,
- !if(!eq(dwords, 1), VGPR_32,
- !if(!eq(dwords, 2), VReg_64,
- !if(!le(dwords, 4), VReg_128,
- !if(!le(dwords, 8), VReg_256,
- !if(!le(dwords, 16), VReg_512, ?))))));
+class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
+ Intrinsic Intr = I;
+ MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod));
+ AMDGPUDimProps Dim = I.P.Dim;
}
-class makeRegSequence_Fold<int i, dag d> {
- int idx = i;
- dag lhs = d;
-}
+def ImageDimIntrinsicTable : GenericTable {
+ let FilterClass = "ImageDimIntrinsicInfo";
+ let Fields = ["Intr", "BaseOpcode", "Dim"];
+ GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode;
+ GenericEnum TypeOf_Dim = MIMGDim;
-// Generate a dag node which returns a vector register of class RC into which
-// the source operands given by names have been inserted (assuming that each
-// name corresponds to an operand whose size is equal to a subregister).
-class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> {
- dag ret =
- !if(!eq(!size(names), 1),
- !dag(COPY_TO_REGCLASS, [?, RC], [names[0], ?]),
- !foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name,
- makeRegSequence_Fold<
- !add(f.idx, 1),
- !con((INSERT_SUBREG f.lhs),
- !dag(INSERT_SUBREG, [?, !cast<SubRegIndex>("sub"#f.idx)],
- [name, ?]))>).lhs);
-}
-
-class ImageDimPattern<AMDGPUImageDimIntrinsic I,
- string dop, ValueType dty, bit d16,
- string suffix = ""> : GCNPat<(undef), (undef)> {
- list<AMDGPUArg> AddrArgs = I.P.AddrDefaultArgs;
- getDwordsType AddrDwords = getDwordsType<!size(AddrArgs)>;
-
- MIMG MI =
- !cast<MIMG>(!strconcat("IMAGE_", I.P.OpMod, dop, AddrDwords.suffix, suffix));
-
- // DAG fragment to match data arguments (vdata for store/atomic, dmask
- // for non-atomic).
- dag MatchDataDag =
- !con(!dag(I, !foreach(arg, I.P.DataArgs, dty),
- !foreach(arg, I.P.DataArgs, arg.Name)),
- !if(I.P.IsAtomic, (I), (I i32:$dmask)));
-
- // DAG fragment to match vaddr arguments.
- dag MatchAddrDag = !dag(I, !foreach(arg, AddrArgs, arg.Type.VT),
- !foreach(arg, AddrArgs, arg.Name));
-
- // DAG fragment to match sampler resource and unorm arguments.
- dag MatchSamplerDag = !if(I.P.IsSample, (I v4i32:$sampler, i1:$unorm), (I));
-
- // DAG node that generates the MI vdata for store/atomic
- getDwordsType DataDwords = getDwordsType<!size(I.P.DataArgs)>;
- dag GenDataDag =
- !if(I.P.IsAtomic, (MI makeRegSequence<DataDwords.VT, DataDwords.VReg,
- !foreach(arg, I.P.DataArgs, arg.Name)>.ret),
- !if(!size(I.P.DataArgs), (MI $vdata), (MI)));
-
- // DAG node that generates the MI vaddr
- dag GenAddrDag = makeRegSequence<AddrDwords.VT, AddrDwords.VReg,
- !foreach(arg, AddrArgs, arg.Name)>.ret;
- // DAG fragments that generate various inline flags
- dag GenDmask =
- !if(I.P.IsAtomic, (MI !add(!shl(1, DataDwords.NumDwords), -1)),
- (MI (as_i32imm $dmask)));
- dag GenGLC =
- !if(I.P.IsAtomic, (MI 1),
- (MI (bitextract_imm<0> $cachepolicy)));
-
- dag MatchIntrinsic = !con(MatchDataDag,
- MatchAddrDag,
- (I v8i32:$rsrc),
- MatchSamplerDag,
- (I 0/*texfailctrl*/,
- i32:$cachepolicy));
- let PatternToMatch =
- !if(!size(I.RetTypes), (dty MatchIntrinsic), MatchIntrinsic);
-
- bit IsCmpSwap = !and(I.P.IsAtomic, !eq(!size(I.P.DataArgs), 2));
- dag ImageInstruction =
- !con(GenDataDag,
- (MI GenAddrDag),
- (MI $rsrc),
- !if(I.P.IsSample, (MI $sampler), (MI)),
- GenDmask,
- !if(I.P.IsSample, (MI (as_i1imm $unorm)), (MI 1)),
- GenGLC,
- (MI (bitextract_imm<1> $cachepolicy),
- 0, /* r128 */
- 0, /* tfe */
- 0 /*(as_i1imm $lwe)*/,
- { I.P.Dim.DA }),
- !if(MI.BaseOpcode.HasD16, (MI d16), (MI)));
- let ResultInstrs = [
- !if(IsCmpSwap, (EXTRACT_SUBREG ImageInstruction, sub0), ImageInstruction)
- ];
+ let PrimaryKey = ["Intr"];
+ let PrimaryKeyName = "getImageDimIntrinsicInfo";
+ let PrimaryKeyEarlyOut = 1;
}
foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
- AMDGPUImageDimGetResInfoIntrinsics) in {
- def intr#_pat_v1 : ImageDimPattern<intr, "_V1", f32, 0>;
- def intr#_pat_v2 : ImageDimPattern<intr, "_V2", v2f32, 0>;
- def intr#_pat_v4 : ImageDimPattern<intr, "_V4", v4f32, 0>;
-}
-
-multiclass ImageDimD16Helper<AMDGPUImageDimIntrinsic I,
- AMDGPUImageDimIntrinsic d16helper> {
- let SubtargetPredicate = HasUnpackedD16VMem in {
- def _unpacked_v1 : ImageDimPattern<I, "_V1", f16, 1>;
- def _unpacked_v2 : ImageDimPattern<d16helper, "_V2", v2i32, 1>;
- def _unpacked_v4 : ImageDimPattern<d16helper, "_V4", v4i32, 1>;
- } // End HasUnpackedD16VMem.
-
- let SubtargetPredicate = HasPackedD16VMem in {
- def _packed_v1 : ImageDimPattern<I, "_V1", f16, 1>;
- def _packed_v2 : ImageDimPattern<I, "_V1", v2f16, 1>;
- def _packed_v4 : ImageDimPattern<I, "_V2", v4f16, 1>;
- } // End HasPackedD16VMem.
-}
-
-foreach intr = AMDGPUImageDimIntrinsics in {
- def intr#_d16helper_profile : AMDGPUDimProfileCopy<intr.P> {
- let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
- let DataArgs = !foreach(arg, intr.P.DataArgs, AMDGPUArg<llvm_any_ty, arg.Name>);
- }
-
- let TargetPrefix = "SI", isTarget = 1 in
- def int_SI_image_d16helper_ # intr.P.OpMod # intr.P.Dim.Name :
- AMDGPUImageDimIntrinsic<!cast<AMDGPUDimProfile>(intr#"_d16helper_profile"),
- intr.IntrProperties, intr.Properties>;
-
- defm intr#_d16 :
- ImageDimD16Helper<
- intr, !cast<AMDGPUImageDimIntrinsic>(
- "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name)>;
-}
-
-foreach intr = AMDGPUImageDimGatherIntrinsics in {
- def intr#_pat3 : ImageDimPattern<intr, "_V4", v4f32, 0>;
-
- def intr#_d16helper_profile : AMDGPUDimProfileCopy<intr.P> {
- let RetTypes = !foreach(ty, intr.P.RetTypes, llvm_any_ty);
- let DataArgs = !foreach(arg, intr.P.DataArgs, AMDGPUArg<llvm_any_ty, arg.Name>);
- }
-
- let TargetPrefix = "SI", isTarget = 1 in
- def int_SI_image_d16helper_ # intr.P.OpMod # intr.P.Dim.Name :
- AMDGPUImageDimIntrinsic<!cast<AMDGPUDimProfile>(intr#"_d16helper_profile"),
- intr.IntrProperties, intr.Properties>;
-
- let SubtargetPredicate = HasUnpackedD16VMem in {
- def intr#_unpacked_v4 :
- ImageDimPattern<!cast<AMDGPUImageDimIntrinsic>(
- "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
- "_V4", v4i32, 1>;
- } // End HasUnpackedD16VMem.
-
- let SubtargetPredicate = HasPackedD16VMem in {
- def intr#_packed_v4 : ImageDimPattern<intr, "_V2", v4f16, 1>;
- } // End HasPackedD16VMem.
-}
-
-foreach intr = AMDGPUImageDimAtomicIntrinsics in {
- def intr#_pat1 : ImageDimPattern<intr, "_V1", i32, 0>;
+ AMDGPUImageDimAtomicIntrinsics) in {
+ def : ImageDimIntrinsicInfo<intr>;
}
/********** ======================= **********/
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d71f391..e17cef8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4516,6 +4516,245 @@
return DAG.getUNDEF(VT);
}
+static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
+ ArrayRef<SDValue> Elts) {
+ assert(!Elts.empty());
+ MVT Type;
+ unsigned NumElts;
+
+ if (Elts.size() == 1) {
+ Type = MVT::f32;
+ NumElts = 1;
+ } else if (Elts.size() == 2) {
+ Type = MVT::v2f32;
+ NumElts = 2;
+ } else if (Elts.size() <= 4) {
+ Type = MVT::v4f32;
+ NumElts = 4;
+ } else if (Elts.size() <= 8) {
+ Type = MVT::v8f32;
+ NumElts = 8;
+ } else {
+ assert(Elts.size() <= 16);
+ Type = MVT::v16f32;
+ NumElts = 16;
+ }
+
+ SmallVector<SDValue, 16> VecElts(NumElts);
+ for (unsigned i = 0; i < Elts.size(); ++i) {
+ SDValue Elt = Elts[i];
+ if (Elt.getValueType() != MVT::f32)
+ Elt = DAG.getBitcast(MVT::f32, Elt);
+ VecElts[i] = Elt;
+ }
+ for (unsigned i = Elts.size(); i < NumElts; ++i)
+ VecElts[i] = DAG.getUNDEF(MVT::f32);
+
+ if (NumElts == 1)
+ return VecElts[0];
+ return DAG.getBuildVector(Type, DL, VecElts);
+}
+
+static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
+ SDValue *GLC, SDValue *SLC) {
+ auto CachePolicyConst = dyn_cast<ConstantSDNode>(CachePolicy.getNode());
+ if (!CachePolicyConst)
+ return false;
+
+ uint64_t Value = CachePolicyConst->getZExtValue();
+ SDLoc DL(CachePolicy);
+ if (GLC) {
+ *GLC = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x1;
+ }
+ if (SLC) {
+ *SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
+ Value &= ~(uint64_t)0x2;
+ }
+
+ return Value == 0;
+}
+
+SDValue SITargetLowering::lowerImage(SDValue Op,
+ const AMDGPU::ImageDimIntrinsicInfo *Intr,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+ const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
+
+ SmallVector<EVT, 2> ResultTypes(Op->value_begin(), Op->value_end());
+ bool IsD16 = false;
+ SDValue VData;
+ int NumVDataDwords;
+ unsigned AddrIdx; // Index of first address argument
+ unsigned DMask;
+
+ if (BaseOpcode->Atomic) {
+ VData = Op.getOperand(2);
+
+ bool Is64Bit = VData.getValueType() == MVT::i64;
+ if (BaseOpcode->AtomicX2) {
+ SDValue VData2 = Op.getOperand(3);
+ VData = DAG.getBuildVector(Is64Bit ? MVT::v2i64 : MVT::v2i32, DL,
+ {VData, VData2});
+ if (Is64Bit)
+ VData = DAG.getBitcast(MVT::v4i32, VData);
+
+ ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
+ DMask = Is64Bit ? 0xf : 0x3;
+ NumVDataDwords = Is64Bit ? 4 : 2;
+ AddrIdx = 4;
+ } else {
+ DMask = Is64Bit ? 0x3 : 0x1;
+ NumVDataDwords = Is64Bit ? 2 : 1;
+ AddrIdx = 3;
+ }
+ } else {
+ unsigned DMaskIdx;
+
+ if (BaseOpcode->Store) {
+ VData = Op.getOperand(2);
+
+ MVT StoreVT = VData.getSimpleValueType();
+ if (StoreVT.getScalarType() == MVT::f16) {
+ if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS ||
+ !BaseOpcode->HasD16)
+ return Op; // D16 is unsupported for this instruction
+
+ IsD16 = true;
+ VData = handleD16VData(VData, DAG);
+ }
+
+ NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
+ DMaskIdx = 3;
+ } else {
+ MVT LoadVT = Op.getSimpleValueType();
+ if (LoadVT.getScalarType() == MVT::f16) {
+ if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS ||
+ !BaseOpcode->HasD16)
+ return Op; // D16 is unsupported for this instruction
+
+ IsD16 = true;
+ if (LoadVT.isVector() && Subtarget->hasUnpackedD16VMem())
+ ResultTypes[0] = (LoadVT == MVT::v2f16) ? MVT::v2i32 : MVT::v4i32;
+ }
+
+ NumVDataDwords = (ResultTypes[0].getSizeInBits() + 31) / 32;
+ DMaskIdx = isa<MemSDNode>(Op) ? 2 : 1;
+ }
+
+ auto DMaskConst = dyn_cast<ConstantSDNode>(Op.getOperand(DMaskIdx));
+ if (!DMaskConst)
+ return Op;
+
+ AddrIdx = DMaskIdx + 1;
+ DMask = DMaskConst->getZExtValue();
+ if (!DMask && !BaseOpcode->Store) {
+ // Eliminate no-op loads. Stores with dmask == 0 are *not* no-op: they
+ // store the channels' default values.
+ SDValue Undef = DAG.getUNDEF(Op.getValueType());
+ if (isa<MemSDNode>(Op))
+ return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
+ return Undef;
+ }
+ }
+
+ unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
+ (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
+ (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
+ (BaseOpcode->LodOrClampOrMip ? 1 : 0);
+ SmallVector<SDValue, 4> VAddrs;
+ for (unsigned i = 0; i < NumVAddrs; ++i)
+ VAddrs.push_back(Op.getOperand(AddrIdx + i));
+ SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
+
+ SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
+ SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
+ unsigned CtrlIdx; // Index of texfailctrl argument
+ SDValue Unorm;
+ if (!BaseOpcode->Sampler) {
+ Unorm = True;
+ CtrlIdx = AddrIdx + NumVAddrs + 1;
+ } else {
+ auto UnormConst =
+ dyn_cast<ConstantSDNode>(Op.getOperand(AddrIdx + NumVAddrs + 2));
+ if (!UnormConst)
+ return Op;
+
+ Unorm = UnormConst->getZExtValue() ? True : False;
+ CtrlIdx = AddrIdx + NumVAddrs + 3;
+ }
+
+ SDValue TexFail = Op.getOperand(CtrlIdx);
+ auto TexFailConst = dyn_cast<ConstantSDNode>(TexFail.getNode());
+ if (!TexFailConst || TexFailConst->getZExtValue() != 0)
+ return Op;
+
+ SDValue GLC;
+ SDValue SLC;
+ if (BaseOpcode->Atomic) {
+ GLC = True; // TODO no-return optimization
+ if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC))
+ return Op;
+ } else {
+ if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC))
+ return Op;
+ }
+
+ SmallVector<SDValue, 14> Ops;
+ if (BaseOpcode->Store || BaseOpcode->Atomic)
+ Ops.push_back(VData); // vdata
+ Ops.push_back(VAddr);
+ Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc
+ if (BaseOpcode->Sampler)
+ Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler
+ Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
+ Ops.push_back(Unorm);
+ Ops.push_back(GLC);
+ Ops.push_back(SLC);
+ Ops.push_back(False); // r128
+ Ops.push_back(False); // tfe
+ Ops.push_back(False); // lwe
+ Ops.push_back(DimInfo->DA ? True : False);
+ if (BaseOpcode->HasD16)
+ Ops.push_back(IsD16 ? True : False);
+ if (isa<MemSDNode>(Op))
+ Ops.push_back(Op.getOperand(0)); // chain
+
+ int NumVAddrDwords = VAddr.getValueType().getSizeInBits() / 32;
+ int Opcode = -1;
+
+ if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
+ Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx8,
+ NumVDataDwords, NumVAddrDwords);
+ if (Opcode == -1)
+ Opcode = AMDGPU::getMIMGOpcode(Intr->BaseOpcode, AMDGPU::MIMGEncGfx6,
+ NumVDataDwords, NumVAddrDwords);
+ assert(Opcode != -1);
+
+ MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
+ if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
+ *MemRefs = MemOp->getMemOperand();
+ NewNode->setMemRefs(MemRefs, MemRefs + 1);
+ }
+
+ if (BaseOpcode->AtomicX2) {
+ SmallVector<SDValue, 1> Elt;
+ DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
+ return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
+ } else if (IsD16 && !BaseOpcode->Store) {
+ MVT LoadVT = Op.getSimpleValueType();
+ SDValue Adjusted = adjustLoadValueTypeImpl(
+ SDValue(NewNode, 0), LoadVT, DL, DAG, Subtarget->hasUnpackedD16VMem());
+ return DAG.getMergeValues({Adjusted, SDValue(NewNode, 1)}, DL);
+ }
+
+ return SDValue(NewNode, 0);
+}
+
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -4853,6 +5092,10 @@
return SDValue();
}
default:
+ if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+ AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
+ return lowerImage(Op, ImageDimIntr, DAG);
+
return Op;
}
}
@@ -5134,15 +5377,9 @@
return SDValue();
}
default:
- if (Subtarget->hasUnpackedD16VMem() &&
- Op.getValueType().isVector() &&
- Op.getValueType().getScalarSizeInBits() == 16) {
- if (const AMDGPU::D16ImageDimIntrinsic *D16ImageDimIntr =
- AMDGPU::lookupD16ImageDimIntrinsic(IntrID)) {
- return adjustLoadValueType(D16ImageDimIntr->D16HelperIntr,
- cast<MemSDNode>(Op), DAG, true);
- }
- }
+ if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+ AMDGPU::getImageDimIntrinsicInfo(IntrID))
+ return lowerImage(Op, ImageDimIntr, DAG);
return SDValue();
}
@@ -5392,25 +5629,9 @@
return SDValue();
}
default: {
- const AMDGPU::D16ImageDimIntrinsic *D16ImageDimIntr =
- AMDGPU::lookupD16ImageDimIntrinsic(IntrinsicID);
- if (D16ImageDimIntr) {
- SDValue VData = Op.getOperand(2);
- EVT StoreVT = VData.getValueType();
- if (Subtarget->hasUnpackedD16VMem() &&
- StoreVT.isVector() &&
- StoreVT.getScalarSizeInBits() == 16) {
- SmallVector<SDValue, 12> Ops(Op.getNode()->op_values());
-
- Ops[1] = DAG.getConstant(D16ImageDimIntr->D16HelperIntr, DL, MVT::i32);
- Ops[2] = handleD16VData(VData, DAG);
-
- MemSDNode *M = cast<MemSDNode>(Op);
- return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Op->getVTList(),
- Ops, M->getMemoryVT(),
- M->getMemOperand());
- }
- }
+ if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+ AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
+ return lowerImage(Op, ImageDimIntr, DAG);
return Op;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index b454681..5851adb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -42,6 +42,8 @@
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
MVT VT, unsigned Offset) const;
+ SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
+ SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 739cc91..f7bd27a 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -107,9 +107,18 @@
uint8_t VAddrDwords;
};
+#define GET_MIMGBaseOpcodesTable_IMPL
+#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"
+int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
+ unsigned VDataDwords, unsigned VAddrDwords) {
+ const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
+ VDataDwords, VAddrDwords);
+ return Info ? Info->Opcode : -1;
+}
+
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
const MIMGInfo *NewInfo =
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 31bf7fa..a59571c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -39,6 +39,7 @@
namespace AMDGPU {
#define GET_MIMGBaseOpcode_DECL
+#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#include "AMDGPUGenSearchableTables.inc"
@@ -162,6 +163,37 @@
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+struct MIMGBaseOpcodeInfo {
+ MIMGBaseOpcode BaseOpcode;
+ bool Store;
+ bool Atomic;
+ bool AtomicX2;
+ bool Sampler;
+
+ uint8_t NumExtraArgs;
+ bool Gradients;
+ bool Coordinates;
+ bool LodOrClampOrMip;
+ bool HasD16;
+};
+
+LLVM_READONLY
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
+
+struct MIMGDimInfo {
+ MIMGDim Dim;
+ uint8_t NumCoords;
+ uint8_t NumGradients;
+ bool DA;
+};
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+
+LLVM_READONLY
+int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
+ unsigned VDataDwords, unsigned VAddrDwords);
+
LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);