AMDGPU/GlobalISel: Select flat loads
Now that the patterns use the new PatFrag address space support, the
only blocker to importing most load patterns is the addressing mode
complex patterns.
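
As a rough sketch of how the pieces fit together (illustrative only, not
part of this change; the ComplexPattern definition and the pattern below
are paraphrased and the exact operand lists may differ), the SelectionDAG
flat-load patterns are written against a ComplexPattern roughly like:

  def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>">;

  def : GCNPat <
    (load_flat (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
    (FLAT_LOAD_DWORD $vaddr, $offset, 0, $slc)
  >;

The gi_flat_offset / gi_flat_offset_signed equivalences added below let
the GlobalISel pattern importer reuse those patterns by calling
selectFlatOffset / selectFlatOffsetSigned to render the (vaddr, offset,
slc) operands, so the hand-written selectG_LOAD fallback is no longer
needed for flat loads.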
llvm-svn: 366237
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 6f725d6..1ccb90b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -50,6 +50,14 @@
GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
GIComplexPatternEquiv<SMRDSgpr>;
+def gi_flat_offset :
+ GIComplexOperandMatcher<s64, "selectFlatOffset">,
+ GIComplexPatternEquiv<FLATOffset>;
+def gi_flat_offset_signed :
+ GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
+ GIComplexPatternEquiv<FLATOffsetSigned>;
+
+
class GISelSop2Pat <
SDPatternOperator node,
Instruction inst,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 14ae629..39016ed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2937,18 +2937,11 @@
SDValue N1 = Addr.getOperand(1);
int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
- if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
- if ((IsSigned && isInt<12>(COffsetVal)) ||
- (!IsSigned && isUInt<11>(COffsetVal))) {
- Addr = N0;
- OffsetVal = COffsetVal;
- }
- } else {
- if ((IsSigned && isInt<13>(COffsetVal)) ||
- (!IsSigned && isUInt<12>(COffsetVal))) {
- Addr = N0;
- OffsetVal = COffsetVal;
- }
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ if (TII->isLegalFLATOffset(COffsetVal, findMemSDNode(N)->getAddressSpace(),
+ IsSigned)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index aa634e8..f8f8959 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1239,47 +1239,9 @@
}
bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- MachineFunction *MF = BB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- const DebugLoc &DL = I.getDebugLoc();
- Register DstReg = I.getOperand(0).getReg();
- Register PtrReg = I.getOperand(1).getReg();
- unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
- unsigned Opcode;
-
- if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) {
- LLVM_DEBUG(dbgs() << "Unhandled address space\n");
- return false;
- }
-
- SmallVector<GEPInfo, 4> AddrInfo;
-
- getAddrModeInfo(I, MRI, AddrInfo);
-
- switch (LoadSize) {
- case 32:
- Opcode = AMDGPU::FLAT_LOAD_DWORD;
- break;
- case 64:
- Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
- break;
- default:
- LLVM_DEBUG(dbgs() << "Unhandled load size\n");
- return false;
- }
-
- MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
- .add(I.getOperand(0))
- .addReg(PtrReg)
- .addImm(0) // offset
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0); // dlc
-
- bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
- I.eraseFromParent();
- return Ret;
+ // TODO: Can/should we insert m0 initialization here for DS instructions and
+ // call the normal selector?
+ return false;
}
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
@@ -1397,9 +1359,7 @@
return true;
return selectImpl(I, CoverageInfo);
case TargetOpcode::G_LOAD:
- if (selectImpl(I, CoverageInfo))
- return true;
- return selectG_LOAD(I);
+ return selectImpl(I, CoverageInfo);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
case TargetOpcode::G_STORE:
@@ -1584,3 +1544,51 @@
[=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
}};
}
+
+template <bool Signed>
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ InstructionSelector::ComplexRendererFns Default = {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
+ }};
+
+ if (!STI.hasFlatInstOffsets())
+ return Default;
+
+ const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
+ if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
+ return Default;
+
+ Optional<int64_t> Offset =
+ getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
+ if (!Offset.hasValue())
+ return Default;
+
+ unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
+ if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
+ return Default;
+
+ Register BasePtr = OpDef->getOperand(1).getReg();
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
+ return selectFlatOffsetImpl<false>(Root);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
+ return selectFlatOffsetImpl<true>(Root);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 1027a0b..e30d745 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -119,6 +119,15 @@
InstructionSelector::ComplexRendererFns
selectSmrdSgpr(MachineOperand &Root) const;
+ template <bool Signed>
+ InstructionSelector::ComplexRendererFns
+ selectFlatOffsetImpl(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectFlatOffset(MachineOperand &Root) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectFlatOffsetSigned(MachineOperand &Root) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 3cf4fbc..670f622 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -497,6 +497,9 @@
.custom();
}
+ // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
+ // handle some operations by just promoting the register during
+ // selection. There are also d16 loads on GFX9+ which preserve the high bits.
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.narrowScalarIf([](const LegalityQuery &Query) {
unsigned Size = Query.Types[0].getSizeInBits();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3474185..ba8ed69 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6118,6 +6118,25 @@
return RCID == AMDGPU::SReg_128RegClassID;
}
+bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
+ bool Signed) const {
+ // TODO: Should 0 be special cased?
+ if (!ST.hasFlatInstOffsets())
+ return false;
+
+ if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
+ return false;
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
+ return (Signed && isInt<12>(Offset)) ||
+ (!Signed && isUInt<11>(Offset));
+ }
+
+ return (Signed && isInt<13>(Offset)) ||
+ (!Signed && isUInt<12>(Offset));
+}
+
+
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1f3c659..3ff35da 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -970,6 +970,12 @@
return isUInt<12>(Imm);
}
+ /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
+ /// encoded instruction. If \p Signed, this is for an instruction that
+ /// interprets the offset as signed.
+ bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
+ bool Signed) const;
+
/// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.