[AMDGPU][MC] Added support for the lds_direct operand
See bug 39293: https://bugs.llvm.org/show_bug.cgi?id=39293
Reviewers: artem.tamazov, rampitec
Differential Revision: https://reviews.llvm.org/D57889
llvm-svn: 353524
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index ba1d6ee..60eef3f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -686,6 +686,9 @@
case AMDGPU::XNACK_MASK_HI:
llvm_unreachable("xnack_mask registers should not be used");
+ case AMDGPU::LDS_DIRECT:
+ llvm_unreachable("lds_direct register should not be used");
+
case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 89c0d48..0f0731b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1095,6 +1095,7 @@
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMIMGDataSize(const MCInst &Inst);
bool validateMIMGD16(const MCInst &Inst);
+ bool validateLdsDirect(const MCInst &Inst);
bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
@@ -1599,6 +1600,8 @@
.Case("vcc", AMDGPU::VCC)
.Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("xnack_mask", AMDGPU::XNACK_MASK)
+ .Case("lds_direct", AMDGPU::LDS_DIRECT)
+ .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
.Case("m0", AMDGPU::M0)
.Case("scc", AMDGPU::SCC)
.Case("tba", AMDGPU::TBA)
@@ -2465,6 +2468,86 @@
return true;
}
+bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
+ // Returns false if Inst uses lds_direct in a disallowed way, true otherwise.
+ using namespace SIInstrFlags;
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+
+ // lds_direct register is defined so that it can be used
+ // with 9-bit operands only. Ignore encodings which do not accept these.
+ if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
+ return true;
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ const int SrcIndices[] = { Src1Idx, Src2Idx };
+
+ // lds_direct cannot be specified as either src1 or src2.
+ for (int SrcIdx : SrcIndices) {
+ if (SrcIdx == -1) break; // Stops at the first missing index; src2 is skipped if src1 is absent.
+ const MCOperand &Src = Inst.getOperand(SrcIdx);
+ if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
+ return false;
+ }
+ }
+
+ if (Src0Idx == -1)
+ return true;
+
+ const MCOperand &Src = Inst.getOperand(Src0Idx);
+ if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
+ return true;
+
+ // lds_direct is specified as src0. Check additional limitations.
+
+ // FIXME: This is a workaround for bug 37943
+ // which allows 64-bit VOP3 opcodes to use 32-bit operands.
+ if (AMDGPU::getRegOperandSize(getMRI(), Desc, Src0Idx) != 4)
+ return false;
+
+ // Documentation does not disable lds_direct for SDWA, but SP3 assembler does.
+ // FIXME: This inconsistency needs to be investigated further.
+ if (Desc.TSFlags & SIInstrFlags::SDWA)
+ return false;
+
+ // The following opcodes do not accept lds_direct which is explicitly stated
+ // in AMD documentation. However SP3 disables lds_direct for most other 'rev'
+ // opcodes as well (e.g. for v_subrev_u32 but not for v_subrev_f32).
+ // FIXME: This inconsistency needs to be investigated further.
+ switch (Opcode) {
+ case AMDGPU::V_LSHLREV_B32_e32_si:
+ case AMDGPU::V_LSHLREV_B32_e64_si:
+ case AMDGPU::V_LSHLREV_B16_e32_vi:
+ case AMDGPU::V_LSHLREV_B16_e64_vi:
+ case AMDGPU::V_LSHLREV_B32_e32_vi:
+ case AMDGPU::V_LSHLREV_B32_e64_vi:
+ case AMDGPU::V_LSHLREV_B64_vi:
+ case AMDGPU::V_LSHRREV_B32_e32_si:
+ case AMDGPU::V_LSHRREV_B32_e64_si:
+ case AMDGPU::V_LSHRREV_B16_e32_vi:
+ case AMDGPU::V_LSHRREV_B16_e64_vi:
+ case AMDGPU::V_LSHRREV_B32_e32_vi:
+ case AMDGPU::V_LSHRREV_B32_e64_vi:
+ case AMDGPU::V_LSHRREV_B64_vi:
+ case AMDGPU::V_ASHRREV_I32_e64_si:
+ case AMDGPU::V_ASHRREV_I32_e32_si:
+ case AMDGPU::V_ASHRREV_I16_e32_vi:
+ case AMDGPU::V_ASHRREV_I16_e64_vi:
+ case AMDGPU::V_ASHRREV_I32_e32_vi:
+ case AMDGPU::V_ASHRREV_I32_e64_vi:
+ case AMDGPU::V_ASHRREV_I64_vi:
+ case AMDGPU::V_PK_LSHLREV_B16_vi:
+ case AMDGPU::V_PK_LSHRREV_B16_vi:
+ case AMDGPU::V_PK_ASHRREV_I16_vi:
+ return false;
+ default:
+ return true;
+ }
+}
+
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
@@ -2500,6 +2583,11 @@
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
const SMLoc &IDLoc) {
+ if (!validateLdsDirect(Inst)) {
+ Error(IDLoc,
+ "invalid use of lds_direct");
+ return false;
+ }
if (!validateSOPLiteral(Inst)) {
Error(IDLoc,
"only one literal operand is allowed");
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 8cdc06c..9ae1bcd 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -781,6 +781,7 @@
// ToDo: no support for execz register
case 252: break;
case 253: return createRegOperand(SCC);
+ case 254: return createRegOperand(LDS_DIRECT);
default: break;
}
return errOperand(Val, "unknown operand encoding " + Twine(Val));
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 4a430ba..3871cfd 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -268,6 +268,9 @@
case AMDGPU::XNACK_MASK:
O << "xnack_mask";
return;
+ case AMDGPU::LDS_DIRECT:
+ O << "src_lds_direct";
+ return;
case AMDGPU::VCC_LO:
O << "vcc_lo";
return;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index fdaf126..fdbafd9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -163,6 +163,9 @@
// Reserve xnack_mask registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
+ // Reserve lds_direct register - support is not implemented in Codegen.
+ reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
+
// Reserve Trap Handler registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::TBA);
reserveRegisterTuples(Reserved, AMDGPU::TMA);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 92022d5..84751d1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -75,6 +75,8 @@
def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
+def LDS_DIRECT : SIReg <"lds_direct", 254>;
+
def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>;
def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>;
@@ -409,6 +411,12 @@
let CopyCost = -1;
}
+def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+ (add LDS_DIRECT)> { // Register class whose only member is LDS_DIRECT.
+ let isAllocatable = 0; // Marked as not allocatable.
+ let CopyCost = -1;
+}
+
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
@@ -545,7 +553,7 @@
}
def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
- (add VGPR_32, SReg_32)> {
+ (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
}