AMDGPU: Turn D16 for MIMG instructions into a regular operand Summary: This allows us to reduce the number of different machine instruction opcodes, which reduces the table sizes and helps flatten the TableGen multiclass hierarchies. We can do this because for each hardware MIMG opcode, we have a full set of IMAGE_xxx_Vn_Vm machine instructions for all required sizes of vdata and vaddr registers. Instead of having separate D16 machine instructions, a packed D16 instructions loading e.g. 4 components can simply use the same V2 opcode variant that non-D16 instructions use. We still require a TSFlag for D16 buffer instructions, because the D16-ness of buffer instructions is part of the opcode. Renaming the flag should help avoid future confusion. The one non-obvious code change is that for gather4 instructions, the disassembler can no longer automatically decide whether to use a V2 or a V4 variant. The existing logic which choose the correct variant for other MIMG instruction is extended to cover gather4 as well. As a bonus, some of the assembler error messages are now more helpful (e.g., complaining about a wrong data size instead of a non-existing instruction). While we're at it, delete a whole bunch of dead legacy TableGen code. Change-Id: I89b02c2841c06f95e662541433e597f5d4553978 Reviewers: arsenm, rampitec, kzhuravl, artem.tamazov, dp, rtaylor Subscribers: wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D47434 llvm-svn: 335222

commit: f2674319018151e00f603ebab34ab2155f669394 [log] [tgz]
author: Nicolai Haehnle <nhaehnle@gmail.com> Thu Jun 21 13:36:01 2018 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com> Thu Jun 21 13:36:01 2018 +0000
tree: 587f6a59416cb5c38666b4365f2c20578eb0eb05
parent: 7d69e0f37d2829e720662d6398a956c6b2e6048d [diff] [blame]
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 18cc67f..57c552d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

@@ -2301,10 +2301,6 @@
   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
     return true;
 
-  // Gather4 instructions do not need validation: dst size is hardcoded.
-  if (Desc.TSFlags & SIInstrFlags::Gather4)
-    return true;
-
   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
@@ -2319,9 +2315,12 @@
   if (DMask == 0)
     DMask = 1;
 
-  unsigned DataSize = countPopulation(DMask);
-  if ((Desc.TSFlags & SIInstrFlags::D16) != 0 && hasPackedD16()) {
-    DataSize = (DataSize + 1) / 2;
+  unsigned DataSize =
+    (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
+  if (hasPackedD16()) {
+    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+    if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
+      DataSize = (DataSize + 1) / 2;
   }
 
   return (VDataSize / 4) == DataSize + TFESize;
@@ -2389,10 +2388,14 @@
 
   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
     return true;
-  if ((Desc.TSFlags & SIInstrFlags::D16) == 0)
-    return true;
 
-  return !isCI() && !isSI();
+  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
+  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
+    if (isCI() || isSI())
+      return false;
+  }
+
+  return true;
 }
 
 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
@@ -4261,6 +4264,7 @@
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
 }
 
 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
@@ -4287,6 +4291,10 @@
   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
 }
 
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultD16() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyD16);
+}
+
 //===----------------------------------------------------------------------===//
 // smrd
 //===----------------------------------------------------------------------===//
@@ -4389,6 +4397,7 @@
   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
   {"r128",    AMDGPUOperand::ImmTyR128,  true, nullptr},
   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
+  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
@@ -5094,8 +5103,6 @@
     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
   case MCK_glc:
     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
-  case MCK_d16:
-    return Operand.isD16() ? Match_Success : Match_InvalidOperand;
   case MCK_idxen:
     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
   case MCK_offen:
commit	f2674319018151e00f603ebab34ab2155f669394	[log] [tgz]
author	Nicolai Haehnle <nhaehnle@gmail.com>	Thu Jun 21 13:36:01 2018 +0000
committer	Nicolai Haehnle <nhaehnle@gmail.com>	Thu Jun 21 13:36:01 2018 +0000
tree	587f6a59416cb5c38666b4365f2c20578eb0eb05
parent	7d69e0f37d2829e720662d6398a956c6b2e6048d [diff] [blame]