[AMDGPU] New tbuffer intrinsics
Summary:
This commit adds new intrinsics
llvm.amdgcn.raw.tbuffer.load
llvm.amdgcn.struct.tbuffer.load
llvm.amdgcn.raw.tbuffer.store
llvm.amdgcn.struct.tbuffer.store
with the following changes from the llvm.amdgcn.tbuffer.* intrinsics:
* there are separate raw and struct versions: raw does not have an index
arg and sets idxen=0 in the instruction, and struct always sets
idxen=1 in the instruction even if the index is 0, to allow for the
fact that gfx9 does bounds checking differently depending on whether
idxen is set;
* there is a combined format arg (dfmt+nfmt)
* there is a combined cachepolicy arg (glc+slc)
* there are now only two offset args: one for the offset that is
included in bounds checking and swizzling, to be split between the
instruction's voffset and immoffset fields, and one for the offset
that is excluded from bounds checking and swizzling, to go into the
instruction's soffset field.
The AMDISD::TBUFFER_* SD nodes always have an index operand, all three
offset operands, combined format operand, combined cachepolicy operand,
and an extra idxen operand.
The tbuffer pseudo- and real instructions now also have a combined
format operand.
The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store
intrinsics continue to work.
V2: Separate raw and struct intrinsics.
V3: Moved extract_glc and extract_slc defs to a more sensible place.
V4: Rebased on D49995.
V5: Only two separate offset args instead of three.
V6: Pseudo- and real instructions have joint format operand.
V7: Restored optionality of dfmt and nfmt in assembler.
V8: Addressed minor review comments.
Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D49026
Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4
llvm-svn: 340268
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index b87c47a..0e14760 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -100,15 +100,11 @@
bits<1> has_vaddr = 1;
bits<1> has_glc = 1;
bits<1> glc_value = 0; // the value for glc if no such operand
- bits<4> dfmt_value = 1; // the value for dfmt if no such operand
- bits<3> nfmt_value = 0; // the value for nfmt if no such operand
bits<1> has_srsrc = 1;
bits<1> has_soffset = 1;
bits<1> has_offset = 1;
bits<1> has_slc = 1;
bits<1> has_tfe = 1;
- bits<1> has_dfmt = 1;
- bits<1> has_nfmt = 1;
}
class MTBUF_Real <MTBUF_Pseudo ps> :
@@ -126,14 +122,16 @@
bits<12> offset;
bits<1> glc;
- bits<4> dfmt;
- bits<3> nfmt;
+ bits<7> format;
bits<8> vaddr;
bits<8> vdata;
bits<7> srsrc;
bits<1> slc;
bits<1> tfe;
bits<8> soffset;
+
+ bits<4> dfmt = format{3-0};
+ bits<3> nfmt = format{6-4};
}
class getMTBUFInsDA<list<RegisterClass> vdataList,
@@ -142,16 +140,16 @@
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, SLC:$slc, TFE:$tfe),
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc, SLC:$slc, TFE:$tfe)
+ offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe)
);
dag InsData = !if(!empty(vaddrList),
(ins vdataClass:$vdata, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe),
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, DFMT:$dfmt, NFMT:$nfmt, GLC:$glc,
+ SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
@@ -169,15 +167,15 @@
class getMTBUFAsmOps<int addrKind> {
string Pfx =
- !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $dfmt, $nfmt, $soffset",
+ !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $format, $soffset",
!if(!eq(addrKind, BUFAddrKind.OffEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset offen",
+ "$vaddr, $srsrc, $format, $soffset offen",
!if(!eq(addrKind, BUFAddrKind.IdxEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen",
+ "$vaddr, $srsrc, $format, $soffset idxen",
!if(!eq(addrKind, BUFAddrKind.BothEn),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset idxen offen",
+ "$vaddr, $srsrc, $format, $soffset idxen offen",
!if(!eq(addrKind, BUFAddrKind.Addr64),
- "$vaddr, $srsrc, $dfmt, $nfmt, $soffset addr64",
+ "$vaddr, $srsrc, $format, $soffset addr64",
"")))));
string ret = Pfx # "$offset";
}
@@ -217,14 +215,14 @@
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set load_vt:$vdata,
- (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$dfmt,
- i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
+ i1:$glc, i1:$slc, i1:$tfe)))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set load_vt:$vdata,
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
- i8:$dfmt, i8:$nfmt, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ i8:$format, i1:$glc, i1:$slc, i1:$tfe)))]>,
MTBUFAddr64Table<1, NAME>;
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
@@ -263,13 +261,13 @@
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i16:$offset, i8:$format, i1:$glc,
i1:$slc, i1:$tfe))]>,
MTBUFAddr64Table<0, NAME>;
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i8:$dfmt, i8:$nfmt, i1:$glc,
+ i16:$offset, i8:$format, i1:$glc,
i1:$slc, i1:$tfe))]>,
MTBUFAddr64Table<1, NAME>;
@@ -1030,6 +1028,14 @@
// MUBUF Patterns
//===----------------------------------------------------------------------===//
+def extract_glc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
+}]>;
+
+def extract_slc : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
+}]>;
+
//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
@@ -1524,32 +1530,36 @@
string opcode> {
def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)),
+ imm:$format, imm:$cachepolicy, imm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
@@ -1576,39 +1586,36 @@
string opcode> {
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$format, imm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i8imm $dfmt),
- (as_i8imm $nfmt), (as_i1imm $glc),
- (as_i1imm $slc), 0)
+ (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
- imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc),
+ imm:$offset, imm:$format, imm:$cachepolicy, imm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0)
+ $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
+ (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0)
>;
}
@@ -1781,8 +1788,8 @@
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{15} = ps.addr64;
let Inst{18-16} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -1811,6 +1818,7 @@
//===----------------------------------------------------------------------===//
// CI
+// MTBUF - GFX6, GFX7.
//===----------------------------------------------------------------------===//
class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
@@ -2013,8 +2021,8 @@
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
@@ -2043,8 +2051,8 @@
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{18-15} = op;
- let Inst{22-19} = !if(ps.has_dfmt, dfmt, ps.dfmt_value);
- let Inst{25-23} = !if(ps.has_nfmt, nfmt, ps.nfmt_value);
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata, ?);