blob: a46cbf3e624e2446449fae3fabda09b5b61f9305 [file] [log] [blame]
//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
// Immediate offset operand. It is used as the $offset input of the _IMM_si
// and _IMM_vi real load instructions defined later in this file.
def smrd_offset
    : NamedOperandU32<"SMRDOffset", NamedMatchClass<"SMRDOffset">> {
  let OperandType = "OPERAND_IMMEDIATE";
}
14
15
16//===----------------------------------------------------------------------===//
17// Scalar Memory classes
18//===----------------------------------------------------------------------===//
19
// Common base class of every scalar memory pseudo instruction.
//
// The pseudo itself is given an empty asm string; the mnemonic and the operand
// string are kept in the Mnemonic / AsmOperands fields so that SM_Real can
// paste them together for the encoded variants.
//
//   opName  - mnemonic, also used as the SIMCInstr pseudo name.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string.
//   pattern - optional selection pattern.
class SM_Pseudo <string opName, dag outs, dag ins, string asmOps,
                 list<dag> pattern = []>
  : InstSI <outs, ins, "", pattern>,
    SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Assembly text pieces consumed by SM_Real.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Operand presence flags. Subclasses clear the ones they do not use and the
  // real-instruction classes consult them when filling in encoding bits.
  bits<1> has_sbase = 1;
  bits<1> has_sdst = 1;
  bits<1> has_offset = 1;
  bits<1> offset_is_imm = 0;

  // This record is a code generation only pseudo.
  let isCodeGenOnly = 1;
  let isPseudo = 1;

  // Default memory behaviour: a load without other side effects.
  let mayLoad = 1;
  let mayStore = 0;
  let hasSideEffects = 0;

  // Instruction classification and scheduling.
  let SMRD = 1;
  let LGKM_CNT = 1;
  let SchedRW = [WriteSMEM];
  let UseNamedOperandTable = 1;
  let SubtargetPredicate = isGCN;
}
43
// Base class of the encoded ("real") counterpart of an SM_Pseudo.
//
// Operand lists are taken over from the pseudo and the asm string is built by
// concatenating the pseudo's Mnemonic and AsmOperands.
class SM_Real <SM_Pseudo ps>
  : InstSI<ps.OutOperandList, ps.InOperandList,
           ps.Mnemonic # ps.AsmOperands, []> {

  // Encoding fields shared by the per-target encoding classes.
  bits<7>  sbase;
  bits<7>  sdst;
  bits<32> offset;
  // Set only when the pseudo has an offset and that offset is an immediate.
  bits<1>  imm = !if(ps.has_offset, ps.offset_is_imm, 0);

  // Unlike the pseudo, this is not a code generation only pseudo.
  let isCodeGenOnly = 0;
  let isPseudo = 0;

  // Flags copied from the pseudo.
  let AsmMatchConverter = ps.AsmMatchConverter;
  let SubtargetPredicate = ps.SubtargetPredicate;
}
60
// Scalar load pseudo. It records the register class of the base operand in
// BaseClass so that the real-instruction classes can rebuild InOperandList
// with a target-specific offset operand.
class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps,
                      list<dag> pattern = []>
  : SM_Pseudo<opName, outs, ins, asmOps, pattern> {
  RegisterClass BaseClass;
}
65
// Defines the two addressing variants of a scalar load pseudo:
//   <name>_IMM  - base register plus immediate offset.
//   <name>_SGPR - base register plus an offset held in an SReg_32 register.
//
//   opName    - mnemonic of the instruction.
//   baseClass - register class of the $sbase operand.
//   dstClass  - register class of the $sdst result.
multiclass SM_Pseudo_Loads<string opName,
                           RegisterClass baseClass,
                           RegisterClass dstClass> {
  def _IMM : SM_Load_Pseudo <
      opName,
      (outs dstClass:$sdst),
      (ins baseClass:$sbase, i32imm:$offset),
      " $sdst, $sbase, $offset", []> {
    let PseudoInstr = opName # "_IMM";
    let BaseClass = baseClass;
    let offset_is_imm = 1;
  }

  // NOTE(review): the register offset operand is named $soff here while the
  // asm operand string refers to $offset. The pseudo itself has an empty asm
  // string, and the real instructions in this file override InOperandList
  // with an operand named $offset, so the string resolves there.
  def _SGPR : SM_Load_Pseudo <
      opName,
      (outs dstClass:$sdst),
      (ins baseClass:$sbase, SReg_32:$soff),
      " $sdst, $sbase, $offset", []> {
    let PseudoInstr = opName # "_SGPR";
    let BaseClass = baseClass;
  }
}
85
// Pseudo for instructions that produce a 64-bit value in $sdst and take no
// inputs. It is selected directly from the given intrinsic node.
class SM_Time_Pseudo<string opName, SDPatternOperator node>
  : SM_Pseudo<opName, (outs SReg_64:$sdst), (ins), " $sdst",
              [(set i64:$sdst, (node))]> {
  // No base register and no offset operand.
  let has_sbase = 0;
  let has_offset = 0;

  let hasSideEffects = 1;

  // FIXME: mayStore = ? is a workaround for tablegen bug for different
  // inferred mayStore flags for the instruction pattern vs. standalone
  // Pat. Each considers the other contradictory.
  let mayLoad = ?;
  let mayStore = ?;
}
98
// Pseudo for operand-less instructions (no outs, no ins, empty operand
// string). It is selected directly from the given intrinsic node.
class SM_Inval_Pseudo <string opName, SDPatternOperator node>
  : SM_Pseudo<opName, (outs), (ins), "", [(node)]> {
  // None of the usual operands is present.
  let has_sbase = 0;
  let has_sdst = 0;
  let has_offset = 0;

  let mayStore = 1;
  let hasSideEffects = 1;
}
107
108
109//===----------------------------------------------------------------------===//
110// Scalar Memory Instructions
111//===----------------------------------------------------------------------===//
112
// The 32-bit SMRD loads use the SReg_32_XM0 register class for the result
// rather than SReg_32: SReg_32_XM0 does not include M0, and writing to M0
// from an SMRD instruction will hang the GPU.
defm S_LOAD_DWORD    : SM_Pseudo_Loads <"s_load_dword",    SReg_64, SReg_32_XM0>;
defm S_LOAD_DWORDX2  : SM_Pseudo_Loads <"s_load_dwordx2",  SReg_64, SReg_64>;
defm S_LOAD_DWORDX4  : SM_Pseudo_Loads <"s_load_dwordx4",  SReg_64, SReg_128>;
defm S_LOAD_DWORDX8  : SM_Pseudo_Loads <"s_load_dwordx8",  SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
121
// Buffer loads: the base operand is an SReg_128 register. The 32-bit variant
// uses SReg_32_XM0 for its result, like S_LOAD_DWORD.
defm S_BUFFER_LOAD_DWORD
    : SM_Pseudo_Loads <"s_buffer_load_dword", SReg_128, SReg_32_XM0>;

defm S_BUFFER_LOAD_DWORDX2
    : SM_Pseudo_Loads <"s_buffer_load_dwordx2", SReg_128, SReg_64>;

defm S_BUFFER_LOAD_DWORDX4
    : SM_Pseudo_Loads <"s_buffer_load_dwordx4", SReg_128, SReg_128>;

defm S_BUFFER_LOAD_DWORDX8
    : SM_Pseudo_Loads <"s_buffer_load_dwordx8", SReg_128, SReg_256>;

defm S_BUFFER_LOAD_DWORDX16
    : SM_Pseudo_Loads <"s_buffer_load_dwordx16", SReg_128, SReg_512>;
141
// Pseudos that keep the default subtarget predicate of SM_Pseudo.
def S_MEMTIME    : SM_Time_Pseudo  <"s_memtime",    int_amdgcn_s_memtime>;
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;

// Pseudo restricted by the isCIVI predicate.
let SubtargetPredicate = isCIVI in
def S_DCACHE_INV_VOL
    : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;

// Pseudos restricted by the isVI predicate.
let SubtargetPredicate = isVI in {
def S_DCACHE_WB
    : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
def S_DCACHE_WB_VOL
    : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
def S_MEMREALTIME
    : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
} // SubtargetPredicate = isVI
154
155
156
157//===----------------------------------------------------------------------===//
158// Scalar Memory Patterns
159//===----------------------------------------------------------------------===//
160
// Load fragment that can be selected to a scalar memory load. The predicate
// accepts a load only if its alignment is at least 4, it reads from the
// constant address space, and the target lowering reports the memory
// operation as uniform.
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
  auto Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 4 &&
         Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
         static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
}]>;
167
// Addressing-mode matchers for pointer based loads: an i64 address that is
// split into two operands by the named C++ selector.
def SMRDImm   : ComplexPattern<i64, 2, "SelectSMRDImm">;
def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
def SMRDSgpr  : ComplexPattern<i64, 2, "SelectSMRDSgpr">;

// Offset matchers for buffer loads: an i32 offset yielding a single operand.
def SMRDBufferImm   : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
def SMRDBufferSgpr  : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">;
174
175let Predicates = [isGCN] in {
176
// Selection patterns mapping an smrd_load of type `vt` onto the _IMM and
// _SGPR pseudos whose record name starts with `Instr`.
multiclass SMRD_Pattern <string Instr, ValueType vt> {

  // 1. IMM offset
  def : Pat <(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
             (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset))>;

  // 2. SGPR offset
  def : Pat <(smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
             (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset))>;
}
191
// Under the isSICI predicate, readcyclecounter is selected to S_MEMTIME.
let Predicates = [isSICI] in
def : Pat <(i64 (readcyclecounter)), (S_MEMTIME)>;
198
// Global and constant loads can be selected to either MUBUF or SMRD
// instructions. SMRD instructions are faster, so the patterns below are given
// extra complexity to make the instruction selector prefer them.
let AddedComplexity = 100 in {

defm : SMRD_Pattern <"S_LOAD_DWORD",    i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX2",  v2i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX4",  v4i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX8",  v8i32>;
defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;

// 1. Offset as an immediate.
// This pattern is named so that its AddedComplexity can be reused on CI.
def SM_LOAD_PATTERN : Pat <
  (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
  (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset)
>;

// 2. Offset held in a 32-bit SGPR.
def : Pat <
  (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
  (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset)
>;

} // End let AddedComplexity = 100
223
224} // let Predicates = [isGCN]
225
// Patterns guarded by the isVI predicate.
let Predicates = [isVI] in {

// 1. Offset as 20bit DWORD immediate
def : Pat <(SIload_constant v4i32:$sbase, IMM20bit:$offset),
           (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))>;

// readcyclecounter is selected to S_MEMREALTIME under this predicate.
def : Pat <(i64 (readcyclecounter)), (S_MEMREALTIME)>;

} // let Predicates = [isVI]
240
241
242//===----------------------------------------------------------------------===//
243// Targets
244//===----------------------------------------------------------------------===//
245
246//===----------------------------------------------------------------------===//
247// SI
248//===----------------------------------------------------------------------===//
249
// 32-bit encoding of a scalar memory instruction, assembled under the isSICI
// predicate and decoded in the "SICI" namespace.
//
//   op - 5-bit opcode placed in Inst{26-22}.
//   ps - pseudo instruction this encoding belongs to.
//
// Fields for operands the pseudo does not have are left unset (?).
class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>,
    Enc32 {

  let DecoderNamespace = "SICI";
  let AssemblerPredicates = [isSICI];

  let Inst{31-27} = 0x18; // encoding
  let Inst{26-22} = op;
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{8}     = imm;
  let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, ?);
}
265
// Defines the <name>_IMM_si and <name>_SGPR_si encodings of a load.
//
//   op     - 5-bit opcode shared by both variants.
//   ps     - record name prefix of the pseudo pair created by SM_Pseudo_Loads.
//   immPs  - defaults to the <ps>_IMM pseudo.
//   sgprPs - defaults to the <ps>_SGPR pseudo.
//
// InOperandList is replaced in both variants so that the offset operand is
// named $offset and, for the immediate form, uses the smrd_offset operand.
multiclass SM_Real_Loads_si<bits<5> op, string ps,
                            SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
                            SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
  def _IMM_si : SMRD_Real_si <op, immPs> {
    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset);
  }

  def _SGPR_si : SMRD_Real_si <op, sgprPs> {
    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset);
  }
}
276
// Opcode assignments for the SMRD_Real_si encoding.
defm S_LOAD_DWORD           : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
defm S_LOAD_DWORDX2         : SM_Real_Loads_si <0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_DWORDX4         : SM_Real_Loads_si <0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_DWORDX8         : SM_Real_Loads_si <0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_DWORDX16        : SM_Real_Loads_si <0x04, "S_LOAD_DWORDX16">;
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_si <0x08, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_si <0x09, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_si <0x0a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_si <0x0b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c, "S_BUFFER_LOAD_DWORDX16">;

def S_MEMTIME_si    : SMRD_Real_si <0x1e, S_MEMTIME>;
def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
290
291
292//===----------------------------------------------------------------------===//
293// VI
294//===----------------------------------------------------------------------===//
295
// 64-bit encoding of a scalar memory instruction, assembled under the isVI
// predicate and decoded in the "VI" namespace.
//
//   op - 8-bit opcode placed in Inst{25-18}.
//   ps - pseudo instruction this encoding belongs to.
//
// Fields for operands the pseudo does not have are left unset (?).
class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
  : SM_Real<ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>,
    Enc64 {

  let DecoderNamespace = "VI";
  let AssemblerPredicates = [isVI];

  let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?);
  let Inst{31-26} = 0x30; // encoding
  let Inst{25-18} = op;
  let Inst{17}    = imm;

  // glc is only applicable to scalar stores, which are not yet
  // implemented.
  let Inst{16}    = 0; // glc bit

  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
}
315
// Defines the <name>_IMM_vi and <name>_SGPR_vi encodings of a load.
//
//   op     - 8-bit opcode shared by both variants.
//   ps     - record name prefix of the pseudo pair created by SM_Pseudo_Loads.
//   immPs  - defaults to the <ps>_IMM pseudo.
//   sgprPs - defaults to the <ps>_SGPR pseudo.
//
// InOperandList is replaced in both variants so that the offset operand is
// named $offset and, for the immediate form, uses the smrd_offset operand.
multiclass SM_Real_Loads_vi<bits<8> op, string ps,
                            SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
                            SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
  def _IMM_vi : SMEM_Real_vi <op, immPs> {
    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset:$offset);
  }

  def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset);
  }
}
326
// Opcode assignments for the SMEM_Real_vi encoding.
defm S_LOAD_DWORD           : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
defm S_LOAD_DWORDX2         : SM_Real_Loads_vi <0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_DWORDX4         : SM_Real_Loads_vi <0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_DWORDX8         : SM_Real_Loads_vi <0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_DWORDX16        : SM_Real_Loads_vi <0x04, "S_LOAD_DWORDX16">;
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_vi <0x08, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_vi <0x09, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;

def S_DCACHE_INV_vi     : SMEM_Real_vi <0x20, S_DCACHE_INV>;
def S_DCACHE_WB_vi      : SMEM_Real_vi <0x21, S_DCACHE_WB>;
def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
def S_DCACHE_WB_VOL_vi  : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
def S_MEMTIME_vi        : SMEM_Real_vi <0x24, S_MEMTIME>;
def S_MEMREALTIME_vi    : SMEM_Real_vi <0x25, S_MEMREALTIME>;
344
345
346//===----------------------------------------------------------------------===//
347// CI
348//===----------------------------------------------------------------------===//
349
// Immediate offset operand used as the $offset input of the
// SMRD_Real_Load_IMM_ci instructions, which encode all 32 bits of it.
def smrd_literal_offset
    : NamedOperandU32<"SMRDLiteralOffset",
                      NamedMatchClass<"SMRDLiteralOffset">> {
  let OperandType = "OPERAND_IMMEDIATE";
}
354
// 64-bit load encoding carrying a 32-bit literal offset in Inst{63-32},
// assembled under the isCIOnly predicate and decoded in the "CI" namespace.
//
//   op - 5-bit opcode placed in Inst{26-22}.
//   ps - load pseudo providing the base register class and instruction flags.
//
// These records are referenced directly by selection patterns in this file
// (they are not tied to the pseudo through SIMCInstr), so the code generation
// relevant flags are copied from the pseudo.
class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps>
  : SM_Real<ps>, Enc64 {

  let DecoderNamespace = "CI";
  let AssemblerPredicates = [isCIOnly];
  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset);

  // Flags copied from the pseudo.
  let SMRD = ps.SMRD;
  let LGKM_CNT = ps.LGKM_CNT;
  let mayLoad = ps.mayLoad;
  let mayStore = ps.mayStore;
  let hasSideEffects = ps.hasSideEffects;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW = ps.SchedRW;

  let Inst{63-32} = offset{31-0};
  let Inst{31-27} = 0x18; // encoding
  let Inst{26-22} = op;
  let Inst{21-15} = sdst{6-0};
  let Inst{14-9}  = sbase{6-1};
  // The low word uses fixed values in the imm bit and the 8-bit offset field.
  let Inst{8}     = 0;
  let Inst{7-0}   = 0xff;
}
379
// Opcode assignments for the literal-offset encoding.
def S_LOAD_DWORD_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
def S_LOAD_DWORDX2_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
def S_LOAD_DWORDX4_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
def S_LOAD_DWORDX8_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
def S_LOAD_DWORDX16_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
def S_BUFFER_LOAD_DWORD_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
def S_BUFFER_LOAD_DWORDX2_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
def S_BUFFER_LOAD_DWORDX4_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
def S_BUFFER_LOAD_DWORDX8_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
def S_BUFFER_LOAD_DWORDX16_IMM_ci
    : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
390
// 32-bit encoding with the same bit layout as SMRD_Real_si, but assembled
// under the isCIOnly predicate and decoded in the "CI" namespace.
//
//   op - 5-bit opcode placed in Inst{26-22}.
//   ps - pseudo instruction this encoding belongs to.
class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>,
    Enc32 {

  let DecoderNamespace = "CI";
  let AssemblerPredicates = [isCIOnly];

  let Inst{31-27} = 0x18; // encoding
  let Inst{26-22} = op;
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{8}     = imm;
  let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, ?);
}
406
// Encoding of S_DCACHE_INV_VOL in the SMRD_Real_ci format.
def S_DCACHE_INV_VOL_ci
    : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
408
409let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in {
410
// Selects an smrd_load of type `vt` whose address matches SMRDImm32 to the
// literal-offset instruction named <Instr>_IMM_ci. Restricted to isCIOnly,
// matching the AssemblerPredicates of SMRD_Real_Load_IMM_ci.
//
// The <Instr>_IMM_ci records derive from SM_Real (and therefore InstSI), not
// from SM_Pseudo, so the lookup casts to InstSI. Casting to SM_Pseudo names a
// class these records do not belong to, which a type-checking !cast rejects.
class SMRD_Pattern_ci <string Instr, ValueType vt> : Pat <
  (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
  (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset))> {
  let Predicates = [isCIOnly];
}
416
// One literal-offset load pattern per load width.
def : SMRD_Pattern_ci <"S_LOAD_DWORD",    i32>;
def : SMRD_Pattern_ci <"S_LOAD_DWORDX2",  v2i32>;
def : SMRD_Pattern_ci <"S_LOAD_DWORDX4",  v4i32>;
def : SMRD_Pattern_ci <"S_LOAD_DWORDX8",  v8i32>;
def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>;
422
// Buffer load with a 32-bit literal offset.
//
// The selected instruction, S_BUFFER_LOAD_DWORD_IMM_ci, is an
// SMRD_Real_Load_IMM_ci, whose encoding is only assembled under isCIOnly, and
// the sibling SMRD_Pattern_ci patterns are guarded by isCIOnly as well. Use
// the same predicate here so this pattern can never pick the CI-only encoding
// on a subtarget where that instruction is not available.
def : Pat <
  (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
  (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)> {
  let Predicates = [isCIOnly];
}
428
429} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity
430