blob: edca6fcd812c8f8f34f4e7bf7be5697321a9c57b [file] [log] [blame]
//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
// Address-matching complex patterns for FLAT memory operations. Each one
// matches an i64 address and yields three operands, which the patterns in
// this file bind as (vaddr, offset, slc). The C++ matchers are named by the
// string argument; the trailing -10 is the pattern complexity.
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>;
def FLATOffset : ComplexPattern<i64, 3, "SelectFlat",       [], [], -10>;
Valery Pykhtin8bc65962016-09-05 11:22:51 +000012
13//===----------------------------------------------------------------------===//
14// FLAT classes
15//===----------------------------------------------------------------------===//
16
// Common base of every FLAT pseudo instruction.
//
//   opName  - mnemonic, also used as the SIMCInstr key.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string.
//   pattern - optional selection pattern.
//
// The pseudo itself has an empty asm string and no encoding family; the
// mnemonic and operand syntax are kept in Mnemonic/AsmOperands so that
// FLAT_Real can build the final asm string from them.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern = []> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Pieces of the assembly string, consumed by FLAT_Real.
  string Mnemonic    = opName;
  string AsmOperands = asmOps;

  // Segment selectors. Both clear means a plain flat access.
  bits<1> is_flat_global  = 0;
  bits<1> is_flat_scratch = 0;

  // Which encoding fields this instruction really has. When has_glc is
  // clear, FLAT_Real hard-wires the glc bit to glcValue.
  bits<1> has_vdst = 1;
  bits<1> has_data = 1;
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;

  let isPseudo      = 1;
  let isCodeGenOnly = 1;

  let SubtargetPredicate = isCIVI;

  let FLAT = 1;
  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  let VM_CNT   = 1;
  let LGKM_CNT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects       = 0;
  let SchedRW              = [WriteVMEM];

  // Global-segment instructions only read EXEC; everything else also reads
  // FLAT_SCR.
  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);
}
52
// Encodable form of a FLAT pseudo.
//
//   op - 7-bit opcode placed in Inst{24-18}.
//   ps - the pseudo whose operand lists, asm string pieces and flags are
//        reused here.
class FLAT_Real<bits<7> op, FLAT_Pseudo ps> :
  InstSI<ps.OutOperandList, ps.InOperandList,
         ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo      = 0;
  let isCodeGenOnly = 0;

  // Flags mirrored from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // Operand-backed encoding fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<8> vdst;
  bits<1> slc;
  bits<1> glc;

  // Only valid on gfx9.
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. For plain flat accesses the highest bit is ignored and
  // the value is treated as a 12-bit unsigned offset.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9. The field is
  // declared but not wired into Inst below.
  bits<1> tfe = 0;

  // --- Bit layout of the 64-bit encoding ---

  // Bits 15-0 are only valid on GFX9+.
  let Inst{12-0}  = offset;
  let Inst{13}    = lds;
  let Inst{15-14} = seg;

  // Use the glc operand when the pseudo has one, else its fixed value.
  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17}    = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = vaddr;
  let Inst{47-40} = !if(ps.has_data, vdata, ?);
  // 54-48 is reserved.
  let Inst{55}    = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
103
// Pseudo for a FLAT load into a register of class regClass.
// HasSignedOffset picks the offset operand type: offset_s13 when set,
// offset_u12 otherwise. A load has no vdata field.
class FLAT_Load_Pseudo<string opName, RegisterClass regClass,
                       bit HasSignedOffset = 0> :
  FLAT_Pseudo<opName,
              (outs regClass:$vdst),
              !if(HasSignedOffset,
                  (ins VReg_64:$vaddr, offset_s13:$offset,
                       GLC:$glc, slc:$slc),
                  (ins VReg_64:$vaddr, offset_u12:$offset,
                       GLC:$glc, slc:$slc)),
              " $vdst, $vaddr$offset$glc$slc"> {
  let mayLoad  = 1;
  let has_data = 0;
}
115
// Load pseudo for the global segment: signed offset operand, and
// is_flat_global set.
class FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass>
  : FLAT_Load_Pseudo<opName, regClass, 1> {
  let is_flat_global = 1;
}
120
// Load pseudo for the scratch segment: signed offset operand, and
// is_flat_scratch set.
class FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass>
  : FLAT_Load_Pseudo<opName, regClass, 1> {
  let is_flat_scratch = 1;
}
125
// Pseudo for a FLAT store of a register of class vdataClass.
// HasSignedOffset picks the offset operand type: offset_s13 when set,
// offset_u12 otherwise. A store produces no result, so it has no vdst field.
class FLAT_Store_Pseudo<string opName, RegisterClass vdataClass,
                        bit HasSignedOffset = 0> :
  FLAT_Pseudo<opName,
              (outs),
              !if(HasSignedOffset,
                  (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset,
                       GLC:$glc, slc:$slc),
                  (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset,
                       GLC:$glc, slc:$slc)),
              " $vaddr, $vdata$offset$glc$slc"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let has_vdst = 0;
}
138
// Store pseudo for the global segment: signed offset operand, and
// is_flat_global set.
class FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass>
  : FLAT_Store_Pseudo<opName, regClass, 1> {
  let is_flat_global = 1;
}
143
// Store pseudo for the scratch segment: signed offset operand, and
// is_flat_scratch set.
class FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass>
  : FLAT_Store_Pseudo<opName, regClass, 1> {
  let is_flat_scratch = 1;
}
148
// Defines the two pseudos of a FLAT atomic:
//
//   NAME      - no-return form: no vdst, glc bit fixed to 0, no pattern.
//   NAME_RTN  - returning form: writes $vdst, glc bit fixed to 1 (the asm
//               string prints a literal " glc"), selected from `atomic`.
//
//   vdst_rc / vt      - register class and value type of the result.
//   atomic            - selection operator for the returning form.
//   data_vt / data_rc - value type and register class of $vdata; they
//                       default to the result's.
//   HasSignedOffset   - offset_s13 when set, offset_u12 otherwise.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit HasSignedOffset = 0> {

  // No-return variant.
  def "" : FLAT_Pseudo<opName,
                       (outs),
                       !if(HasSignedOffset,
                           (ins VReg_64:$vaddr, data_rc:$vdata,
                                offset_s13:$offset, slc:$slc),
                           (ins VReg_64:$vaddr, data_rc:$vdata,
                                offset_u12:$offset, slc:$slc)),
                       " $vaddr, $vdata$offset$slc",
                       []>,
           AtomicNoRet<NAME, 0> {
    let PseudoInstr = NAME;
    let mayLoad  = 1;
    let mayStore = 1;
    let has_vdst = 0;
    let has_glc  = 0;
    let glcValue = 0;
  }

  // Returning variant.
  def _RTN : FLAT_Pseudo<opName,
                         (outs vdst_rc:$vdst),
                         !if(HasSignedOffset,
                             (ins VReg_64:$vaddr, data_rc:$vdata,
                                  offset_s13:$offset, slc:$slc),
                             (ins VReg_64:$vaddr, data_rc:$vdata,
                                  offset_u12:$offset, slc:$slc)),
                         " $vdst, $vaddr, $vdata$offset glc$slc",
                         [(set vt:$vdst,
                           (atomic (FLATAtomic i64:$vaddr, i16:$offset,
                                               i1:$slc),
                                   data_vt:$vdata))]>,
             AtomicNoRet<NAME, 1> {
    let PseudoInstr = NAME # "_RTN";
    let mayLoad  = 1;
    let mayStore = 1;
    let hasPostISelHook = 1;
    let has_glc  = 0;
    let glcValue = 1;
  }
}
191
// Wraps a two-operand atomic node so it matches only when the memory node's
// address space is AMDGPUASI.FLAT_ADDRESS.
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{
    return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;
  }]
>;
197
// Flat-address-space variants of the binary atomic nodes.
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_swap_flat     : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat      : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat      : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat      : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat      : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat       : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat      : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat     : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat     : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat      : flat_binary_atomic_op<atomic_load_xor>;
def atomic_inc_flat      : flat_binary_atomic_op<SIatomic_inc>;
def atomic_dec_flat      : flat_binary_atomic_op<SIatomic_dec>;
211
212
213
214//===----------------------------------------------------------------------===//
215// Flat Instructions
216//===----------------------------------------------------------------------===//
217
// Flat loads (unsigned offset operand).
def FLAT_LOAD_UBYTE   : FLAT_Load_Pseudo<"flat_load_ubyte",   VGPR_32>;
def FLAT_LOAD_SBYTE   : FLAT_Load_Pseudo<"flat_load_sbyte",   VGPR_32>;
def FLAT_LOAD_USHORT  : FLAT_Load_Pseudo<"flat_load_ushort",  VGPR_32>;
def FLAT_LOAD_SSHORT  : FLAT_Load_Pseudo<"flat_load_sshort",  VGPR_32>;
def FLAT_LOAD_DWORD   : FLAT_Load_Pseudo<"flat_load_dword",   VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo<"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo<"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo<"flat_load_dwordx3", VReg_96>;
226
// Flat stores (unsigned offset operand).
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo<"flat_store_byte",    VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo<"flat_store_short",   VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo<"flat_store_dword",   VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo<"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo<"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo<"flat_store_dwordx3", VReg_96>;
233
// Integer flat atomics. Each defm creates the no-return pseudo and its _RTN
// counterpart. The cmpswap forms take a data operand twice as wide as the
// result, so they pass an explicit data type and register class.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo<"flat_atomic_cmpswap",
                                VGPR_32, i32, atomic_cmp_swap_flat,
                                v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo<"flat_atomic_cmpswap_x2",
                                VReg_64, i64, atomic_cmp_swap_flat,
                                v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP    : FLAT_Atomic_Pseudo<"flat_atomic_swap",
                             VGPR_32, i32, atomic_swap_flat>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo<"flat_atomic_swap_x2",
                             VReg_64, i64, atomic_swap_flat>;

// 32-bit forms.
defm FLAT_ATOMIC_ADD  : FLAT_Atomic_Pseudo<"flat_atomic_add",
                          VGPR_32, i32, atomic_add_flat>;
defm FLAT_ATOMIC_SUB  : FLAT_Atomic_Pseudo<"flat_atomic_sub",
                          VGPR_32, i32, atomic_sub_flat>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo<"flat_atomic_smin",
                          VGPR_32, i32, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo<"flat_atomic_umin",
                          VGPR_32, i32, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo<"flat_atomic_smax",
                          VGPR_32, i32, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo<"flat_atomic_umax",
                          VGPR_32, i32, atomic_umax_flat>;
defm FLAT_ATOMIC_AND  : FLAT_Atomic_Pseudo<"flat_atomic_and",
                          VGPR_32, i32, atomic_and_flat>;
defm FLAT_ATOMIC_OR   : FLAT_Atomic_Pseudo<"flat_atomic_or",
                          VGPR_32, i32, atomic_or_flat>;
defm FLAT_ATOMIC_XOR  : FLAT_Atomic_Pseudo<"flat_atomic_xor",
                          VGPR_32, i32, atomic_xor_flat>;
defm FLAT_ATOMIC_INC  : FLAT_Atomic_Pseudo<"flat_atomic_inc",
                          VGPR_32, i32, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC  : FLAT_Atomic_Pseudo<"flat_atomic_dec",
                          VGPR_32, i32, atomic_dec_flat>;

// 64-bit forms.
defm FLAT_ATOMIC_ADD_X2  : FLAT_Atomic_Pseudo<"flat_atomic_add_x2",
                             VReg_64, i64, atomic_add_flat>;
defm FLAT_ATOMIC_SUB_X2  : FLAT_Atomic_Pseudo<"flat_atomic_sub_x2",
                             VReg_64, i64, atomic_sub_flat>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo<"flat_atomic_smin_x2",
                             VReg_64, i64, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo<"flat_atomic_umin_x2",
                             VReg_64, i64, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo<"flat_atomic_smax_x2",
                             VReg_64, i64, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo<"flat_atomic_umax_x2",
                             VReg_64, i64, atomic_umax_flat>;
defm FLAT_ATOMIC_AND_X2  : FLAT_Atomic_Pseudo<"flat_atomic_and_x2",
                             VReg_64, i64, atomic_and_flat>;
defm FLAT_ATOMIC_OR_X2   : FLAT_Atomic_Pseudo<"flat_atomic_or_x2",
                             VReg_64, i64, atomic_or_flat>;
defm FLAT_ATOMIC_XOR_X2  : FLAT_Atomic_Pseudo<"flat_atomic_xor_x2",
                             VReg_64, i64, atomic_xor_flat>;
defm FLAT_ATOMIC_INC_X2  : FLAT_Atomic_Pseudo<"flat_atomic_inc_x2",
                             VReg_64, i64, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC_X2  : FLAT_Atomic_Pseudo<"flat_atomic_dec_x2",
                             VReg_64, i64, atomic_dec_flat>;
313
// Floating-point flat atomics, predicated on isCI. None of them has a
// selection pattern (the atomic operator is null_frag, explicitly or by
// default).
// CI Only flat instructions : FIXME Only?
let SubtargetPredicate = isCI in {

defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap",
                                 VGPR_32, f32, null_frag, v2f32, VReg_64>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap_x2",
                                 VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN    : FLAT_Atomic_Pseudo<"flat_atomic_fmin",
                             VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX    : FLAT_Atomic_Pseudo<"flat_atomic_fmax",
                             VGPR_32, f32>;
defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo<"flat_atomic_fmin_x2",
                             VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo<"flat_atomic_fmax_x2",
                             VReg_64, f64>;

} // End SubtargetPredicate = isCI
335
// Global-segment loads and stores, available only when the subtarget has
// HasFlatGlobalInsts.
let SubtargetPredicate = HasFlatGlobalInsts in {

def GLOBAL_LOAD_UBYTE   : FLAT_Global_Load_Pseudo<"global_load_ubyte",   VGPR_32>;
def GLOBAL_LOAD_SBYTE   : FLAT_Global_Load_Pseudo<"global_load_sbyte",   VGPR_32>;
def GLOBAL_LOAD_USHORT  : FLAT_Global_Load_Pseudo<"global_load_ushort",  VGPR_32>;
def GLOBAL_LOAD_SSHORT  : FLAT_Global_Load_Pseudo<"global_load_sshort",  VGPR_32>;
def GLOBAL_LOAD_DWORD   : FLAT_Global_Load_Pseudo<"global_load_dword",   VGPR_32>;
def GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo<"global_load_dwordx2", VReg_64>;
def GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo<"global_load_dwordx3", VReg_96>;
def GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo<"global_load_dwordx4", VReg_128>;

def GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo<"global_store_byte",    VGPR_32>;
def GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo<"global_store_short",   VGPR_32>;
def GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo<"global_store_dword",   VGPR_32>;
def GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo<"global_store_dwordx2", VReg_64>;
def GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo<"global_store_dwordx3", VReg_96>;
def GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo<"global_store_dwordx4", VReg_128>;

} // End SubtargetPredicate = HasFlatGlobalInsts
354
355
Valery Pykhtin8bc65962016-09-05 11:22:51 +0000356//===----------------------------------------------------------------------===//
357// Flat Patterns
358//===----------------------------------------------------------------------===//
359
// Restricts the load operator `ld` to memory nodes in the flat, global or
// constant address space.
class flat_ld<SDPatternOperator ld> : PatFrag<
  (ops node:$ptr),
  (ld node:$ptr),
  [{
    const auto AddrSpace = cast<MemSDNode>(N)->getAddressSpace();
    return AddrSpace == AMDGPUASI.FLAT_ADDRESS ||
           AddrSpace == AMDGPUASI.GLOBAL_ADDRESS ||
           AddrSpace == AMDGPUASI.CONSTANT_ADDRESS;
  }]
>;
367
// Restricts the store operator `st` to memory nodes in the flat or global
// address space.
class flat_st<SDPatternOperator st> : PatFrag<
  (ops node:$val, node:$ptr),
  (st node:$val, node:$ptr),
  [{
    const auto AddrSpace = cast<MemSDNode>(N)->getAddressSpace();
    return AddrSpace == AMDGPUASI.FLAT_ADDRESS ||
           AddrSpace == AMDGPUASI.GLOBAL_ADDRESS;
  }]
>;
374
// Address-space-filtered load fragments.
def atomic_flat_load   : flat_ld<atomic_load>;
def flat_load          : flat_ld<load>;
def flat_az_extloadi8  : flat_ld<az_extloadi8>;
def flat_sextloadi8    : flat_ld<sextloadi8>;
def flat_az_extloadi16 : flat_ld<az_extloadi16>;
def flat_sextloadi16   : flat_ld<sextloadi16>;
381
// Address-space-filtered store fragments.
def atomic_flat_store  : flat_st<atomic_store>;
def flat_store         : flat_st<store>;
def flat_truncstorei8  : flat_st<truncstorei8>;
def flat_truncstorei16 : flat_st<truncstorei16>;
386
// Selects a non-atomic load of type vt. The address is split by FLATAtomic
// into vaddr, offset and slc; the glc operand is fixed to 0.
class FlatLoadPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : Pat<
      (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
      (inst $vaddr, $offset, 0, $slc)
    >;
392
// Selects an atomic load of type vt. Same operands as a plain load, but the
// glc operand is fixed to 1.
class FlatLoadAtomicPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : Pat<
      (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
      (inst $vaddr, $offset, 1, $slc)
    >;
397
// Selects a non-atomic store of a vt value. The node takes the data first
// and the address second; the glc operand is fixed to 0.
class FlatStorePat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : Pat<
      (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
      (inst $vaddr, $data, $offset, 0, $slc)
    >;
402
// Selects an atomic store of a vt value with the glc operand fixed to 1.
class FlatStoreAtomicPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : Pat<
      // atomic store follows atomic binop convention so the address comes
      // first.
      (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
      (inst $vaddr, $data, $offset, 1, $slc)
    >;
409
// Selects a returning atomic read-modify-write. vt is the result type and
// data_vt the type of the data operand (defaults to vt). The instruction is
// given no glc operand.
class FlatAtomicPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                    ValueType data_vt = vt>
  : Pat<
      (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc),
                data_vt:$data)),
      (inst $vaddr, $data, $offset, $slc)
    >;
415
// Selection patterns enabled under the isCIVI predicate.
let Predicates = [isCIVI] in {

// Loads.
def : FlatLoadPat<FLAT_LOAD_UBYTE,   flat_az_extloadi8,  i32>;
def : FlatLoadPat<FLAT_LOAD_SBYTE,   flat_sextloadi8,    i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE,   flat_az_extloadi8,  i16>;
def : FlatLoadPat<FLAT_LOAD_SBYTE,   flat_sextloadi8,    i16>;
def : FlatLoadPat<FLAT_LOAD_USHORT,  flat_az_extloadi16, i32>;
def : FlatLoadPat<FLAT_LOAD_SSHORT,  flat_sextloadi16,   i32>;
def : FlatLoadPat<FLAT_LOAD_DWORD,   flat_load,          i32>;
def : FlatLoadPat<FLAT_LOAD_DWORDX2, flat_load,          v2i32>;
def : FlatLoadPat<FLAT_LOAD_DWORDX4, flat_load,          v4i32>;

// Atomic loads.
def : FlatLoadAtomicPat<FLAT_LOAD_DWORD,   atomic_flat_load, i32>;
def : FlatLoadAtomicPat<FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;

// Stores.
def : FlatStorePat<FLAT_STORE_BYTE,    flat_truncstorei8,  i32>;
def : FlatStorePat<FLAT_STORE_SHORT,   flat_truncstorei16, i32>;
def : FlatStorePat<FLAT_STORE_DWORD,   flat_store,         i32>;
def : FlatStorePat<FLAT_STORE_DWORDX2, flat_store,         v2i32>;
def : FlatStorePat<FLAT_STORE_DWORDX4, flat_store,         v4i32>;

// Atomic stores.
def : FlatStoreAtomicPat<FLAT_STORE_DWORD,   atomic_flat_store, i32>;
def : FlatStoreAtomicPat<FLAT_STORE_DWORDX2, atomic_flat_store, i64>;

// 32-bit returning atomics, matched from the *_global atomic fragments.
def : FlatAtomicPat<FLAT_ATOMIC_ADD_RTN,     atomic_add_global,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_SUB_RTN,     atomic_sub_global,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_INC_RTN,     atomic_inc_global,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_DEC_RTN,     atomic_dec_global,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_AND_RTN,     atomic_and_global,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_SMAX_RTN,    atomic_max_global,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_UMAX_RTN,    atomic_umax_global, i32>;
def : FlatAtomicPat<FLAT_ATOMIC_SMIN_RTN,    atomic_min_global,  i32>;
def : FlatAtomicPat<FLAT_ATOMIC_UMIN_RTN,    atomic_umin_global, i32>;
def : FlatAtomicPat<FLAT_ATOMIC_OR_RTN,      atomic_or_global,   i32>;
def : FlatAtomicPat<FLAT_ATOMIC_SWAP_RTN,    atomic_swap_global, i32>;
def : FlatAtomicPat<FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global,
                    i32, v2i32>;
def : FlatAtomicPat<FLAT_ATOMIC_XOR_RTN,     atomic_xor_global,  i32>;

// 64-bit returning atomics.
def : FlatAtomicPat<FLAT_ATOMIC_ADD_X2_RTN,     atomic_add_global,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_SUB_X2_RTN,     atomic_sub_global,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_INC_X2_RTN,     atomic_inc_global,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_DEC_X2_RTN,     atomic_dec_global,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_AND_X2_RTN,     atomic_and_global,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_SMAX_X2_RTN,    atomic_max_global,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_UMAX_X2_RTN,    atomic_umax_global, i64>;
def : FlatAtomicPat<FLAT_ATOMIC_SMIN_X2_RTN,    atomic_min_global,  i64>;
def : FlatAtomicPat<FLAT_ATOMIC_UMIN_X2_RTN,    atomic_umin_global, i64>;
def : FlatAtomicPat<FLAT_ATOMIC_OR_X2_RTN,      atomic_or_global,   i64>;
def : FlatAtomicPat<FLAT_ATOMIC_SWAP_X2_RTN,    atomic_swap_global, i64>;
def : FlatAtomicPat<FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global,
                    i64, v2i64>;
def : FlatAtomicPat<FLAT_ATOMIC_XOR_X2_RTN,     atomic_xor_global,  i64>;

} // End Predicates = [isCIVI]
469
// Stores of i16 values, enabled under the isVI predicate.
let Predicates = [isVI] in {
  def : FlatStorePat<FLAT_STORE_BYTE,  flat_truncstorei8, i16>;
  def : FlatStorePat<FLAT_STORE_SHORT, flat_store,        i16>;
} // End Predicates = [isVI]
Valery Pykhtin8bc65962016-09-05 11:22:51 +0000474
475
476//===----------------------------------------------------------------------===//
477// Target
478//===----------------------------------------------------------------------===//
479
480//===----------------------------------------------------------------------===//
481// CI
482//===----------------------------------------------------------------------===//
483
// Real (encodable) FLAT instruction for CI: registered under the SI encoding
// family, assembled only when isCIOnly holds, decoded in the "CI" namespace.
class FLAT_Real_ci<bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real<op, ps>,
  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
  let DecoderNamespace   = "CI";
  let AssemblerPredicate = isCIOnly;
}
490
// CI opcodes for flat loads.
def FLAT_LOAD_UBYTE_ci   : FLAT_Real_ci<0x8, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci   : FLAT_Real_ci<0x9, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci  : FLAT_Real_ci<0xa, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci  : FLAT_Real_ci<0xb, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci   : FLAT_Real_ci<0xc, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci<0xd, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci<0xe, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci<0xf, FLAT_LOAD_DWORDX3>;

// CI opcodes for flat stores.
def FLAT_STORE_BYTE_ci    : FLAT_Real_ci<0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci<0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci<0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci<0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci<0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci<0x1f, FLAT_STORE_DWORDX3>;
506
// Emits the CI real instructions for an atomic: NAME_ci for the no-return
// pseudo and NAME_RTN_ci for the returning one. Both share opcode `op`; the
// pseudos are looked up by name from ps.PseudoInstr.
multiclass FLAT_Real_Atomics_ci<bits<7> op, FLAT_Pseudo ps> {
  def _ci     : FLAT_Real_ci<op,
                  !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci : FLAT_Real_ci<op,
                  !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
511
// CI opcodes for the 32-bit integer flat atomics.
defm FLAT_ATOMIC_SWAP    : FLAT_Real_Atomics_ci<0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci<0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD     : FLAT_Real_Atomics_ci<0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB     : FLAT_Real_Atomics_ci<0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN    : FLAT_Real_Atomics_ci<0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN    : FLAT_Real_Atomics_ci<0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX    : FLAT_Real_Atomics_ci<0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX    : FLAT_Real_Atomics_ci<0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND     : FLAT_Real_Atomics_ci<0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR      : FLAT_Real_Atomics_ci<0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR     : FLAT_Real_Atomics_ci<0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC     : FLAT_Real_Atomics_ci<0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC     : FLAT_Real_Atomics_ci<0x3d, FLAT_ATOMIC_DEC>;

// CI opcodes for the 64-bit integer flat atomics.
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_ci<0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci<0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_ci<0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_ci<0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_ci<0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_ci<0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_ci<0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_ci<0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_ci<0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_ci<0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_ci<0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_ci<0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_ci<0x5d, FLAT_ATOMIC_DEC_X2>;
538
// CI Only flat instructions: opcodes for the floating-point atomics.
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci<0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci<0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci<0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci<0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci<0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci<0x60, FLAT_ATOMIC_FMAX_X2>;
546
547
548//===----------------------------------------------------------------------===//
549// VI
550//===----------------------------------------------------------------------===//
551
// Real (encodable) FLAT instruction for VI: registered under the VI encoding
// family, assembled only when isVI holds, decoded in the "VI" namespace.
class FLAT_Real_vi<bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real<op, ps>,
  SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
  let DecoderNamespace   = "VI";
  let AssemblerPredicate = isVI;
}
558
// VI opcodes for flat loads.
def FLAT_LOAD_UBYTE_vi   : FLAT_Real_vi<0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi   : FLAT_Real_vi<0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi  : FLAT_Real_vi<0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi  : FLAT_Real_vi<0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi   : FLAT_Real_vi<0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi<0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi<0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi<0x16, FLAT_LOAD_DWORDX3>;

// VI opcodes for flat stores.
def FLAT_STORE_BYTE_vi    : FLAT_Real_vi<0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_vi   : FLAT_Real_vi<0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_vi   : FLAT_Real_vi<0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi<0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi<0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi<0x1e, FLAT_STORE_DWORDX3>;
574
// Emits the VI real instructions for an atomic: NAME_vi for the no-return
// pseudo and NAME_RTN_vi for the returning one. Both share opcode `op`; the
// pseudos are looked up by name from ps.PseudoInstr.
multiclass FLAT_Real_Atomics_vi<bits<7> op, FLAT_Pseudo ps> {
  def _vi     : FLAT_Real_vi<op,
                  !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_vi : FLAT_Real_vi<op,
                  !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
579
// VI opcodes for the 32-bit integer flat atomics.
defm FLAT_ATOMIC_SWAP    : FLAT_Real_Atomics_vi<0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi<0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD     : FLAT_Real_Atomics_vi<0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB     : FLAT_Real_Atomics_vi<0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN    : FLAT_Real_Atomics_vi<0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN    : FLAT_Real_Atomics_vi<0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX    : FLAT_Real_Atomics_vi<0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX    : FLAT_Real_Atomics_vi<0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND     : FLAT_Real_Atomics_vi<0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR      : FLAT_Real_Atomics_vi<0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR     : FLAT_Real_Atomics_vi<0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC     : FLAT_Real_Atomics_vi<0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC     : FLAT_Real_Atomics_vi<0x4c, FLAT_ATOMIC_DEC>;

// VI opcodes for the 64-bit integer flat atomics.
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi<0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi<0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi<0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi<0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi<0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi<0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi<0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi<0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi<0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi<0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi<0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi<0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi<0x6c, FLAT_ATOMIC_DEC_X2>;
606
// Real encodings of the global-segment loads. They use the same opcode
// values as the corresponding flat loads defined with FLAT_Real_vi.
def GLOBAL_LOAD_UBYTE_vi   : FLAT_Real_vi<0x10, GLOBAL_LOAD_UBYTE>;
def GLOBAL_LOAD_SBYTE_vi   : FLAT_Real_vi<0x11, GLOBAL_LOAD_SBYTE>;
def GLOBAL_LOAD_USHORT_vi  : FLAT_Real_vi<0x12, GLOBAL_LOAD_USHORT>;
def GLOBAL_LOAD_SSHORT_vi  : FLAT_Real_vi<0x13, GLOBAL_LOAD_SSHORT>;
def GLOBAL_LOAD_DWORD_vi   : FLAT_Real_vi<0x14, GLOBAL_LOAD_DWORD>;
def GLOBAL_LOAD_DWORDX2_vi : FLAT_Real_vi<0x15, GLOBAL_LOAD_DWORDX2>;
def GLOBAL_LOAD_DWORDX4_vi : FLAT_Real_vi<0x17, GLOBAL_LOAD_DWORDX4>;
def GLOBAL_LOAD_DWORDX3_vi : FLAT_Real_vi<0x16, GLOBAL_LOAD_DWORDX3>;

// Real encodings of the global-segment stores.
def GLOBAL_STORE_BYTE_vi    : FLAT_Real_vi<0x18, GLOBAL_STORE_BYTE>;
def GLOBAL_STORE_SHORT_vi   : FLAT_Real_vi<0x1a, GLOBAL_STORE_SHORT>;
def GLOBAL_STORE_DWORD_vi   : FLAT_Real_vi<0x1c, GLOBAL_STORE_DWORD>;
def GLOBAL_STORE_DWORDX2_vi : FLAT_Real_vi<0x1d, GLOBAL_STORE_DWORDX2>;
def GLOBAL_STORE_DWORDX4_vi : FLAT_Real_vi<0x1f, GLOBAL_STORE_DWORDX4>;
def GLOBAL_STORE_DWORDX3_vi : FLAT_Real_vi<0x1e, GLOBAL_STORE_DWORDX3>;