blob: b038cc4fa2c052cc48a0344e08a4aa1e7bcd6718 [file] [log] [blame]
//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
// Address-matching complex patterns. Each one matches an i64 address, yields
// three result operands, defers the actual matching to the named C++ selector
// and carries a complexity of -10. The selection patterns in this file bind
// the three FLATAtomic results as (vaddr, offset, slc).
def FLATAtomic : ComplexPattern<
  i64, 3, "SelectFlatAtomic", [], [], -10
>;
def FLATOffset : ComplexPattern<
  i64, 3, "SelectFlat", [], [], -10
>;
Valery Pykhtin8bc65962016-09-05 11:22:51 +000012
13//===----------------------------------------------------------------------===//
14// FLAT classes
15//===----------------------------------------------------------------------===//
16
// Common base of every FLAT-encoded pseudo instruction in this file (flat,
// global and scratch segments).
//
//   opName  - mnemonic; also used as the SIMCInstr key.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string; the pseudo itself carries
//             an empty asm string and FLAT_Real concatenates
//             Mnemonic # AsmOperands.
//   pattern - optional selection patterns.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  // This record is a codegen-only pseudo.
  let isCodeGenOnly = 1;
  let isPseudo = 1;

  // Default predicate; the global and scratch definitions override it from
  // an enclosing `let`.
  let SubtargetPredicate = isCIVI;

  let FLAT = 1;

  let SchedRW = [WriteVMEM];
  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;

  // Kept as separate fields so that FLAT_Real can rebuild the asm string.
  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors. Both are 0 for plain flat instructions.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // Having an saddr field and enabling it are tracked separately: saddr is
  // only valid for scratch and global instructions. Before gfx9 these bits
  // were reserved, so the original flat segment instructions should not
  // necessarily get the "disabled" value written into them.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  // When has_glc is 0 the encoding uses glcValue instead of a glc operand.
  bits<1> has_glc = 1;
  bits<1> glcValue = 0;

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a Buffer
  // instruction, so they increment both VM_CNT and LGKM_CNT and are not
  // considered done until both have been decremented. Global and scratch
  // instructions are flagged with VM_CNT only.
  let VM_CNT = 1;
  let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);
}
64
// Encodable (64-bit) form of a FLAT_Pseudo.
//
//   op - 7-bit opcode placed in Inst{24-18}.
//   ps - the pseudo whose operand lists, asm string and flags are reused.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isCodeGenOnly = 0;
  let isPseudo = 0;

  // Flags copied over from the pseudo.
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // Operand encoding fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  bits<1> slc;
  bits<1> glc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. For flat-segment accesses the highest bit is ignored and
  // the value is treated as a 12-bit unsigned offset.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9. The field is
  // declared but never placed into Inst; bit 55 carries nv instead.
  bits<1> tfe = 0;

  // Bits 15-0 (offset, lds, seg) are only valid on GFX9+.
  let Inst{12-0} = offset;
  let Inst{13} = lds;
  let Inst{15-14} = seg;

  // Instructions without a glc operand encode the pseudo's fixed glcValue.
  let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17} = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);

  // Bits 54-48 hold saddr: the register when saddr is enabled, 0x7f when the
  // instruction has an saddr field but it is disabled, and 0 for instructions
  // without an saddr field.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55} = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}
119
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo for the flat and global segments.
//
//   HasSignedOffset - use the offset_s13 operand instead of offset_u12.
//   HasSaddr        - the instruction has an saddr field; the asm string then
//                     shows either ", $saddr" or ", off".
//   EnableSaddr     - an SReg_64 $saddr operand is actually present.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !if(HasSignedOffset,
    !if(EnableSaddr,
      (ins VReg_64:$vaddr, SReg_64:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc),
      (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)),
    !if(EnableSaddr,
      (ins VReg_64:$vaddr, SReg_64:$saddr, offset_u12:$offset, GLC:$glc, slc:$slc),
      (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  // Loads have a destination but no data operand.
  let has_data = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}
142
// Store pseudo for the flat and global segments.
//
//   HasSignedOffset - use the offset_s13 operand instead of offset_u12.
//   HasSaddr        - the instruction has an saddr field; the asm string then
//                     shows either ", $saddr" or ", off".
//   EnableSaddr     - an SReg_64 $saddr operand is actually present.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSignedOffset = 0, bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !if(HasSignedOffset,
    !if(EnableSaddr,
      (ins VReg_64:$vaddr, vdataClass:$vdata, SReg_64:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc),
      (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc)),
    !if(EnableSaddr,
      (ins VReg_64:$vaddr, vdataClass:$vdata, SReg_64:$saddr, offset_u12:$offset, GLC:$glc, slc:$slc),
      (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc))),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc"> {
  let mayStore = 1;
  let mayLoad = 0;
  let maybeAtomic = 1;
  // Stores have a data operand but no destination.
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}
163
// Defines a global-segment load twice: the base record with saddr disabled
// ("off") and a _SADDR record with an $saddr operand. Both use the signed
// offset operand.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in {
    def ""     : FLAT_Load_Pseudo<opName, regClass, 1, 1, 0>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, 1, 1, 1>;
  }
}
170
// Defines a global-segment store twice: the base record with saddr disabled
// ("off") and a _SADDR record with an $saddr operand. Both use the signed
// offset operand.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in {
    def ""     : FLAT_Store_Pseudo<opName, regClass, 1, 1, 0>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1, 1>;
  }
}
177
// Scratch-segment load pseudo. Exactly one address register is present:
// a 32-bit $vaddr when EnableSaddr is 0, or a 32-bit $saddr when it is 1.
// The missing one is printed as "off". The offset is always signed.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit EnableSaddr = 0>: FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !if(EnableSaddr,
    (ins SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc),
    (ins VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)),
  " $vdst, "#!if(EnableSaddr, "off, $saddr", "$vaddr, off")#"$offset$glc$slc"> {
  let mayLoad = 1;
  let maybeAtomic = 1;
  let has_data = 0;
  // The saddr field always exists for scratch; it is only used when enabled.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
194
// Scratch-segment store pseudo. Exactly one address register is present:
// a 32-bit $vaddr when EnableSaddr is 0, or a 32-bit $saddr when it is 1.
// The missing one is printed as "off". The offset is always signed.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !if(EnableSaddr,
    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, offset_s13:$offset, GLC:$glc, slc:$slc),
    (ins vdataClass:$vdata, VGPR_32:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc)),
  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc"> {
  let mayStore = 1;
  let mayLoad = 0;
  let maybeAtomic = 1;
  let has_vdst = 0;
  // The saddr field always exists for scratch; it is only used when enabled.
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
211
// Defines a scratch load in both addressing forms: the base record addressed
// through $vaddr and a _SADDR record addressed through $saddr.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Load_Pseudo<opName, regClass, 0>;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
  }
}
218
// Defines a scratch store in both addressing forms: the base record addressed
// through $vaddr and a _SADDR record addressed through $saddr.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Store_Pseudo<opName, regClass, 0>;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
  }
}
225
// Atomic pseudo that does not return the previous memory value: no vdst, no
// glc operand, and the glc bit is encoded as the fixed value 0.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad = 1;
  let mayStore = 1;
  let maybeAtomic = 1;
  let has_vdst = 0;
  let has_glc = 0;
  let glcValue = 0;
}
236
// Returning variant of FLAT_AtomicNoRet_Pseudo: restores the vdst operand and
// encodes the glc bit as the fixed value 1 (there is still no glc operand).
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let has_vdst = 1;
  let glcValue = 1;
  let hasPostISelHook = 1;
  let PseudoInstr = NAME # "_RTN";
}
245
// Defines a flat-segment atomic as a pair of records: the base record, which
// returns nothing and carries no selection pattern, and an _RTN record that
// returns the result in $vdst and is selected from `atomic` through the
// FLATAtomic addressing pattern.
//
//   vdst_rc / vt      - register class and value type of the returned value.
//   atomic            - DAG operator matched by the _RTN pattern.
//   data_vt / data_rc - value type and register class of the data operand;
//                       they default to the result's.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {

  def "" : FLAT_AtomicNoRet_Pseudo <
      opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc),
      " $vaddr, $vdata$offset$slc">,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
  }

  def _RTN : FLAT_AtomicRet_Pseudo <
      opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, offset_u12:$offset, slc:$slc),
      " $vdst, $vaddr, $vdata$offset glc$slc",
      [(set vt:$vdst,
        (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    AtomicNoRet <opName, 1>;
}
269
// Defines a global-segment atomic as four records:
//
//   <NAME>            - no return value, saddr disabled (printed as "off").
//   <NAME>_RTN        - returns the result in $vdst, saddr disabled; selected
//                       from `atomic` through the FLATAtomic pattern.
//   <NAME>_SADDR      - no return value, with an SReg_64 $saddr operand.
//   <NAME>_SADDR_RTN  - returns the result in $vdst, with $saddr.
//
//   vdst_rc / vt      - register class and value type of the returned value.
//   atomic            - DAG operator matched by the _RTN pattern.
//   data_vt / data_rc - value type and register class of the data operand;
//                       they default to the result's.
//
// All four use the signed offset operand and set has_saddr, so the saddr
// field is always encoded (as the register, or as the disabled value).
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
    " $vaddr, $vdata, off$offset$slc">,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, offset_s13:$offset, slc:$slc),
    " $vdst, $vaddr, $vdata, off$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    AtomicNoRet <opName, 1> {
    let has_saddr = 1;
  }

  // The saddr operand must be comma-separated from $vdata, exactly where the
  // records above print ", off"; without the separator the two register
  // operands are printed (and expected by the parser) fused together.
  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, slc:$slc),
    " $vaddr, $vdata, $saddr$offset$slc">,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, offset_s13:$offset, slc:$slc),
    " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
    AtomicNoRet <opName#"_saddr", 1> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR_RTN";
  }
}
317
// Wraps a two-operand atomic node so that it only matches when the memory
// access is in the flat address space.
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
>;
323
// Flat-address-space restricted versions of the atomic nodes.
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_swap_flat     : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat      : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat      : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat      : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat      : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat       : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat      : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat     : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat     : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat      : flat_binary_atomic_op<atomic_load_xor>;
def atomic_inc_flat      : flat_binary_atomic_op<SIatomic_inc>;
def atomic_dec_flat      : flat_binary_atomic_op<SIatomic_dec>;
337
338
339
340//===----------------------------------------------------------------------===//
341// Flat Instructions
342//===----------------------------------------------------------------------===//
343
// Flat-segment loads (unsigned offset, no saddr field).
def FLAT_LOAD_UBYTE    : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE    : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT   : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT   : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD    : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2  : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4  : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3  : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

// Flat-segment stores (unsigned offset, no saddr field).
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
359
// Flat-segment atomics. Each defm produces the no-return record and its _RTN
// counterpart. cmpswap takes a two-element data operand.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", VGPR_32, i32, atomic_cmp_swap_flat, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", VReg_64, i64, atomic_cmp_swap_flat, v2i64, VReg_128>;
defm FLAT_ATOMIC_SWAP       : FLAT_Atomic_Pseudo <"flat_atomic_swap", VGPR_32, i32, atomic_swap_flat>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat>;

// 32-bit operations.
defm FLAT_ATOMIC_ADD  : FLAT_Atomic_Pseudo <"flat_atomic_add", VGPR_32, i32, atomic_add_flat>;
defm FLAT_ATOMIC_SUB  : FLAT_Atomic_Pseudo <"flat_atomic_sub", VGPR_32, i32, atomic_sub_flat>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", VGPR_32, i32, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", VGPR_32, i32, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", VGPR_32, i32, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", VGPR_32, i32, atomic_umax_flat>;
defm FLAT_ATOMIC_AND  : FLAT_Atomic_Pseudo <"flat_atomic_and", VGPR_32, i32, atomic_and_flat>;
defm FLAT_ATOMIC_OR   : FLAT_Atomic_Pseudo <"flat_atomic_or", VGPR_32, i32, atomic_or_flat>;
defm FLAT_ATOMIC_XOR  : FLAT_Atomic_Pseudo <"flat_atomic_xor", VGPR_32, i32, atomic_xor_flat>;
defm FLAT_ATOMIC_INC  : FLAT_Atomic_Pseudo <"flat_atomic_inc", VGPR_32, i32, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC  : FLAT_Atomic_Pseudo <"flat_atomic_dec", VGPR_32, i32, atomic_dec_flat>;

// 64-bit operations.
defm FLAT_ATOMIC_ADD_X2  : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", VReg_64, i64, atomic_add_flat>;
defm FLAT_ATOMIC_SUB_X2  : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat>;
defm FLAT_ATOMIC_AND_X2  : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", VReg_64, i64, atomic_and_flat>;
defm FLAT_ATOMIC_OR_X2   : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", VReg_64, i64, atomic_or_flat>;
defm FLAT_ATOMIC_XOR_X2  : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat>;
defm FLAT_ATOMIC_INC_X2  : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat>;
defm FLAT_ATOMIC_DEC_X2  : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat>;
439
// Floating-point flat atomics. None of them has a selection operator
// (null_frag), so they are not produced by instruction selection.
let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only?

defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN    : FLAT_Atomic_Pseudo <"flat_atomic_fmin", VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX    : FLAT_Atomic_Pseudo <"flat_atomic_fmax", VGPR_32, f32>;
defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isCI
461
// Global-segment instructions. Every defm below yields the base record plus
// its _SADDR form (and, for atomics, the _RTN / _SADDR_RTN forms).
let SubtargetPredicate = HasFlatGlobalInsts in {

// Loads.
defm GLOBAL_LOAD_UBYTE   : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT  : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD   : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// Stores.
defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;

// Atomics. The atomic multiclass does not set the segment itself, so it is
// applied here.
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", VGPR_32, i32, AMDGPUatomic_cmp_swap_global, v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", VReg_64, i64, AMDGPUatomic_cmp_swap_global, v2i64, VReg_128>;
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", VGPR_32, i32, atomic_swap_global>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", VReg_64, i64, atomic_swap_global>;

defm GLOBAL_ATOMIC_ADD  : FLAT_Global_Atomic_Pseudo <"global_atomic_add", VGPR_32, i32, atomic_add_global>;
defm GLOBAL_ATOMIC_SUB  : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", VGPR_32, i32, atomic_sub_global>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", VGPR_32, i32, atomic_min_global>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", VGPR_32, i32, atomic_umin_global>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", VGPR_32, i32, atomic_max_global>;
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", VGPR_32, i32, atomic_umax_global>;
defm GLOBAL_ATOMIC_AND  : FLAT_Global_Atomic_Pseudo <"global_atomic_and", VGPR_32, i32, atomic_and_global>;
defm GLOBAL_ATOMIC_OR   : FLAT_Global_Atomic_Pseudo <"global_atomic_or", VGPR_32, i32, atomic_or_global>;
defm GLOBAL_ATOMIC_XOR  : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", VGPR_32, i32, atomic_xor_global>;
defm GLOBAL_ATOMIC_INC  : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", VGPR_32, i32, atomic_inc_global>;
defm GLOBAL_ATOMIC_DEC  : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", VGPR_32, i32, atomic_dec_global>;

defm GLOBAL_ATOMIC_ADD_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", VReg_64, i64, atomic_add_global>;
defm GLOBAL_ATOMIC_SUB_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", VReg_64, i64, atomic_sub_global>;
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", VReg_64, i64, atomic_min_global>;
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", VReg_64, i64, atomic_umin_global>;
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", VReg_64, i64, atomic_max_global>;
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", VReg_64, i64, atomic_umax_global>;
defm GLOBAL_ATOMIC_AND_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", VReg_64, i64, atomic_and_global>;
defm GLOBAL_ATOMIC_OR_X2   : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", VReg_64, i64, atomic_or_global>;
defm GLOBAL_ATOMIC_XOR_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", VReg_64, i64, atomic_xor_global>;
defm GLOBAL_ATOMIC_INC_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", VReg_64, i64, atomic_inc_global>;
defm GLOBAL_ATOMIC_DEC_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", VReg_64, i64, atomic_dec_global>;
} // End is_flat_global = 1

} // End SubtargetPredicate = HasFlatGlobalInsts
563
564
// Scratch-segment instructions. Every defm yields the $vaddr-addressed base
// record and its $saddr-addressed _SADDR form.
let SubtargetPredicate = HasFlatScratchInsts in {

// Loads.
defm SCRATCH_LOAD_UBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD   : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// Stores.
defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

} // End SubtargetPredicate = HasFlatScratchInsts
583
Valery Pykhtin8bc65962016-09-05 11:22:51 +0000584//===----------------------------------------------------------------------===//
585// Flat Patterns
586//===----------------------------------------------------------------------===//
587
// Restricts a load operator to accesses in the flat, global or constant
// address space.
class flat_ld <SDPatternOperator ld> : PatFrag<
  (ops node:$ptr),
  (ld node:$ptr), [{
  auto const AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.FLAT_ADDRESS ||
         AS == AMDGPUASI.GLOBAL_ADDRESS ||
         AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
595
// Restricts a store operator to accesses in the flat or global address space.
class flat_st <SDPatternOperator st> : PatFrag<
  (ops node:$val, node:$ptr),
  (st node:$val, node:$ptr), [{
  auto const AS = cast<MemSDNode>(N)->getAddressSpace();
  return AS == AMDGPUASI.FLAT_ADDRESS ||
         AS == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
602
// Address-space restricted load fragments.
def atomic_flat_load   : flat_ld <atomic_load>;
def flat_load          : flat_ld <load>;
def flat_az_extloadi8  : flat_ld <az_extloadi8>;
def flat_sextloadi8    : flat_ld <sextloadi8>;
def flat_az_extloadi16 : flat_ld <az_extloadi16>;
def flat_sextloadi16   : flat_ld <sextloadi16>;

// Address-space restricted store fragments.
def atomic_flat_store  : flat_st <atomic_store>;
def flat_store         : flat_st <store>;
def flat_truncstorei8  : flat_st <truncstorei8>;
def flat_truncstorei16 : flat_st <truncstorei16>;
614
// Selects a non-atomic load to `inst`. The address is split by FLATAtomic
// into (vaddr, offset, slc); the literal 0 is the glc operand.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : Pat <
  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc)
>;
620
// Same as the plain load pattern, except that the glc operand is forced to 1.
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : Pat <
  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 1, $slc)
>;
625
// Selects a non-atomic store (value first, then address) to `inst`; the
// literal 0 is the glc operand.
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : Pat <
  (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, $data, $offset, 0, $slc)
>;
630
// Selects an atomic store to `inst` with the glc operand forced to 1.
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : Pat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, $data, $offset, 1, $slc)
>;
637
// Selects a returning atomic read-modify-write to `inst`. No glc operand is
// passed; data_vt lets the data operand differ in type from the result.
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt>
  : Pat <
  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;
643
// Selection patterns mapping loads, stores and *_global atomics onto the
// flat-segment instructions.
let Predicates = [isCIVI] in {

// Extending and plain loads.
def : FlatLoadPat <FLAT_LOAD_UBYTE,   flat_az_extloadi8,  i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   flat_sextloadi8,    i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   flat_az_extloadi8,  i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   flat_sextloadi8,    i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  flat_az_extloadi16, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT,  flat_sextloadi16,   i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD,   flat_load,          i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load,          v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load,          v4i32>;

// Atomic loads.
def : FlatLoadAtomicPat <FLAT_LOAD_DWORD,   atomic_flat_load, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;

// Truncating and plain stores.
def : FlatStorePat <FLAT_STORE_BYTE,    flat_truncstorei8,  i32>;
def : FlatStorePat <FLAT_STORE_SHORT,   flat_truncstorei16, i32>;
def : FlatStorePat <FLAT_STORE_DWORD,   flat_store,         i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store,         v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store,         v4i32>;

// Atomic stores.
def : FlatStoreAtomicPat <FLAT_STORE_DWORD,   atomic_flat_store, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;

// 32-bit returning atomics.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN,     atomic_add_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN,     atomic_sub_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN,     atomic_inc_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN,     atomic_dec_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN,     atomic_and_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN,    atomic_max_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN,    atomic_umax_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN,    atomic_min_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN,    atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN,      atomic_or_global,   i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN,    atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN,     atomic_xor_global,  i32>;

// 64-bit returning atomics.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN,     atomic_add_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN,     atomic_sub_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN,     atomic_inc_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN,     atomic_dec_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN,     atomic_and_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN,    atomic_max_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN,    atomic_umax_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN,    atomic_min_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN,    atomic_umin_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN,      atomic_or_global,   i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN,    atomic_swap_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN,     atomic_xor_global,  i64>;

} // End Predicates = [isCIVI]
697
// Store patterns for i16 values, guarded by isVI: a truncating byte store
// selects FLAT_STORE_BYTE and a plain i16 store selects FLAT_STORE_SHORT.
let Predicates = [isVI] in {
  def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i16>;
  def : FlatStorePat <FLAT_STORE_SHORT, flat_store, i16>;
} // End Predicates = [isVI]
Valery Pykhtin8bc65962016-09-05 11:22:51 +0000702
703
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//
711
// Real (encoded) FLAT instruction for the CI encoding.
//
//   op - 7-bit opcode forwarded to FLAT_Real.
//   ps - FLAT_Pseudo this real instruction is built from.
//
// The record is entered in the SIMCInstr table under ps.PseudoInstr with
// SIEncodingFamily.SI, is only accepted by the assembler when isCIOnly
// holds, and is placed in the "CI" decoder namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isCIOnly;
  let DecoderNamespace = "CI";
}
718
// CI real encodings for the FLAT loads. Note that DWORDX4 (0xe) precedes
// DWORDX3 (0xf) in the opcode numbering.
def FLAT_LOAD_UBYTE_ci    : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci    : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci   : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci   : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci    : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci  : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci  : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci  : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;

// CI real encodings for the FLAT stores. As with the loads, DWORDX4 (0x1e)
// precedes DWORDX3 (0x1f).
def FLAT_STORE_BYTE_ci    : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
734
// Defines the two CI real records for one FLAT atomic, both using opcode
// `op`:
//   <defm name>_ci     - built from the pseudo named ps.PseudoInstr.
//   <defm name>_RTN_ci - built from the pseudo named ps.PseudoInstr # "_RTN".
// The pseudos are looked up by name through !cast, so both must already be
// defined when the multiclass is instantiated.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
739
// CI real encodings for the FLAT atomics. Each defm yields a _ci and an
// _RTN_ci record. Opcode 0x34 is not assigned in this list.
defm FLAT_ATOMIC_SWAP        : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP     : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD         : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB         : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN        : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN        : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX        : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX        : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND         : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR          : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR         : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC         : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC         : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;

// _X2 variants. Opcode 0x54 is not assigned in this list.
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
774
775
//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//
779
// Real (encoded) FLAT instruction for the VI encoding.
//
//   op - 7-bit opcode forwarded to FLAT_Real.
//   ps - FLAT_Pseudo this real instruction is built from.
//
// The record is entered in the SIMCInstr table under ps.PseudoInstr with
// SIEncodingFamily.VI, is only accepted by the assembler when isVI holds,
// and is placed in the "VI" decoder namespace.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isVI;
  let DecoderNamespace = "VI";
}
786
// Defines the VI real records for both addressing forms of one instruction,
// sharing opcode `op`:
//   NAME_vi       - built from the pseudo called NAME.
//   NAME_SADDR_vi - built from the pseudo called NAME # "_SADDR".
// NAME is the defm name, so the defm must be named after an existing pseudo
// that also has a _SADDR sibling.
multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
  def _vi       : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
791
// VI real encodings for the FLAT loads. The records are listed with DWORDX4
// before DWORDX3, but the opcodes are in size order here (X3 = 0x16,
// X4 = 0x17), unlike the CI numbering.
def FLAT_LOAD_UBYTE_vi    : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi    : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi   : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi   : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi    : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi  : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi  : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi  : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

// VI real encodings for the FLAT stores (X3 = 0x1e, X4 = 0x1f).
def FLAT_STORE_BYTE_vi    : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_vi   : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_vi   : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
807
// Defines the two VI real records for one FLAT atomic, both using opcode
// `op`:
//   <defm name>_vi     - built from the pseudo named ps.PseudoInstr.
//   <defm name>_RTN_vi - built from the pseudo named ps.PseudoInstr # "_RTN".
// The pseudos are looked up by name through !cast, so both must already be
// defined when the multiclass is instantiated.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
812
// VI real records for one global atomic. Inherits NAME_vi and NAME_SADDR_vi
// from FLAT_Real_AllAddr_vi and adds the returning forms, all sharing
// opcode `op`:
//   NAME_RTN_vi       - built from the pseudo called NAME # "_RTN".
//   NAME_SADDR_RTN_vi - built from the pseudo called NAME # "_SADDR_RTN".
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> :
  FLAT_Real_AllAddr_vi<op> {
  def _RTN_vi       : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}
818
819
// VI real encodings for the FLAT atomics. Each defm yields a _vi and an
// _RTN_vi record. Opcodes run contiguously from 0x40 to 0x4c.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;

// _X2 variants, 0x60 to 0x6c.
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
846
// Real encodings for the GLOBAL loads. Each defm yields a _vi and a
// _SADDR_vi record. The opcode values are the same as those of the
// corresponding FLAT_LOAD_*_vi records.
// NOTE(review): these go through FLAT_Real_vi and therefore carry
// AssemblerPredicate = isVI and DecoderNamespace = "VI" -- confirm that is
// the intended predicate for the global instructions. Whatever separates
// their encoding from the FLAT_* records with the same opcode is not
// visible in this part of the file.
defm GLOBAL_LOAD_UBYTE    : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE    : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT   : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT   : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD    : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2  : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3  : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4  : FLAT_Real_AllAddr_vi <0x17>;

// Real encodings for the GLOBAL stores.
defm GLOBAL_STORE_BYTE    : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_SHORT   : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_DWORD   : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
862
Matt Arsenaultf65c5ac2017-07-20 17:31:56 +0000863
// Real encodings for the GLOBAL atomics. Each defm yields four records:
// _vi, _SADDR_vi, _RTN_vi and _SADDR_RTN_vi. The opcode values are the same
// as those of the corresponding FLAT_ATOMIC_* VI definitions.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;

// _X2 variants.
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;
Matt Arsenaultca7b0a12017-07-21 15:36:16 +0000890
// Real encodings for the SCRATCH loads. Each defm yields a _vi and a
// _SADDR_vi record. The opcode values are the same as those of the
// corresponding FLAT_LOAD_*_vi records. The list order follows the original
// (DWORDX4 before DWORDX3); the opcodes themselves are in size order.
defm SCRATCH_LOAD_UBYTE    : FLAT_Real_AllAddr_vi <0x10>;
defm SCRATCH_LOAD_SBYTE    : FLAT_Real_AllAddr_vi <0x11>;
defm SCRATCH_LOAD_USHORT   : FLAT_Real_AllAddr_vi <0x12>;
defm SCRATCH_LOAD_SSHORT   : FLAT_Real_AllAddr_vi <0x13>;
defm SCRATCH_LOAD_DWORD    : FLAT_Real_AllAddr_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2  : FLAT_Real_AllAddr_vi <0x15>;
defm SCRATCH_LOAD_DWORDX4  : FLAT_Real_AllAddr_vi <0x17>;
defm SCRATCH_LOAD_DWORDX3  : FLAT_Real_AllAddr_vi <0x16>;

// Real encodings for the SCRATCH stores.
defm SCRATCH_STORE_BYTE    : FLAT_Real_AllAddr_vi <0x18>;
defm SCRATCH_STORE_SHORT   : FLAT_Real_AllAddr_vi <0x1a>;
defm SCRATCH_STORE_DWORD   : FLAT_Real_AllAddr_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>;