; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s

; Tests for indirect addressing on SI, which is implemented using dynamic
; indexing of vectors.
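; An index held in an SGPR is copied into m0 and the element is accessed with
; v_movrels_b32 (extract) or v_movreld_b32 (insert); a divergent VGPR index is
; expanded into a v_readfirstlane_b32 waterfall loop, as checked below.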

; CHECK-LABEL: {{^}}extract_w_offset:
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %idx = add i32 %in, 1
  %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %idx
  store float %elt, float addrspace(1)* %out
  ret void
}

; XXX: Could do v_or_b32 directly
; CHECK-LABEL: {{^}}extract_w_offset_salu_use_vector:
; CHECK-DAG: s_or_b32
; CHECK-DAG: s_or_b32
; CHECK-DAG: s_or_b32
; CHECK-DAG: s_or_b32
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) {
entry:
  %idx = add i32 %in, 1
  %vec = or <4 x i32> %or.val, <i32 1, i32 2, i32 3, i32 4>
  %elt = extractelement <4 x i32> %vec, i32 %idx
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}extract_wo_offset:
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
  store float %elt, float addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}extract_neg_offset_sgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}extract_neg_offset_sgpr_loaded:
; The offset depends on the register that holds the first element of the vector.
; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %or = or <4 x i32> %vec0, %vec1
  %value = extractelement <4 x i32> %or, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: v_readfirstlane_b32
; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]}}, v0
; CHECK: s_cbranch_execnz
define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

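; Indirect insert at index %in + 1; only a fixed lane of the result is read back.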
; CHECK-LABEL: {{^}}insert_w_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %0 = add i32 %in, 1
  %1 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %0
  %2 = extractelement <4 x float> %1, i32 2
  store float %2, float addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}insert_wo_offset:
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movreld_b32_e32
define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
  %1 = extractelement <4 x float> %0, i32 2
  store float %1, float addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}insert_neg_offset_sgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; The vector indexed into is originally loaded into an SGPR rather
; than built with a reg_sequence

; CHECK-LABEL: {{^}}insert_neg_offset_sgpr_loadreg:
; The offset depends on the register that holds the first element of the vector.
; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = insertelement <4 x i32> %vec, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: v_readfirstlane_b32
; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}}
; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
; CHECK: s_cbranch_execnz
define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -512
  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: v_readfirstlane_b32
; CHECK: s_add_i32 m0, m0, -{{[0-9]+}}
; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}}
; CHECK: s_cbranch_execnz
define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -16
  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; When the block is split to insert the loop, make sure any other
; places that need to be expanded in the same block are also handled.

; CHECK-LABEL: {{^}}extract_vgpr_offset_multiple_in_block:

; CHECK: {{buffer|flat}}_load_dword [[IDX0:v[0-9]+]]
; CHECK-DAG: s_mov_b32 [[S_ELT0:s[0-9]+]], 7
; CHECK-DAG: s_mov_b32 [[S_ELT1:s[0-9]+]], 9
; CHECK-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], [[S_ELT0]]
; CHECK-DAG: v_mov_b32_e32 [[VEC_ELT1:v[0-9]+]], [[S_ELT1]]
; CHECK: s_waitcnt vmcnt(0)

; CHECK: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec

; CHECK: [[LOOP0:BB[0-9]+_[0-9]+]]:
; CHECK: v_readfirstlane_b32 vcc_lo, [[IDX0]]
; CHECK: s_mov_b32 m0, vcc_lo
; CHECK: v_cmp_eq_u32_e32 vcc, m0, [[IDX0]]
; CHECK: s_and_saveexec_b64 vcc, vcc
; CHECK-NEXT: v_movrels_b32_e32 [[MOVREL0:v[0-9]+]], [[VEC_ELT0]]
; CHECK-NEXT: s_xor_b64 exec, exec, vcc
; CHECK: s_cbranch_execnz [[LOOP0]]

; FIXME: Redundant copy
; CHECK: s_mov_b64 exec, [[MASK]]
; CHECK: s_mov_b64 [[MASK]], exec

; CHECK: [[LOOP1:BB[0-9]+_[0-9]+]]:
; CHECK: v_readfirstlane_b32 vcc_lo, [[IDX0]]
; CHECK: s_mov_b32 m0, vcc_lo
; CHECK: v_cmp_eq_u32_e32 vcc, m0, [[IDX0]]
; CHECK: s_and_saveexec_b64 vcc, vcc
; CHECK-NEXT: v_movrels_b32_e32 [[MOVREL1:v[0-9]+]], [[VEC_ELT1]]
; CHECK-NEXT: s_xor_b64 exec, exec, vcc
; CHECK: s_cbranch_execnz [[LOOP1]]

; CHECK: buffer_store_dword [[MOVREL0]]
; CHECK: buffer_store_dword [[MOVREL1]]
define void @extract_vgpr_offset_multiple_in_block(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %id.ext = zext i32 %id to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %id.ext
  %idx0 = load volatile i32, i32 addrspace(1)* %gep
  %idx1 = add i32 %idx0, 1
  %val0 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx0
  %val1 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx1
  store volatile i32 %val0, i32 addrspace(1)* %out0
  store volatile i32 %val1, i32 addrspace(1)* %out0
  ret void
}

; CHECK-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
; CHECK-DAG: s_load_dwordx4 s{{\[}}[[S_ELT0:[0-9]+]]:[[S_ELT3:[0-9]+]]{{\]}}
; CHECK-DAG: {{buffer|flat}}_load_dword [[IDX0:v[0-9]+]]
; CHECK-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], s[[S_ELT0]]
; CHECK-DAG: v_mov_b32_e32 [[INS0:v[0-9]+]], 62
; CHECK-DAG: s_waitcnt vmcnt(0)

; CHECK: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec

; CHECK: [[LOOP0:BB[0-9]+_[0-9]+]]:
; CHECK: v_readfirstlane_b32 vcc_lo, [[IDX0]]
; CHECK: s_mov_b32 m0, vcc_lo
; CHECK: v_cmp_eq_u32_e32 vcc, m0, [[IDX0]]
; CHECK: s_and_saveexec_b64 vcc, vcc
; CHECK-NEXT: v_movreld_b32_e32 v[[MOVREL0:[0-9]+]], [[INS0]]
; CHECK-NEXT: s_xor_b64 exec, exec, vcc
; CHECK: s_cbranch_execnz [[LOOP0]]

; FIXME: Redundant copy
; CHECK: s_mov_b64 exec, [[MASK]]
; CHECK: v_mov_b32_e32 [[INS1:v[0-9]+]], 63
; CHECK: s_mov_b64 [[MASK]], exec

; CHECK: [[LOOP1:BB[0-9]+_[0-9]+]]:
; CHECK: v_readfirstlane_b32 vcc_lo, [[IDX0]]
; CHECK: s_mov_b32 m0, vcc_lo
; CHECK: v_cmp_eq_u32_e32 vcc, m0, [[IDX0]]
; CHECK: s_and_saveexec_b64 vcc, vcc
; CHECK-NEXT: v_movreld_b32_e32 v[[MOVREL1:[0-9]+]], [[INS1]]
; CHECK-NEXT: s_xor_b64 exec, exec, vcc
; CHECK: s_cbranch_execnz [[LOOP1]]

; CHECK: buffer_store_dwordx4 v{{\[}}[[MOVREL0]]:
define void @insert_vgpr_offset_multiple_in_block(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, i32 addrspace(1)* %in, <4 x i32> %vec0) #0 {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %id.ext = zext i32 %id to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %id.ext
  %idx0 = load volatile i32, i32 addrspace(1)* %gep
  %idx1 = add i32 %idx0, 1
  %vec1 = insertelement <4 x i32> %vec0, i32 62, i32 %idx0
  %vec2 = insertelement <4 x i32> %vec1, i32 63, i32 %idx1
  store volatile <4 x i32> %vec2, <4 x i32> addrspace(1)* %out0
  ret void
}

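; Indirect extracts in the two branch successors that rejoin at a phi; each
; block gets its own m0 setup and v_movrels.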
; CHECK-LABEL: {{^}}extract_adjacent_blocks:
; CHECK: s_load_dword [[ARG:s[0-9]+]]
; CHECK: s_cmp_lg_i32
; CHECK: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: buffer_load_dwordx4
; CHECK: s_mov_b32 m0,
; CHECK: v_movrels_b32_e32
; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]

; CHECK: [[BB4]]:
; CHECK: buffer_load_dwordx4
; CHECK: s_mov_b32 m0,
; CHECK: v_movrels_b32_e32

; CHECK: [[ENDBB]]:
; CHECK: buffer_store_dword
; CHECK: s_endpgm
define void @extract_adjacent_blocks(i32 %arg) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp3 = extractelement <4 x float> %tmp2, i32 undef
  br label %bb7

bb4:
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp6 = extractelement <4 x float> %tmp5, i32 undef
  br label %bb7

bb7:
  %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile float %tmp8, float addrspace(1)* undef
  ret void
}

; CHECK-LABEL: {{^}}insert_adjacent_blocks:
; CHECK: s_load_dword [[ARG:s[0-9]+]]
; CHECK: s_cmp_lg_i32
; CHECK: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: buffer_load_dwordx4
; CHECK: s_mov_b32 m0,
; CHECK: v_movreld_b32_e32
; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]

; CHECK: [[BB4]]:
; CHECK: buffer_load_dwordx4
; CHECK: s_mov_b32 m0,
; CHECK: v_movreld_b32_e32

; CHECK: [[ENDBB]]:
; CHECK: buffer_store_dword
; CHECK: s_endpgm
define void @insert_adjacent_blocks(i32 %arg, float %val0) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:                                              ; preds = %bb
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
  br label %bb7

bb4:                                              ; preds = %bb
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
  br label %bb7

bb7:                                              ; preds = %bb4, %bb1
  %tmp8 = phi <4 x float> [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile <4 x float> %tmp8, <4 x float> addrspace(1)* undef
  ret void
}

; FIXME: Should be able to fold zero input to movreld to inline imm?

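; Two indirect inserts into different <6 x float> vectors in the same block;
; each one sets m0 and uses its own v_movreld_b32.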
; CHECK-LABEL: {{^}}multi_same_block:
; CHECK: s_load_dword [[ARG:s[0-9]+]]
; CHECK-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; CHECK-DAG: s_add_i32 m0, [[ARG]], -16
; CHECK: v_movreld_b32_e32 v{{[0-9]+}}, [[ZERO]]

; CHECK: s_add_i32 m0, [[ARG]], -14
; CHECK: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}

; CHECK: s_mov_b32 m0, -1
; CHECK: ds_write_b32
; CHECK: ds_write_b32
; CHECK: s_endpgm
define void @multi_same_block(i32 %arg) #0 {
bb:
  %tmp1 = add i32 %arg, -16
  %tmp2 = insertelement <6 x float> <float 1.700000e+01, float 1.800000e+01, float 1.900000e+01, float 2.000000e+01, float 2.100000e+01, float 2.200000e+01>, float 0.000000e+00, i32 %tmp1
  %tmp3 = add i32 %arg, -16
  %tmp4 = insertelement <6 x float> <float 0x40311999A0000000, float 0x40321999A0000000, float 0x40331999A0000000, float 0x40341999A0000000, float 0x40351999A0000000, float 0x40361999A0000000>, float 0x3FB99999A0000000, i32 %tmp3
  %tmp5 = bitcast <6 x float> %tmp2 to <6 x i32>
  %tmp6 = extractelement <6 x i32> %tmp5, i32 1
  %tmp7 = bitcast <6 x float> %tmp4 to <6 x i32>
  %tmp8 = extractelement <6 x i32> %tmp7, i32 5
  store volatile i32 %tmp6, i32 addrspace(3)* undef, align 4
  store volatile i32 %tmp8, i32 addrspace(3)* undef, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }