; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=MOVREL %s
; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-vgpr-index-mode -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=IDXMODE %s

; Tests for indirect addressing on SI, which is implemented using dynamic
; indexing of vectors.
; Dynamic extract with a +1 offset folded into the index register setup.
; GCN-LABEL: {{^}}extract_w_offset:
; GCN-DAG: s_load_dword [[IN:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 [[BASEREG:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0

; MOVREL-DAG: s_mov_b32 m0, [[IN]]
; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, [[BASEREG]]

; IDXMODE: s_set_gpr_idx_on [[IN]], src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, [[BASEREG]]
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %idx = add i32 %in, 1
  %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %idx
  store float %elt, float addrspace(1)* %out
  ret void
}

; XXX: Could do v_or_b32 directly
; GCN-LABEL: {{^}}extract_w_offset_salu_use_vector:
; MOVREL: s_mov_b32 m0
; GCN-DAG: s_or_b32
; GCN-DAG: s_or_b32
; GCN-DAG: s_or_b32
; GCN-DAG: s_or_b32
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}

; MOVREL: v_movrels_b32_e32

; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) {
entry:
  %idx = add i32 %in, 1
  %vec = or <4 x i32> %or.val, <i32 1, i32 2, i32 3, i32 4>
  %elt = extractelement <4 x i32> %vec, i32 %idx
  store i32 %elt, i32 addrspace(1)* %out
  ret void
}

; Dynamic extract with the raw index (no added offset); element 0 is the base.
; GCN-LABEL: {{^}}extract_wo_offset:
; GCN-DAG: s_load_dword [[IN:s[0-9]+]]
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
; GCN-DAG: v_mov_b32_e32 [[BASEREG:v[0-9]+]], 1.0

; MOVREL-DAG: s_mov_b32 m0, [[IN]]
; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, [[BASEREG]]

; IDXMODE: s_set_gpr_idx_on [[IN]], src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, [[BASEREG]]
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
  %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
  store float %elt, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extract_neg_offset_sgpr:
; The offset depends on the register that holds the first element of the vector.
; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; MOVREL: v_movrels_b32_e32 v{{[0-9]}}, v0

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE-NEXT: s_set_gpr_idx_on [[ADD_IDX]], src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extract_neg_offset_sgpr_loaded:
; The offset depends on the register that holds the first element of the vector.
; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; MOVREL: v_movrels_b32_e32 v{{[0-9]}}, v0

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE-NEXT: s_set_gpr_idx_on [[ADD_IDX]], src0{{$}}
; IDXMODE-NEXT: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %or = or <4 x i32> %vec0, %vec1
  %value = extractelement <4 x i32> %or, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extract_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.

; FIXME: The waitcnt for the argument load can go after the loop
; IDXMODE: s_set_gpr_idx_on 0, src0
; GCN: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
; GCN: s_waitcnt lgkmcnt(0)

; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v{{[0-9]+}}

; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe0
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL: v_movrels_b32_e32 [[RESULT:v[0-9]+]], v1

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00
; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE: v_mov_b32_e32 [[RESULT:v[0-9]+]], v1

; GCN: s_cbranch_execnz

; IDXMODE: s_set_gpr_idx_off
; GCN: buffer_store_dword [[RESULT]]
define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -512
  %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Undef index: no indirect-addressing sequence is required, just check it compiles.
; GCN-LABEL: {{^}}extract_undef_offset_sgpr:
define void @extract_undef_offset_sgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
entry:
  %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
  %value = extractelement <4 x i32> %ld, i32 undef
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}insert_undef_offset_sgpr_vector_src:
; GCN-DAG: buffer_load_dwordx4
; MOVREL-DAG: s_mov_b32 m0,
; MOVREL: v_movreld_b32
define void @insert_undef_offset_sgpr_vector_src(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %value = insertelement <4 x i32> %ld, i32 5, i32 undef
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; Dynamic insert with a +1 offset; the whole result vector is stored.
; GCN-LABEL: {{^}}insert_w_offset:
; GCN-DAG: s_load_dword [[IN:s[0-9]+]]
; MOVREL-DAG: s_mov_b32 m0, [[IN]]
; GCN-DAG: v_mov_b32_e32 v[[ELT0:[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 v[[ELT1:[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 v[[ELT2:[0-9]+]], 0x40400000
; GCN-DAG: v_mov_b32_e32 v[[ELT3:[0-9]+]], 4.0
; GCN-DAG: v_mov_b32_e32 v[[INS:[0-9]+]], 0x40a00000

; MOVREL: v_movreld_b32_e32 v[[ELT1]], v[[INS]]
; MOVREL: buffer_store_dwordx4 v{{\[}}[[ELT0]]:[[ELT3]]{{\]}}
define void @insert_w_offset(<4 x float> addrspace(1)* %out, i32 %in) {
entry:
  %0 = add i32 %in, 1
  %1 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %0
  store <4 x float> %1, <4 x float> addrspace(1)* %out
  ret void
}

; Dynamic insert with the raw index (no added offset).
; GCN-LABEL: {{^}}insert_wo_offset:
; GCN: s_load_dword [[IN:s[0-9]+]]

; MOVREL: s_mov_b32 m0, [[IN]]
; MOVREL: v_movreld_b32_e32 v[[ELT0:[0-9]+]]

; IDXMODE: s_set_gpr_idx_on [[IN]], dst
; IDXMODE-NEXT: v_mov_b32_e32 v[[ELT0:[0-9]+]], v{{[0-9]+}}
; IDXMODE-NEXT: s_set_gpr_idx_off

; GCN: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
define void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
entry:
  %0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
  store <4 x float> %0, <4 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}insert_neg_offset_sgpr:
; The offset depends on the register that holds the first element of the vector.
; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; MOVREL: v_movreld_b32_e32 v0, 5

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], dst
; IDXMODE-NEXT: v_mov_b32_e32 v0, 5
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; The vector indexed into is originally loaded into an SGPR rather
; than built with a reg_sequence

; GCN-LABEL: {{^}}insert_neg_offset_sgpr_loadreg:
; The offset depends on the register that holds the first element of the vector.
; MOVREL: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
; MOVREL: v_movreld_b32_e32 v0, 5

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], dst
; IDXMODE-NEXT: v_mov_b32_e32 v0, 5
; IDXMODE-NEXT: s_set_gpr_idx_off
define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) {
entry:
  %index = add i32 %offset, -512
  %value = insertelement <4 x i32> %vec, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}insert_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.

; GCN-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT1:v[0-9]+]], 2{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT2:v[0-9]+]], 3{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT3:v[0-9]+]], 4{{$}}

; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
; GCN: s_waitcnt lgkmcnt(0)

; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]:
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]]

; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe00
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL: v_movreld_b32_e32 [[VEC_ELT0]], 5

; IDXMODE: s_addk_i32 [[ADD_IDX:s[0-9]+]], 0xfe00{{$}}
; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, 5

; GCN: s_cbranch_execnz [[LOOPBB]]
; GCN: s_mov_b64 exec, [[SAVEEXEC]]

; IDXMODE: s_set_gpr_idx_off

; GCN: buffer_store_dword
define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -512
  %value = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 5, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}insert_neg_inline_offset_vgpr:

; GCN-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT1:v[0-9]+]], 2{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT2:v[0-9]+]], 3{{$}}
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT3:v[0-9]+]], 4{{$}}
; GCN-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x1f4{{$}}

; IDXMODE: s_set_gpr_idx_on 0, dst

; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
; GCN: s_waitcnt lgkmcnt(0)

; The offset depends on the register that holds the first element of the vector.
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]]

; MOVREL: s_add_i32 m0, [[READLANE]], -16
; MOVREL: v_movreld_b32_e32 [[VEC_ELT0]], [[VAL]]

; IDXMODE: s_add_i32 [[ADD_IDX:s[0-9]+]], [[READLANE]], -16
; IDXMODE: s_set_gpr_idx_idx [[ADD_IDX]]
; IDXMODE: v_mov_b32_e32 [[VEC_ELT0]], [[VAL]]

; GCN: s_cbranch_execnz

; IDXMODE: s_set_gpr_idx_off
define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, -16
  %value = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 500, i32 %index
  store <4 x i32> %value, <4 x i32> addrspace(1)* %out
  ret void
}
309
Matt Arsenault9babdf42016-06-22 20:15:28 +0000310; When the block is split to insert the loop, make sure any other
311; places that need to be expanded in the same block are also handled.
312
Matt Arsenault93401f42016-10-07 03:55:04 +0000313; GCN-LABEL: {{^}}extract_vgpr_offset_multiple_in_block:
Matt Arsenault9babdf42016-06-22 20:15:28 +0000314
Matt Arsenaultcb540bc2016-07-19 00:35:03 +0000315; FIXME: Why is vector copied in between?
316
Matt Arsenault93401f42016-10-07 03:55:04 +0000317; GCN-DAG: {{buffer|flat}}_load_dword [[IDX0:v[0-9]+]]
318; GCN-DAG: s_mov_b32 [[S_ELT1:s[0-9]+]], 9
319; GCN-DAG: s_mov_b32 [[S_ELT0:s[0-9]+]], 7
320; GCN-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], [[S_ELT0]]
321; GCN-DAG: v_mov_b32_e32 [[VEC_ELT1:v[0-9]+]], [[S_ELT1]]
Matt Arsenault9babdf42016-06-22 20:15:28 +0000322
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000323; IDXMODE: s_set_gpr_idx_on 0, src0
324
Matt Arsenault93401f42016-10-07 03:55:04 +0000325; GCN: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec
326; GCN: s_waitcnt vmcnt(0)
Matt Arsenault9babdf42016-06-22 20:15:28 +0000327
Matt Arsenault93401f42016-10-07 03:55:04 +0000328; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
329; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
330; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000331
332; MOVREL: s_mov_b32 m0, [[READLANE]]
333; MOVREL: s_and_saveexec_b64 vcc, vcc
334; MOVREL: v_movrels_b32_e32 [[MOVREL0:v[0-9]+]], [[VEC_ELT0]]
335
336; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
337; IDXMODE: s_and_saveexec_b64 vcc, vcc
338; IDXMODE: v_mov_b32_e32 [[MOVREL0:v[0-9]+]], [[VEC_ELT0]]
339
Matt Arsenault93401f42016-10-07 03:55:04 +0000340; GCN-NEXT: s_xor_b64 exec, exec, vcc
341; GCN-NEXT: s_cbranch_execnz [[LOOP0]]
Matt Arsenault9babdf42016-06-22 20:15:28 +0000342
343; FIXME: Redundant copy
Matt Arsenault93401f42016-10-07 03:55:04 +0000344; GCN: s_mov_b64 exec, [[MASK]]
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000345; IDXMODE: s_set_gpr_idx_off
346
Matt Arsenault93401f42016-10-07 03:55:04 +0000347; GCN: v_mov_b32_e32 [[VEC_ELT1_2:v[0-9]+]], [[S_ELT1]]
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000348
349; IDXMODE: s_set_gpr_idx_on 0, src0
Matt Arsenault93401f42016-10-07 03:55:04 +0000350; GCN: s_mov_b64 [[MASK2:s\[[0-9]+:[0-9]+\]]], exec
Matt Arsenault9babdf42016-06-22 20:15:28 +0000351
Matt Arsenault93401f42016-10-07 03:55:04 +0000352; GCN: [[LOOP1:BB[0-9]+_[0-9]+]]:
353; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
354; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000355
356; MOVREL: s_mov_b32 m0, [[READLANE]]
357; MOVREL: s_and_saveexec_b64 vcc, vcc
358; MOVREL-NEXT: v_movrels_b32_e32 [[MOVREL1:v[0-9]+]], [[VEC_ELT1_2]]
359
360; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
361; IDXMODE: s_and_saveexec_b64 vcc, vcc
362; IDXMODE-NEXT: v_mov_b32_e32 [[MOVREL1:v[0-9]+]], [[VEC_ELT1_2]]
363
Matt Arsenault93401f42016-10-07 03:55:04 +0000364; GCN-NEXT: s_xor_b64 exec, exec, vcc
365; GCN: s_cbranch_execnz [[LOOP1]]
Matt Arsenault9babdf42016-06-22 20:15:28 +0000366
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000367; IDXMODE: s_set_gpr_idx_off
368
Matt Arsenault93401f42016-10-07 03:55:04 +0000369; GCN: buffer_store_dword [[MOVREL0]]
370; GCN: buffer_store_dword [[MOVREL1]]
Matt Arsenault9babdf42016-06-22 20:15:28 +0000371define void @extract_vgpr_offset_multiple_in_block(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
372entry:
373 %id = call i32 @llvm.amdgcn.workitem.id.x() #1
374 %id.ext = zext i32 %id to i64
375 %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %id.ext
376 %idx0 = load volatile i32, i32 addrspace(1)* %gep
377 %idx1 = add i32 %idx0, 1
378 %val0 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx0
Matt Arsenault3cb4dde2016-06-22 23:40:57 +0000379 %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={SGPR4}" ()
Matt Arsenault9babdf42016-06-22 20:15:28 +0000380 %val1 = extractelement <4 x i32> <i32 7, i32 9, i32 11, i32 13>, i32 %idx1
381 store volatile i32 %val0, i32 addrspace(1)* %out0
382 store volatile i32 %val1, i32 addrspace(1)* %out0
Matt Arsenault3cb4dde2016-06-22 23:40:57 +0000383 %cmp = icmp eq i32 %id, 0
384 br i1 %cmp, label %bb1, label %bb2
385
386bb1:
387 store volatile i32 %live.out.reg, i32 addrspace(1)* undef
388 br label %bb2
389
390bb2:
Matt Arsenault9babdf42016-06-22 20:15:28 +0000391 ret void
392}

; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[S_ELT0:[0-9]+]]:[[S_ELT3:[0-9]+]]{{\]}}
; GCN-DAG: {{buffer|flat}}_load_dword [[IDX0:v[0-9]+]]
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62

; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT3:[0-9]+]], s[[S_ELT3]]
; GCN: v_mov_b32_e32 v[[VEC_ELT2:[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 v[[VEC_ELT1:[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]

; IDXMODE: s_set_gpr_idx_on 0, dst

; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]

; MOVREL: s_mov_b32 m0, [[READLANE]]
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: v_movreld_b32_e32 v[[VEC_ELT0]], [[INS0]]

; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE-NEXT: v_mov_b32_e32 v[[VEC_ELT0]], [[INS0]]

; GCN-NEXT: s_xor_b64 exec, exec, vcc
; GCN: s_cbranch_execnz [[LOOP0]]

; FIXME: Redundant copy
; GCN: s_mov_b64 exec, [[MASK:s\[[0-9]+:[0-9]+\]]]
; IDXMODE: s_set_gpr_idx_off

; IDXMODE: s_set_gpr_idx_on 0, dst
; GCN: s_mov_b64 [[MASK]], exec

; GCN: [[LOOP1:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]

; MOVREL: s_mov_b32 m0, [[READLANE]]
; MOVREL: s_and_saveexec_b64 vcc, vcc
; MOVREL-NEXT: v_movreld_b32_e32 v[[VEC_ELT1]], 63

; IDXMODE: s_set_gpr_idx_idx [[READLANE]]
; IDXMODE: s_and_saveexec_b64 vcc, vcc
; IDXMODE-NEXT: v_mov_b32_e32 v[[VEC_ELT1]], 63

; GCN-NEXT: s_xor_b64 exec, exec, vcc
; GCN: s_cbranch_execnz [[LOOP1]]

; GCN: buffer_store_dwordx4 v{{\[}}[[VEC_ELT0]]:

; GCN: buffer_store_dword [[INS0]]
define void @insert_vgpr_offset_multiple_in_block(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, i32 addrspace(1)* %in, <4 x i32> %vec0) #0 {
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %id.ext = zext i32 %id to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 %id.ext
  %idx0 = load volatile i32, i32 addrspace(1)* %gep
  %idx1 = add i32 %idx0, 1
  %live.out.val = call i32 asm sideeffect "v_mov_b32 $0, 62", "=v"()
  %vec1 = insertelement <4 x i32> %vec0, i32 %live.out.val, i32 %idx0
  %vec2 = insertelement <4 x i32> %vec1, i32 63, i32 %idx1
  store volatile <4 x i32> %vec2, <4 x i32> addrspace(1)* %out0
  %cmp = icmp eq i32 %id, 0
  br i1 %cmp, label %bb1, label %bb2

bb1:
  store volatile i32 %live.out.val, i32 addrspace(1)* undef
  br label %bb2

bb2:
  ret void
}

; GCN-LABEL: {{^}}extract_adjacent_blocks:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: s_cmp_lg_u32
; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]

; GCN: buffer_load_dwordx4
; MOVREL: s_mov_b32 m0,
; MOVREL: v_movrels_b32_e32

; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, src0
; IDXMODE: v_mov_b32_e32
; IDXMODE: s_set_gpr_idx_off

; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]

; GCN: [[BB4]]:
; GCN: buffer_load_dwordx4
; MOVREL: s_mov_b32 m0,
; MOVREL: v_movrels_b32_e32

; IDXMODE: s_set_gpr_idx_on
; IDXMODE: v_mov_b32_e32
; IDXMODE: s_set_gpr_idx_off

; GCN: [[ENDBB]]:
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @extract_adjacent_blocks(i32 %arg) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp3 = extractelement <4 x float> %tmp2, i32 undef
  br label %bb7

bb4:
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp6 = extractelement <4 x float> %tmp5, i32 undef
  br label %bb7

bb7:
  %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile float %tmp8, float addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}insert_adjacent_blocks:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: s_cmp_lg_u32
; GCN: s_cbranch_scc0 [[BB4:BB[0-9]+_[0-9]+]]

; GCN: buffer_load_dwordx4
; MOVREL: s_mov_b32 m0,
; MOVREL: v_movreld_b32_e32

; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, dst
; IDXMODE: v_mov_b32_e32
; IDXMODE: s_set_gpr_idx_off

; GCN: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]

; GCN: [[BB4]]:
; GCN: buffer_load_dwordx4
; MOVREL: s_mov_b32 m0,
; MOVREL: v_movreld_b32_e32

; IDXMODE: s_set_gpr_idx_on s{{[0-9]+}}, dst
; IDXMODE: v_mov_b32_e32
; IDXMODE: s_set_gpr_idx_off

; GCN: [[ENDBB]]:
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @insert_adjacent_blocks(i32 %arg, float %val0) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:                                              ; preds = %bb
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
  br label %bb7

bb4:                                              ; preds = %bb
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
  br label %bb7

bb7:                                              ; preds = %bb4, %bb1
  %tmp8 = phi <4 x float> [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile <4 x float> %tmp8, <4 x float> addrspace(1)* undef
  ret void
}
563
564; FIXME: Should be able to fold zero input to movreld to inline imm?
565
Matt Arsenault93401f42016-10-07 03:55:04 +0000566; GCN-LABEL: {{^}}multi_same_block:
Matt Arsenault9babdf42016-06-22 20:15:28 +0000567
Matt Arsenault93401f42016-10-07 03:55:04 +0000568; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT0:[0-9]+]], 0x41880000
569; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41900000
570; GCN-DAG: v_mov_b32_e32 v[[VEC0_ELT2:[0-9]+]], 0x41980000
571; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a00000
572; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a80000
573; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b00000
574; GCN-DAG: s_load_dword [[ARG:s[0-9]+]]
Matt Arsenaultcb540bc2016-07-19 00:35:03 +0000575
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000576; MOVREL-DAG: s_add_i32 m0, [[ARG]], -16
577; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT0]], 4.0
Matt Arsenault93401f42016-10-07 03:55:04 +0000578; GCN-NOT: m0
Matt Arsenaultcb540bc2016-07-19 00:35:03 +0000579
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000580; IDXMODE-DAG: s_add_i32 [[ARG_ADD:s[0-9]+]], [[ARG]], -16
581; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
582; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT0]], 4.0
583; IDXMODE: s_set_gpr_idx_off
584
Matt Arsenault93401f42016-10-07 03:55:04 +0000585; GCN: v_mov_b32_e32 v[[VEC0_ELT2]], 0x4188cccd
586; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4190cccd
587; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x4198cccd
588; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a0cccd
589; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41a8cccd
590; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x41b0cccd
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000591
592; MOVREL: v_movreld_b32_e32 v[[VEC0_ELT2]], -4.0
593
594; IDXMODE: s_set_gpr_idx_on [[ARG_ADD]], dst
595; IDXMODE: v_mov_b32_e32 v[[VEC0_ELT2]], -4.0
596; IDXMODE: s_set_gpr_idx_off
Matt Arsenault9babdf42016-06-22 20:15:28 +0000597
Matt Arsenault93401f42016-10-07 03:55:04 +0000598; GCN: s_mov_b32 m0, -1
599; GCN: ds_write_b32
600; GCN: ds_write_b32
601; GCN: s_endpgm
Matt Arsenault9babdf42016-06-22 20:15:28 +0000602define void @multi_same_block(i32 %arg) #0 {
603bb:
604 %tmp1 = add i32 %arg, -16
Matt Arsenaultcb540bc2016-07-19 00:35:03 +0000605 %tmp2 = insertelement <6 x float> <float 1.700000e+01, float 1.800000e+01, float 1.900000e+01, float 2.000000e+01, float 2.100000e+01, float 2.200000e+01>, float 4.000000e+00, i32 %tmp1
Matt Arsenault9babdf42016-06-22 20:15:28 +0000606 %tmp3 = add i32 %arg, -16
Matt Arsenaultcb540bc2016-07-19 00:35:03 +0000607 %tmp4 = insertelement <6 x float> <float 0x40311999A0000000, float 0x40321999A0000000, float 0x40331999A0000000, float 0x40341999A0000000, float 0x40351999A0000000, float 0x40361999A0000000>, float -4.0, i32 %tmp3
Matt Arsenault9babdf42016-06-22 20:15:28 +0000608 %tmp5 = bitcast <6 x float> %tmp2 to <6 x i32>
609 %tmp6 = extractelement <6 x i32> %tmp5, i32 1
610 %tmp7 = bitcast <6 x float> %tmp4 to <6 x i32>
611 %tmp8 = extractelement <6 x i32> %tmp7, i32 5
612 store volatile i32 %tmp6, i32 addrspace(3)* undef, align 4
613 store volatile i32 %tmp8, i32 addrspace(3)* undef, align 4
614 ret void
615}
616
Matt Arsenaultb4d95032016-06-28 01:09:00 +0000617; offset puts outside of superegister bounaries, so clamp to 1st element.
Matt Arsenault93401f42016-10-07 03:55:04 +0000618; GCN-LABEL: {{^}}extract_largest_inbounds_offset:
619; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}}
620; GCN-DAG: s_load_dword [[IDX:s[0-9]+]]
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000621; MOVREL: s_mov_b32 m0, [[IDX]]
622; MOVREL: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[HI_ELT]]
623
624; IDXMODE: s_set_gpr_idx_on [[IDX]], src0
625; IDXMODE: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], v[[HI_ELT]]
626; IDXMODE: s_set_gpr_idx_off
627
Matt Arsenault93401f42016-10-07 03:55:04 +0000628; GCN: buffer_store_dword [[EXTRACT]]
; Dynamic extract at %idx + 3: the largest still-in-bounds constant offset
; for a <4 x i32>.  The CHECK lines above verify the +3 is folded into the
; choice of base register (v[[HI_ELT]]) rather than added to the index.
Matt Arsenaultb4d95032016-06-28 01:09:00 +0000629define void @extract_largest_inbounds_offset(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
630entry:
631 %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
632 %offset = add i32 %idx, 3
633 %value = extractelement <4 x i32> %ld, i32 %offset
634 store i32 %value, i32 addrspace(1)* %out
635 ret void
636}
637
Matt Arsenault93401f42016-10-07 03:55:04 +0000638; GCN-LABEL: {{^}}extract_out_of_bounds_offset:
639; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[LO_ELT:[0-9]+]]:[[HI_ELT:[0-9]+]]{{\]}}
640; GCN-DAG: s_load_dword [[IDX:s[0-9]+]]
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000641; MOVREL: s_add_i32 m0, [[IDX]], 4
642; MOVREL: v_movrels_b32_e32 [[EXTRACT:v[0-9]+]], v[[LO_ELT]]
643
644; IDXMODE: s_add_i32 [[ADD_IDX:s[0-9]+]], [[IDX]], 4
645; IDXMODE: s_set_gpr_idx_on [[ADD_IDX]], src0
646; IDXMODE: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], v[[LO_ELT]]
647; IDXMODE: s_set_gpr_idx_off
648
Matt Arsenault93401f42016-10-07 03:55:04 +0000649; GCN: buffer_store_dword [[EXTRACT]]
; Dynamic extract at %idx + 4: the constant offset alone already exceeds the
; <4 x i32> bounds, so per the CHECK lines the +4 is NOT folded into the base
; register; it stays in the index computation (s_add_i32) and the low element
; (v[[LO_ELT]]) is used as the indexing base.
Matt Arsenaultb4d95032016-06-28 01:09:00 +0000650define void @extract_out_of_bounds_offset(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx) {
651entry:
652 %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
653 %offset = add i32 %idx, 4
654 %value = extractelement <4 x i32> %ld, i32 %offset
655 store i32 %value, i32 addrspace(1)* %out
656 ret void
657}
658
Matt Arsenault1322b6f2016-07-09 01:13:56 +0000659; Test that the or is folded into the base address register instead of
660; added to m0
661
Matt Arsenault93401f42016-10-07 03:55:04 +0000662; GCN-LABEL: {{^}}extractelement_v4i32_or_index:
663; GCN: s_load_dword [[IDX_IN:s[0-9]+]]
664; GCN: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
665; GCN-NOT: [[IDX_SHL]]
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000666
667; MOVREL: s_mov_b32 m0, [[IDX_SHL]]
668; MOVREL: v_movrels_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
669
670; IDXMODE: s_set_gpr_idx_on [[IDX_SHL]], src0
671; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
672; IDXMODE: s_set_gpr_idx_off
; Index is (%idx.in << 2) | 1.  Because the low bits are known zero after the
; shift, the `or 1` behaves like an add of 1 and — per the CHECK lines — is
; folded into the indexed base register instead of being materialized into
; the index (m0 / gpr-idx) value.
Matt Arsenault1322b6f2016-07-09 01:13:56 +0000673define void @extractelement_v4i32_or_index(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %idx.in) {
674entry:
675 %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
676 %idx.shl = shl i32 %idx.in, 2
677 %idx = or i32 %idx.shl, 1
678 %value = extractelement <4 x i32> %ld, i32 %idx
679 store i32 %value, i32 addrspace(1)* %out
680 ret void
681}
682
Matt Arsenault93401f42016-10-07 03:55:04 +0000683; GCN-LABEL: {{^}}insertelement_v4f32_or_index:
684; GCN: s_load_dword [[IDX_IN:s[0-9]+]]
685; GCN: s_lshl_b32 [[IDX_SHL:s[0-9]+]], [[IDX_IN]]
686; GCN-NOT: [[IDX_SHL]]
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000687
688; MOVREL: s_mov_b32 m0, [[IDX_SHL]]
689; MOVREL: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
690
691; IDXMODE: s_set_gpr_idx_on [[IDX_SHL]], dst
692; IDXMODE: v_mov_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
693; IDXMODE: s_set_gpr_idx_off
; Insert-side twin of extractelement_v4i32_or_index: the `or 1` on the
; shifted index is expected to fold into the destination base register of
; the indirect write (v_movreld / dst index mode) rather than into m0.
Matt Arsenault1322b6f2016-07-09 01:13:56 +0000694define void @insertelement_v4f32_or_index(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %idx.in) nounwind {
695 %idx.shl = shl i32 %idx.in, 2
696 %idx = or i32 %idx.shl, 1
697 %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %idx
698 store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
699 ret void
700}
701
Matt Arsenault93401f42016-10-07 03:55:04 +0000702; GCN-LABEL: {{^}}broken_phi_bb:
703; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8
Matt Arsenaultf0ba86a2016-07-21 09:40:57 +0000704
Matt Arsenault93401f42016-10-07 03:55:04 +0000705; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]]
Matt Arsenaultf0ba86a2016-07-21 09:40:57 +0000706
Matt Arsenault93401f42016-10-07 03:55:04 +0000707; GCN: {{^BB[0-9]+_[0-9]+}}:
708; GCN: s_mov_b64 exec,
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000709; IDXMODE: s_set_gpr_idx_off
Matt Arsenaultf0ba86a2016-07-21 09:40:57 +0000710
Matt Arsenault93401f42016-10-07 03:55:04 +0000711; GCN: [[BB2]]:
712; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
713; GCN: buffer_load_dword
Matt Arsenaultf0ba86a2016-07-21 09:40:57 +0000714
Matt Arsenault93401f42016-10-07 03:55:04 +0000715; GCN: [[REGLOOP:BB[0-9]+_[0-9]+]]:
Matt Arsenaultd486d3f2016-10-12 18:49:05 +0000716; MOVREL: v_movreld_b32_e32
717
718; IDXMODE: s_set_gpr_idx_idx
719; IDXMODE: v_mov_b32_e32
Matt Arsenault93401f42016-10-07 03:55:04 +0000720; GCN: s_cbranch_execnz [[REGLOOP]]
; Loop whose body performs insertelement with a divergent (VGPR) index loaded
; from memory.  The CHECK lines above assert the exec-mask waterfall loop the
; backend emits for a VGPR index (v_cmp / s_mov_b64 exec / s_cbranch_execnz),
; and that the phi register survives it.  Historically a regression test for
; phi handling around that inserted control flow — TODO confirm original PR.
Matt Arsenaultf0ba86a2016-07-21 09:40:57 +0000721define void @broken_phi_bb(i32 %arg, i32 %arg1) #0 {
722bb:
723 br label %bb2
724
; Loop header: %tmp starts at 8, is replaced by %tmp7 on each back edge.
725bb2: ; preds = %bb4, %bb
726 %tmp = phi i32 [ 8, %bb ], [ %tmp7, %bb4 ]
727 %tmp3 = icmp slt i32 %tmp, %arg
728 br i1 %tmp3, label %bb4, label %bb8
729
; Loop body: volatile load forces %vgpr into a VGPR, making the
; insertelement indices divergent.
730bb4: ; preds = %bb2
731 %vgpr = load volatile i32, i32 addrspace(1)* undef
732 %tmp5 = insertelement <8 x i32> undef, i32 undef, i32 %vgpr
733 %tmp6 = insertelement <8 x i32> %tmp5, i32 %arg1, i32 %vgpr
734 %tmp7 = extractelement <8 x i32> %tmp6, i32 0
735 br label %bb2
736
737bb8: ; preds = %bb2
738 ret void
739}
740
Matt Arsenault9c47dd52016-02-11 06:02:01 +0000741declare i32 @llvm.amdgcn.workitem.id.x() #1
742
Matt Arsenault9babdf42016-06-22 20:15:28 +0000743attributes #0 = { nounwind }
Tom Stellard8b0182a2015-04-23 20:32:01 +0000744attributes #1 = { nounwind readnone }