; Exercises address-computation sinking for the amdgcn target. The commands
; below feed this file either through opt -codegenprepare (IR-level checks) or
; through llc (ISA-level checks), once each for tahiti, bonaire and tonga.
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

; The GEP on %in (index 7) is computed in %entry but only used by the load in
; %if. For bonaire the IR checks expect no i32 GEP on %in ahead of the branch
; and an i8 GEP after it; for tonga they expect the i32 GEP to remain.
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
; OPT-CI: getelementptr i8,

; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define amdgpu_kernel void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; i8 GEP on %in with byte offset 65535, used by a sign-extended byte load in
; %if. The IR checks expect %in.gep to stay ahead of the branch; the ISA checks
; expect the buffer load to end in an SGPR operand with no immediate offset.
; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; i8 GEP on %in with byte offset 4095, used by a sign-extended byte load in
; %if. The ISA checks expect that offset to appear as the immediate
; offset:4095 on the buffer load.
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Same shape as the 4095-byte case but with byte offset 4096. The ISA checks
; expect the buffer load to end in an SGPR operand instead of carrying an
; immediate offset.
; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; GEP to index 1022 of a [512 x i32] alloca, used by volatile accesses in both
; %if and %endif. The IR checks expect no array-typed GEP ahead of the branch
; and an i8 GEP after it; the ISA checks expect the store and load to use
; immediate offset:4092 with no VGPR address operand ("off").
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; OPT: getelementptr i8,

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: {{^}}BB4_2:
define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1022
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; This ends up not fitting due to the reserved 4 bytes at offset 0.
; The alloca GEP uses index 1023. The IR checks still expect the array-typed
; GEP to be replaced by an i8 GEP after the branch; the ISA checks expect the
; constant 4 to be materialized in a VGPR and used as the "offen" address
; together with immediate offset:4092.
; OPT-LABEL: @test_sink_scratch_small_offset_i32_reserved(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; OPT: getelementptr i8,

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32_reserved:
; GCN: s_and_saveexec_b64
; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 4
; GCN: buffer_store_dword {{v[0-9]+}}, [[BASE_FI0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: v_mov_b32_e32 [[BASE_FI1:v[0-9]+]], 4
; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: {{^BB[0-9]+}}_2:

define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; The alloca GEP uses index 1024. The IR checks expect %alloca.gep to remain
; ahead of the branch with no ptrtoint introduced afterwards; the ISA checks
; expect "offen" addressing with no immediate offset on the store and load.
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint

; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^BB[0-9]+}}_2:
define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; The GEP index is the zero-extended kernel argument %offset rather than a
; constant. The ISA checks expect an addr64 buffer load for bonaire and a
; flat load for tonga.
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^BB[0-9]+}}_2:
define amdgpu_kernel void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
  %offset.ext = zext i32 %offset to i64
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) GEP with index 7, loaded in %if. The IR checks expect no i32
; addrspace(2) GEP ahead of the branch; the tahiti ISA checks expect the
; scalar load to carry the immediate 0x7.
; OPT-LABEL: @test_sink_constant_small_offset_i32
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) GEP with index 255, loaded in %if. The IR checks expect no i32
; addrspace(2) GEP ahead of the branch; the tahiti ISA checks expect the
; scalar load to carry the immediate 0xff.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) GEP with index 256, loaded in %if. The IR checks expect the
; i32 GEP to stay ahead of the branch for tahiti but not for bonaire or tonga;
; the tahiti ISA checks expect 0x400 to be moved into an SGPR that the scalar
; load then uses as its offset.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400

; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) GEP with index 4294967295, loaded in %if. The IR checks expect
; the i32 GEP to stay ahead of the branch for tahiti but not for bonaire; the
; ISA checks expect a 64-bit scalar add of the pair (-4, 3) to the pointer,
; followed on tahiti by a scalar load with immediate 0x0.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) GEP with index 17179869181, loaded in %if. The IR checks expect
; the i32 GEP to stay ahead of the branch on every run; the ISA checks expect
; a scalar add/add-with-carry pair, followed on tahiti by a scalar load with
; immediate 0x0.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) GEP with index 262143, loaded in %if. The ISA checks expect
; tahiti to move 0xffffc into an SGPR used as the load offset, bonaire to use
; the immediate 0x3ffff, and tonga to use the immediate 0xffffc.
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}

; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(2) GEP with index 262144, loaded in %if. The IR checks expect the
; i32 GEP to stay ahead of the branch for tahiti and tonga but not for
; bonaire; the ISA checks expect tahiti and tonga to move 0x100000 into an
; SGPR used as the load offset, and bonaire to use the immediate 0x40000.
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}

; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; GCN: s_or_b64 exec, exec
define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Two inbounds GEPs into the second [3 x float] member of %struct.foo
; (elements 0 and 2) are computed in %entry and loaded in %bb32. The IR checks
; expect an i8 GEP; the ISA checks expect both loads to come from a single
; ds_read2_b32 with offset0:3 offset1:5 on the same base register.
%struct.foo = type { [3 x float], [3 x float] }

; OPT-LABEL: @sink_ds_address(
; OPT: getelementptr i8,

; GCN-LABEL: {{^}}sink_ds_address:
; GCN: s_load_dword [[SREG1:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
define amdgpu_kernel void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
  %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
  %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
  br label %bb32

bb32:
  %a = load float, float addrspace(3)* %x, align 4
  %b = load float, float addrspace(3)* %y, align 4
  %cmp = fcmp one float %a, %b
  br i1 %cmp, label %bb34, label %bb33

bb33:
  unreachable

bb34:
  unreachable
}

; Address offset is not a multiple of 4. This is a valid mubuf offset,
; but not smrd.
; The i8 GEP (byte offset 4095) is bitcast to an i32 pointer and loaded with
; align 1 in %if. The IR checks expect an i8 GEP ending in 4095 to appear
; inside %if, after the branch.

; OPT-LABEL: @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(
; OPT: br i1 %tmp0,
; OPT: if:
; OPT: getelementptr i8, {{.*}} 4095
define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)*
  %tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(3) GEP with index 7 used as the pointer operand of an atomicrmw
; add in %if. The IR checks expect the address to be rebuilt as a bitcast to
; i8*, an i8 GEP of 28 bytes named %sunkaddr, and a bitcast back that feeds
; the atomicrmw.
; OPT-LABEL: @test_sink_local_small_offset_atomicrmw_i32(
; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)*
; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28
; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)*
; OPT: %tmp1 = atomicrmw add i32 addrspace(3)* %1, i32 2 seq_cst
define amdgpu_kernel void @test_sink_local_small_offset_atomicrmw_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = atomicrmw add i32 addrspace(3)* %in.gep, i32 2 seq_cst
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(3)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(3) GEP with index 7 used as the pointer operand of a cmpxchg in
; %if. The IR checks expect the address to be rebuilt as a bitcast to i8*, an
; i8 GEP of 28 bytes named %sunkaddr, and a bitcast back that feeds the
; cmpxchg.
; OPT-LABEL: @test_sink_local_small_offset_cmpxchg_i32(
; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)*
; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28
; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)*
; OPT: %tmp1.struct = cmpxchg i32 addrspace(3)* %1, i32 undef, i32 2 seq_cst monotonic
define amdgpu_kernel void @test_sink_local_small_offset_cmpxchg_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1.struct = cmpxchg i32 addrspace(3)* %in.gep, i32 undef, i32 2 seq_cst monotonic
  %tmp1 = extractvalue { i32, i1 } %tmp1.struct, 0
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(3)* %out.gep
  br label %done

done:
  ret void
}

; Here %in.gep is the compare-value operand of the cmpxchg, not its pointer
; operand (the pointer is undef). The IR checks expect %in.gep to remain ahead
; of the branch and the cmpxchg to be left unchanged.
; OPT-LABEL: @test_wrong_operand_local_small_offset_cmpxchg_i32(
; OPT: %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
; OPT: br i1
; OPT: cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic
define amdgpu_kernel void @test_wrong_operand_local_small_offset_cmpxchg_i32(i32 addrspace(3)* addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
  %out.gep = getelementptr i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* %out, i32 999999
  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1.struct = cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic
  %tmp1 = extractvalue { i32 addrspace(3)*, i1 } %tmp1.struct, 0
  br label %endif

endif:
  %x = phi i32 addrspace(3)* [ %tmp1, %if ], [ null, %entry ]
  store i32 addrspace(3)* %x, i32 addrspace(3)* addrspace(3)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(3) GEP with index 7 passed as the pointer argument of the
; llvm.amdgcn.atomic.inc intrinsic in %if. The IR checks expect the address to
; be rebuilt as a bitcast to i8*, an i8 GEP of 28 bytes named %sunkaddr, and a
; bitcast back that feeds the call.
; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32(
; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)*
; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28
; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)*
; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %1, i32 2, i32 0, i32 0, i1 false)
define amdgpu_kernel void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2, i32 0, i32 0, i1 false)
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(3)* %out.gep
  br label %done

done:
  ret void
}

; addrspace(3) GEP with index 7 passed as the pointer argument of the
; llvm.amdgcn.atomic.dec intrinsic in %if. The IR checks expect the address to
; be rebuilt as a bitcast to i8*, an i8 GEP of 28 bytes named %sunkaddr, and a
; bitcast back that feeds the call.
; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32(
; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)*
; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28
; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)*
; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %1, i32 2, i32 0, i32 0, i1 false)
define amdgpu_kernel void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2, i32 0, i32 0, i1 false)
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(3)* %out.gep
  br label %done

done:
  ret void
}

; Intrinsics called by the test kernels in this file, and the attribute
; groups they reference.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2

attributes #0 = { nounwind readnone }
; NOTE(review): group #1 does not appear to be referenced by anything visible
; in this file; kept as-is.
attributes #1 = { nounwind }
attributes #2 = { nounwind argmemonly }