blob: b14797c78976db73b056de1600e79ae0300da848 [file] [log] [blame]
Matt Arsenault711b3902015-08-07 20:18:34 +00001; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
Tom Stellard70580f82015-07-20 14:28:41 +00002; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
Matt Arsenault7aad8fd2017-01-24 22:02:15 +00003; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
Matt Arsenault03d85842016-06-27 20:32:13 +00004; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
5; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
Matt Arsenault7aad8fd2017-01-24 22:02:15 +00006; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
Matt Arsenault73e06fa2015-06-04 16:17:42 +00007
; Little-endian layout. Pointers in the default address space and in address
; spaces 3 and 5 are 32 bits wide; address spaces 1, 2, 4 and 24 use 64-bit
; pointers.
Matt Arsenault02d915b2017-03-15 22:35:20 +00008target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
9
; i32 load from a global (addrspace 1) pointer at constant element offset 7;
; the load lives only in the %if block. The OPT-CI checks require that no GEP
; of %in is left ahead of the branch and that a ptrtoint appears after it,
; while the OPT-VI check requires the GEP of %in to still be there.
Matt Arsenault73e06fa2015-06-04 16:17:42 +000010; OPT-LABEL: @test_sink_global_small_offset_i32(
Tom Stellard70580f82015-07-20 14:28:41 +000011; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
12; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
Matt Arsenault73e06fa2015-06-04 16:17:42 +000013; OPT: br i1
Tom Stellard70580f82015-07-20 14:28:41 +000014; OPT-CI: ptrtoint
Matt Arsenault73e06fa2015-06-04 16:17:42 +000015
16; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
17; GCN: {{^}}BB0_2:
Tom Stellardbc4497b2016-02-12 23:45:29 +000018define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
Matt Arsenault73e06fa2015-06-04 16:17:42 +000019entry:
20 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
21 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
Tom Stellardbc4497b2016-02-12 23:45:29 +000022 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
23 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault73e06fa2015-06-04 16:17:42 +000024 br i1 %tmp0, label %endif, label %if
25
26if:
27 %tmp1 = load i32, i32 addrspace(1)* %in.gep
28 br label %endif
29
30endif:
31 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
32 store i32 %x, i32 addrspace(1)* %out.gep
33 br label %done
34
35done:
36 ret void
37}
38
; Sign-extended i8 load from a global pointer at byte offset 65535. The OPT
; checks expect %in.gep to remain in place ahead of the branch. The GCN checks
; expect a buffer_load_sbyte inside the exec-masked region whose line ends in
; an SGPR operand, i.e. with no immediate offset field printed.
39; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
40; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
41; OPT: br i1
42
43; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
44; GCN: s_and_saveexec_b64
Nikolay Haustov4f672a32016-04-29 09:02:30 +000045; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
Matt Arsenault73e06fa2015-06-04 16:17:42 +000046; GCN: {{^}}BB1_2:
47; GCN: s_or_b64 exec
Tom Stellardbc4497b2016-02-12 23:45:29 +000048define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
Matt Arsenault73e06fa2015-06-04 16:17:42 +000049entry:
50 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
51 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
Tom Stellardbc4497b2016-02-12 23:45:29 +000052 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
53 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault73e06fa2015-06-04 16:17:42 +000054 br i1 %tmp0, label %endif, label %if
55
56if:
57 %tmp1 = load i8, i8 addrspace(1)* %in.gep
58 %tmp2 = sext i8 %tmp1 to i32
59 br label %endif
60
61endif:
62 %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
63 store i32 %x, i32 addrspace(1)* %out.gep
64 br label %done
65
66done:
67 ret void
68}
69
; Sign-extended i8 load from a global pointer at byte offset 4095. Only the
; llc output is checked here. The GCN checks expect the constant to be folded
; into the buffer_load_sbyte as an immediate "offset:4095" with a zero soffset.
70; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
71; GCN: s_and_saveexec_b64
Nikolay Haustov4f672a32016-04-29 09:02:30 +000072; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
Matt Arsenault73e06fa2015-06-04 16:17:42 +000073; GCN: {{^}}BB2_2:
74; GCN: s_or_b64 exec
Tom Stellardbc4497b2016-02-12 23:45:29 +000075define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
Matt Arsenault73e06fa2015-06-04 16:17:42 +000076entry:
77 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
78 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
Tom Stellardbc4497b2016-02-12 23:45:29 +000079 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
80 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault73e06fa2015-06-04 16:17:42 +000081 br i1 %tmp0, label %endif, label %if
82
83if:
84 %tmp1 = load i8, i8 addrspace(1)* %in.gep
85 %tmp2 = sext i8 %tmp1 to i32
86 br label %endif
87
88endif:
89 %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
90 store i32 %x, i32 addrspace(1)* %out.gep
91 br label %done
92
93done:
94 ret void
95}
96
; Same shape as the 4095 case but with byte offset 4096. Only the llc output
; is checked. The GCN checks expect the buffer_load_sbyte line to end in an
; SGPR operand, i.e. the offset is not printed as an immediate offset field.
97; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
98; GCN: s_and_saveexec_b64
Nikolay Haustov4f672a32016-04-29 09:02:30 +000099; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000100; GCN: {{^}}BB3_2:
101; GCN: s_or_b64 exec
Tom Stellardbc4497b2016-02-12 23:45:29 +0000102define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000103entry:
104 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
105 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
Tom Stellardbc4497b2016-02-12 23:45:29 +0000106 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
107 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000108 br i1 %tmp0, label %endif, label %if
109
110if:
111 %tmp1 = load i8, i8 addrspace(1)* %in.gep
112 %tmp2 = sext i8 %tmp1 to i32
113 br label %endif
114
115endif:
116 %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
117 store i32 %x, i32 addrspace(1)* %out.gep
118 br label %done
119
120done:
121 ret void
122}
123
; Volatile store and loads through a GEP into an alloca at element index 1022.
; The OPT checks require that no "getelementptr [512 x i32]" is left ahead of
; the branch and that a ptrtoint appears after it. The GCN checks expect the
; store and load in the masked region to use "off" addressing with the
; immediate offset 4092.
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000124; OPT-LABEL: @test_sink_scratch_small_offset_i32(
125; OPT-NOT: getelementptr [512 x i32]
126; OPT: br i1
127; OPT: ptrtoint
128
129; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
130; GCN: s_and_saveexec_b64
Matt Arsenault39787bd2016-10-26 15:08:16 +0000131; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
132; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
Matt Arsenault711b3902015-08-07 20:18:34 +0000133; GCN: {{^}}BB4_2:
Tom Stellardbc4497b2016-02-12 23:45:29 +0000134define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000135entry:
136 %alloca = alloca [512 x i32], align 4
137 %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
138 %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
139 %add.arg = add i32 %arg, 8
Matt Arsenault707780b2017-02-22 21:05:25 +0000140 %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1022
141 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
142 %tmp0 = icmp eq i32 %tid, 0
143 br i1 %tmp0, label %endif, label %if
144
145if:
146 store volatile i32 123, i32* %alloca.gep
147 %tmp1 = load volatile i32, i32* %alloca.gep
148 br label %endif
149
150endif:
151 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
152 store i32 %x, i32 addrspace(1)* %out.gep.0
153 %load = load volatile i32, i32* %alloca.gep
154 store i32 %load, i32 addrspace(1)* %out.gep.1
155 br label %done
156
157done:
158 ret void
159}
160
; Same shape as the index-1022 scratch test, but the GEP uses element index
; 1023. The OPT checks still require the GEP to be gone ahead of the branch
; and a ptrtoint after it. The GCN checks expect each access to materialize a
; base VGPR holding 4 and to use "offen" addressing with immediate offset 4092.
161; This ends up not fitting due to the reserved 4 bytes at offset 0
162; OPT-LABEL: @test_sink_scratch_small_offset_i32_reserved(
163; OPT-NOT: getelementptr [512 x i32]
164; OPT: br i1
165; OPT: ptrtoint
166
167; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32_reserved:
168; GCN: s_and_saveexec_b64
169; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 4
170; GCN: buffer_store_dword {{v[0-9]+}}, [[BASE_FI0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
171; GCN: v_mov_b32_e32 [[BASE_FI1:v[0-9]+]], 4
172; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
173; GCN: {{^BB[0-9]+}}_2:
174
175define void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
176entry:
177 %alloca = alloca [512 x i32], align 4
178 %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
179 %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
180 %add.arg = add i32 %arg, 8
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000181 %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
Tom Stellardbc4497b2016-02-12 23:45:29 +0000182 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
183 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000184 br i1 %tmp0, label %endif, label %if
185
186if:
187 store volatile i32 123, i32* %alloca.gep
188 %tmp1 = load volatile i32, i32* %alloca.gep
189 br label %endif
190
191endif:
192 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
193 store i32 %x, i32 addrspace(1)* %out.gep.0
194 %load = load volatile i32, i32* %alloca.gep
195 store i32 %load, i32 addrspace(1)* %out.gep.1
196 br label %done
197
198done:
199 ret void
200}
201
; Negative case for the scratch tests, using element index 1024. The OPT
; checks expect %alloca.gep to stay ahead of the branch and no ptrtoint to
; appear after it. The GCN checks expect "offen" addressing through a VGPR
; with no immediate offset field on the store or the load.
202; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
203; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
204; OPT: br i1
205; OPT-NOT: ptrtoint
206
207; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
208; GCN: s_and_saveexec_b64
209; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
210; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
Matt Arsenault707780b2017-02-22 21:05:25 +0000211; GCN: {{^BB[0-9]+}}_2:
Tom Stellardbc4497b2016-02-12 23:45:29 +0000212define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000213entry:
214 %alloca = alloca [512 x i32], align 4
215 %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
216 %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
217 %add.arg = add i32 %arg, 8
218 %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
Tom Stellardbc4497b2016-02-12 23:45:29 +0000219 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
220 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000221 br i1 %tmp0, label %endif, label %if
222
223if:
224 store volatile i32 123, i32* %alloca.gep
225 %tmp1 = load volatile i32, i32* %alloca.gep
226 br label %endif
227
228endif:
229 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
230 store i32 %x, i32 addrspace(1)* %out.gep.0
231 %load = load volatile i32, i32* %alloca.gep
232 store i32 %load, i32 addrspace(1)* %out.gep.1
233 br label %done
234
235done:
236 ret void
237}
238
; Global i32 load whose element offset is the zero-extended %offset argument
; rather than a constant. Only the llc output is checked. On the bonaire run
; an addr64 buffer_load_dword with a VGPR pair is expected, and on the tonga
; run a flat_load_dword is expected.
239; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
240; GCN: s_and_saveexec_b64
Tom Stellard70580f82015-07-20 14:28:41 +0000241; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
242; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
Matt Arsenault707780b2017-02-22 21:05:25 +0000243; GCN: {{^BB[0-9]+}}_2:
Tom Stellardbc4497b2016-02-12 23:45:29 +0000244define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000245entry:
246 %offset.ext = zext i32 %offset to i64
247 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
248 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
Tom Stellardbc4497b2016-02-12 23:45:29 +0000249 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
250 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault73e06fa2015-06-04 16:17:42 +0000251 br i1 %tmp0, label %endif, label %if
252
253if:
254 %tmp1 = load i32, i32 addrspace(1)* %in.gep
255 br label %endif
256
257endif:
258 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
259 store i32 %x, i32 addrspace(1)* %out.gep
260 br label %done
261
262done:
263 ret void
264}
265
; i32 load from an addrspace(2) pointer at constant element offset 7. The OPT
; checks require that no GEP on an addrspace(2) pointer is left ahead of the
; branch. On the tahiti run the s_load_dword is expected to carry the
; immediate offset 0x7.
Matt Arsenault711b3902015-08-07 20:18:34 +0000266; OPT-LABEL: @test_sink_constant_small_offset_i32
267; OPT-NOT: getelementptr i32, i32 addrspace(2)*
268; OPT: br i1
269
270; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
271; GCN: s_and_saveexec_b64
272; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
273; GCN: s_or_b64 exec, exec
Tom Stellardbc4497b2016-02-12 23:45:29 +0000274define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
Matt Arsenault711b3902015-08-07 20:18:34 +0000275entry:
276 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
277 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
Tom Stellardbc4497b2016-02-12 23:45:29 +0000278 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
279 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault711b3902015-08-07 20:18:34 +0000280 br i1 %tmp0, label %endif, label %if
281
282if:
283 %tmp1 = load i32, i32 addrspace(2)* %in.gep
284 br label %endif
285
286endif:
287 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
288 store i32 %x, i32 addrspace(1)* %out.gep
289 br label %done
290
291done:
292 ret void
293}
294
; i32 load from an addrspace(2) pointer at constant element offset 255. The
; OPT checks require that no GEP on an addrspace(2) pointer is left ahead of
; the branch. On the tahiti run the s_load_dword is expected to carry the
; immediate offset 0xff.
295; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
296; OPT-NOT: getelementptr i32, i32 addrspace(2)*
297; OPT: br i1
298
299; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
300; GCN: s_and_saveexec_b64
301; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
302; GCN: s_or_b64 exec, exec
Tom Stellardbc4497b2016-02-12 23:45:29 +0000303define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
Matt Arsenault711b3902015-08-07 20:18:34 +0000304entry:
305 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
306 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
Tom Stellardbc4497b2016-02-12 23:45:29 +0000307 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
308 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault711b3902015-08-07 20:18:34 +0000309 br i1 %tmp0, label %endif, label %if
310
311if:
312 %tmp1 = load i32, i32 addrspace(2)* %in.gep
313 br label %endif
314
315endif:
316 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
317 store i32 %x, i32 addrspace(1)* %out.gep
318 br label %done
319
320done:
321 ret void
322}
323
; i32 load from an addrspace(2) pointer at constant element offset 256. The
; OPT-SI check expects the GEP to remain ahead of the branch, while the OPT-CI
; and OPT-VI checks require it to be gone. On the tahiti run the offset is
; expected in an SGPR loaded with 0x400 and used as the s_load_dword offset.
324; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
325; OPT-SI: getelementptr i32, i32 addrspace(2)*
326; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
327; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
328; OPT: br i1
329
330; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
331; GCN: s_and_saveexec_b64
332; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400
333
334; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
335; GCN: s_or_b64 exec, exec
Tom Stellardbc4497b2016-02-12 23:45:29 +0000336define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
Matt Arsenault711b3902015-08-07 20:18:34 +0000337entry:
338 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
339 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
Tom Stellardbc4497b2016-02-12 23:45:29 +0000340 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
341 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault711b3902015-08-07 20:18:34 +0000342 br i1 %tmp0, label %endif, label %if
343
344if:
345 %tmp1 = load i32, i32 addrspace(2)* %in.gep
346 br label %endif
347
348endif:
349 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
350 store i32 %x, i32 addrspace(1)* %out.gep
351 br label %done
352
353done:
354 ret void
355}
356
; i32 load from an addrspace(2) pointer at constant element offset 4294967295.
; The OPT-SI check expects the GEP to remain ahead of the branch and the
; OPT-CI check requires it to be gone; there is no OPT-VI check for this
; function. The GCN checks expect the address to be formed with s_add_u32 -4
; and s_addc_u32 3, and the tahiti run expects an s_load_dword at offset 0x0.
357; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
358; OPT-SI: getelementptr i32, i32 addrspace(2)*
359; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
360; OPT: br i1
361
362; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
363; GCN: s_and_saveexec_b64
Tom Stellard9a197672015-09-09 15:43:26 +0000364; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
365; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
Matt Arsenault711b3902015-08-07 20:18:34 +0000366; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
367; GCN: s_or_b64 exec, exec
Tom Stellardbc4497b2016-02-12 23:45:29 +0000368define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
Matt Arsenault711b3902015-08-07 20:18:34 +0000369entry:
370 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
371 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
Tom Stellardbc4497b2016-02-12 23:45:29 +0000372 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
373 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault711b3902015-08-07 20:18:34 +0000374 br i1 %tmp0, label %endif, label %if
375
376if:
377 %tmp1 = load i32, i32 addrspace(2)* %in.gep
378 br label %endif
379
380endif:
381 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
382 store i32 %x, i32 addrspace(1)* %out.gep
383 br label %done
384
385done:
386 ret void
387}
388
; i32 load from an addrspace(2) pointer at constant element offset
; 17179869181. The OPT checks expect the GEP to remain ahead of the branch on
; every opt run. The GCN checks expect an s_add_u32 / s_addc_u32 pair, and the
; tahiti run expects the s_load_dword to use offset 0x0.
389; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
390; OPT: getelementptr i32, i32 addrspace(2)*
391; OPT: br i1
392
393; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
394; GCN: s_and_saveexec_b64
395; GCN: s_add_u32
396; GCN: s_addc_u32
397; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
398; GCN: s_or_b64 exec, exec
Tom Stellardbc4497b2016-02-12 23:45:29 +0000399define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
Matt Arsenault711b3902015-08-07 20:18:34 +0000400entry:
401 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
402 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
Tom Stellardbc4497b2016-02-12 23:45:29 +0000403 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
404 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault711b3902015-08-07 20:18:34 +0000405 br i1 %tmp0, label %endif, label %if
406
407if:
408 %tmp1 = load i32, i32 addrspace(2)* %in.gep
409 br label %endif
410
411endif:
412 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
413 store i32 %x, i32 addrspace(1)* %out.gep
414 br label %done
415
416done:
417 ret void
418}
419
; i32 load from an addrspace(2) pointer at constant element offset 262143
; (byte offset 0xffffc). Only the llc output is checked. The tahiti run
; expects the offset in an SGPR loaded with 0xffffc, the bonaire run expects
; the immediate 0x3ffff, and the tonga run expects the immediate 0xffffc.
420; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
421; GCN: s_and_saveexec_b64
422; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
423; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
424
425; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
426; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
427
428; GCN: s_or_b64 exec, exec
Tom Stellardbc4497b2016-02-12 23:45:29 +0000429define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
Matt Arsenault711b3902015-08-07 20:18:34 +0000430entry:
431 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
432 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
Tom Stellardbc4497b2016-02-12 23:45:29 +0000433 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
434 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault711b3902015-08-07 20:18:34 +0000435 br i1 %tmp0, label %endif, label %if
436
437if:
438 %tmp1 = load i32, i32 addrspace(2)* %in.gep
439 br label %endif
440
441endif:
442 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
443 store i32 %x, i32 addrspace(1)* %out.gep
444 br label %done
445
446done:
447 ret void
448}
449
; i32 load from an addrspace(2) pointer at constant element offset 262144
; (byte offset 0x100000). The OPT-SI and OPT-VI checks expect the GEP to
; remain ahead of the branch, while the OPT-CI check requires it to be gone.
; The tahiti and tonga runs expect the offset in an SGPR loaded with 0x100000;
; the bonaire run expects the immediate 0x40000.
450; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
451; OPT-SI: getelementptr i32, i32 addrspace(2)*
452; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
453; OPT-VI: getelementptr i32, i32 addrspace(2)*
454; OPT: br i1
455
456; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
457; GCN: s_and_saveexec_b64
458; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
459; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
460
461; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}
462
463; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
464; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
465
466; GCN: s_or_b64 exec, exec
Tom Stellardbc4497b2016-02-12 23:45:29 +0000467define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
Matt Arsenault711b3902015-08-07 20:18:34 +0000468entry:
469 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
470 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
Tom Stellardbc4497b2016-02-12 23:45:29 +0000471 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
472 %tmp0 = icmp eq i32 %tid, 0
Matt Arsenault711b3902015-08-07 20:18:34 +0000473 br i1 %tmp0, label %endif, label %if
474
475if:
476 %tmp1 = load i32, i32 addrspace(2)* %in.gep
477 br label %endif
478
479endif:
480 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
481 store i32 %x, i32 addrspace(1)* %out.gep
482 br label %done
483
484done:
485 ret void
486}
Tom Stellardbc4497b2016-02-12 23:45:29 +0000487
; Two float loads from an addrspace(3) struct, at field 1 elements 0 and 2
; (dword indices 3 and 5 from %ptr). The GEPs sit in the entry block and the
; loads in %bb32. The OPT check expects a ptrtoint of %ptr to i32. The GCN
; checks expect one ds_read2_b32 that reads from the base pointer register
; with offset0:3 and offset1:5.
;
; The destination register-pair pattern below is written as two separate
; digit classes around the colon. The earlier form dropped the closing
; bracket of the first class, so it only matched by accident.
Matt Arsenaultc1e6a452016-07-09 08:02:28 +0000488%struct.foo = type { [3 x float], [3 x float] }
489
490; OPT-LABEL: @sink_ds_address(
Matt Arsenault02d915b2017-03-15 22:35:20 +0000491; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i32
Matt Arsenaultc1e6a452016-07-09 08:02:28 +0000492
493; GCN-LABEL: {{^}}sink_ds_address:
494; GCN: s_load_dword [[SREG1:s[0-9]+]],
495; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
496; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
497define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
498entry:
499 %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
500 %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
501 br label %bb32
502
503bb32:
504 %a = load float, float addrspace(3)* %x, align 4
505 %b = load float, float addrspace(3)* %y, align 4
506 %cmp = fcmp one float %a, %b
507 br i1 %cmp, label %bb34, label %bb33
508
509bb33:
510 unreachable
511
512bb34:
513 unreachable
514}
515
; align-1 i32 load through a bitcast of an i8 addrspace(2) GEP at byte offset
; 4095. Only the opt output is checked. After the branch, inside the if block,
; the address is expected to be rebuilt as a ptrtoint of %in followed by an
; add of 4095.
Matt Arsenault3cc1e002016-08-13 01:43:51 +0000516; Address offset is not a multiple of 4. This is a valid mubuf offset,
517; but not smrd.
518
519; OPT-LABEL: @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(
520; OPT: br i1 %tmp0,
521; OPT: if:
522; OPT: %sunkaddr = ptrtoint i8 addrspace(2)* %in to i64
523; OPT: %sunkaddr1 = add i64 %sunkaddr, 4095
524define void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
525entry:
526 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
527 %in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
528 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
529 %tmp0 = icmp eq i32 %tid, 0
530 br i1 %tmp0, label %endif, label %if
531
532if:
533 %bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)*
534 %tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1
535 br label %endif
536
537endif:
538 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
539 store i32 %x, i32 addrspace(1)* %out.gep
540 br label %done
541
542done:
543 ret void
544}
545
; atomicrmw add on an addrspace(3) pointer at element offset 7. Only the opt
; output is checked. The address is expected to be rebuilt as ptrtoint of %in,
; add 28, inttoptr, and the result used as the atomicrmw pointer operand.
Matt Arsenault02d915b2017-03-15 22:35:20 +0000546; OPT-LABEL: @test_sink_local_small_offset_atomicrmw_i32(
547; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
548; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
549; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
550; OPT: %tmp1 = atomicrmw add i32 addrspace(3)* %sunkaddr2, i32 2 seq_cst
551define void @test_sink_local_small_offset_atomicrmw_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
552entry:
553 %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
554 %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
555 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
556 %tmp0 = icmp eq i32 %tid, 0
557 br i1 %tmp0, label %endif, label %if
558
559if:
560 %tmp1 = atomicrmw add i32 addrspace(3)* %in.gep, i32 2 seq_cst
561 br label %endif
562
563endif:
564 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
565 store i32 %x, i32 addrspace(3)* %out.gep
566 br label %done
567
568done:
569 ret void
570}
571
; cmpxchg whose pointer operand is an addrspace(3) GEP at element offset 7.
; Only the opt output is checked. The address is expected to be rebuilt as
; ptrtoint of %in, add 28, inttoptr, and the result used as the cmpxchg
; pointer operand.
572; OPT-LABEL: @test_sink_local_small_offset_cmpxchg_i32(
573; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
574; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
575; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
576; OPT: %tmp1.struct = cmpxchg i32 addrspace(3)* %sunkaddr2, i32 undef, i32 2 seq_cst monotonic
577define void @test_sink_local_small_offset_cmpxchg_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
578entry:
579 %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
580 %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
581 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
582 %tmp0 = icmp eq i32 %tid, 0
583 br i1 %tmp0, label %endif, label %if
584
585if:
586 %tmp1.struct = cmpxchg i32 addrspace(3)* %in.gep, i32 undef, i32 2 seq_cst monotonic
587 %tmp1 = extractvalue { i32, i1 } %tmp1.struct, 0
588 br label %endif
589
590endif:
591 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
592 store i32 %x, i32 addrspace(3)* %out.gep
593 br label %done
594
595done:
596 ret void
597}
598
; Negative case for the cmpxchg test. Here %in.gep is the compare value of the
; cmpxchg, and the pointer operand is undef. The OPT checks expect %in.gep to
; stay ahead of the branch and the cmpxchg to keep using it unchanged.
599; OPT-LABEL: @test_wrong_operand_local_small_offset_cmpxchg_i32(
600; OPT: %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
601; OPT: br i1
602; OPT: cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic
603define void @test_wrong_operand_local_small_offset_cmpxchg_i32(i32 addrspace(3)* addrspace(3)* %out, i32 addrspace(3)* %in) {
604entry:
605 %out.gep = getelementptr i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* %out, i32 999999
606 %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
607 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
608 %tmp0 = icmp eq i32 %tid, 0
609 br i1 %tmp0, label %endif, label %if
610
611if:
612 %tmp1.struct = cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic
613 %tmp1 = extractvalue { i32 addrspace(3)*, i1 } %tmp1.struct, 0
614 br label %endif
615
616endif:
617 %x = phi i32 addrspace(3)* [ %tmp1, %if ], [ null, %entry ]
618 store i32 addrspace(3)* %x, i32 addrspace(3)* addrspace(3)* %out.gep
619 br label %done
620
621done:
622 ret void
623}
624
; Call to the llvm.amdgcn.atomic.inc intrinsic on an addrspace(3) pointer at
; element offset 7. Only the opt output is checked. The address is expected to
; be rebuilt as ptrtoint of %in, add 28, inttoptr, and the result passed as
; the pointer argument of the call.
Matt Arsenault7dc01c92017-03-15 23:15:12 +0000625; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32(
626; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
627; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
628; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
629; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
630define void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
631entry:
632 %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
633 %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
634 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
635 %tmp0 = icmp eq i32 %tid, 0
636 br i1 %tmp0, label %endif, label %if
637
638if:
639 %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
640 br label %endif
641
642endif:
643 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
644 store i32 %x, i32 addrspace(3)* %out.gep
645 br label %done
646
647done:
648 ret void
649}
650
; Call to the llvm.amdgcn.atomic.dec intrinsic on an addrspace(3) pointer at
; element offset 7. Only the opt output is checked. The address is expected to
; be rebuilt as ptrtoint of %in, add 28, inttoptr, and the result passed as
; the pointer argument of the call.
651; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32(
652; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
653; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
654; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
655; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
656define void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
657entry:
658 %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
659 %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
660 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
661 %tmp0 = icmp eq i32 %tid, 0
662 br i1 %tmp0, label %endif, label %if
663
664if:
665 %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
666 br label %endif
667
668endif:
669 %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
670 store i32 %x, i32 addrspace(3)* %out.gep
671 br label %done
672
673done:
674 ret void
675}
676
; Declarations of the intrinsics called by the functions in this file,
; followed by the attribute groups. Group #0 is used by the mbcnt.lo
; declaration and its call sites, and group #2 by the atomic inc/dec
; declarations. Group #1 is not referenced in the visible part of this file.
Tom Stellardbc4497b2016-02-12 23:45:29 +0000677declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
Matt Arsenault7dc01c92017-03-15 23:15:12 +0000678declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
679declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
Tom Stellardbc4497b2016-02-12 23:45:29 +0000680
681attributes #0 = { nounwind readnone }
Matt Arsenault3cc1e002016-08-13 01:43:51 +0000682attributes #1 = { nounwind }
Matt Arsenault7dc01c92017-03-15 23:15:12 +0000683attributes #2 = { nounwind argmemonly }