; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Global load at element offset 7 from %in. The address is formed in the entry
; block but only dereferenced in %if. On bonaire the checks expect no such
; getelementptr ahead of the branch and a ptrtoint after it; on tonga the
; getelementptr is expected to remain.
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
; OPT-CI: ptrtoint

; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global byte load at offset 65535 from %in. The checks expect the
; getelementptr to stay ahead of the branch, and the sign-extending byte load
; to take its offset from a scalar register rather than an immediate.
; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep; the sext is what the sbyte load check corresponds to.
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global byte load at offset 4095 from %in. The checks expect the offset to
; appear as an immediate on the load (offset:4095) with a zero scalar offset.
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global byte load at offset 4096 from %in, one past the 4095 case above. The
; checks expect the load to end with a scalar register operand and no
; immediate offset.
; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Volatile store/load through an alloca at array index 1022. The checks expect
; no getelementptr on the array type ahead of the branch, a ptrtoint after it,
; and both memory instructions to carry immediate offset 4092 with no vector
; address register.
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; OPT: ptrtoint

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1022
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  ; Second use of %alloca.gep, outside the conditional block.
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; This ends up not fitting due to the reserved 4 bytes at offset 0.
; Same shape as test_sink_scratch_small_offset_i32 but at array index 1023.
; The checks expect a base of 4 to be materialized in a vector register for
; each access, combined with immediate offset 4092.
; OPT-LABEL: @test_sink_scratch_small_offset_i32_reserved(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; OPT: ptrtoint

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32_reserved:
; GCN: s_and_saveexec_b64
; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 4
; GCN: buffer_store_dword {{v[0-9]+}}, [[BASE_FI0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: v_mov_b32_e32 [[BASE_FI1:v[0-9]+]], 4
; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: {{^BB[0-9]+}}_2:

define void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  ; Second use of %alloca.gep, outside the conditional block.
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Same shape as the scratch tests above but at array index 1024. The checks
; expect the getelementptr to remain ahead of the branch with no ptrtoint after
; it, and the memory instructions to use a vector address register with no
; immediate offset.
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint

; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^BB[0-9]+}}_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  ; Second use of %alloca.gep, outside the conditional block.
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Global load indexed by a zero-extended i32 argument instead of a constant.
; The checks expect an addr64 buffer load with a vector register pair on
; bonaire and a flat load on tonga.
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^BB[0-9]+}}_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
  %offset.ext = zext i32 %offset to i64
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from addrspace(2) at element offset 7. The checks expect no
; getelementptr on the addrspace(2) pointer ahead of the branch, and on tahiti
; a scalar load with immediate offset 0x7.
; OPT-LABEL: @test_sink_constant_small_offset_i32(
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from addrspace(2) at element offset 255. The checks expect no
; getelementptr on the addrspace(2) pointer ahead of the branch, and on tahiti
; a scalar load with immediate offset 0xff.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32(
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from addrspace(2) at element offset 256, one past the 255 case. The
; checks expect tahiti to keep the getelementptr ahead of the branch and load
; through a register holding 0x400, while bonaire and tonga are expected to
; have no such getelementptr ahead of the branch.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400

; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from addrspace(2) at element offset 4294967295. The checks expect
; tahiti to keep the getelementptr ahead of the branch and bonaire not to. In
; the generated code the offset is expected to be applied with a 64-bit scalar
; add (low part -4, high part 3), followed on tahiti by a load at offset 0x0.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from addrspace(2) at element offset 17179869181. The checks expect the
; getelementptr to remain ahead of the branch on every target, and the address
; to be formed with a 64-bit scalar add before a load at offset 0x0 on tahiti.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32(
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from addrspace(2) at element offset 262143. The checks expect tahiti to
; place 0xffffc in a scalar register and load through it, bonaire to use
; immediate 0x3ffff, and tonga to use immediate 0xffffc.
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from addrspace(2) at element offset 262144, one past the 262143 case.
; The checks expect tahiti and tonga to keep the getelementptr ahead of the
; branch and load through a register holding 0x100000, while bonaire is
; expected to have no such getelementptr and to use immediate 0x40000.
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}

; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep.
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

%struct.foo = type { [3 x float], [3 x float] }

; Two addrspace(3) loads from elements 0 and 2 of the second array in
; %struct.foo. Both addresses are formed in the entry block and used in %bb32.
; The checks expect a ptrtoint of %ptr in the optimized IR and a single
; ds_read2_b32 with offset0:3 and offset1:5 off a register copied from the
; loaded pointer argument.
; OPT-LABEL: @sink_ds_address(
; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64

; GCN-LABEL: {{^}}sink_ds_address:
; GCN: s_load_dword [[SREG1:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
  %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
  %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
  br label %bb32

bb32:
  %a = load float, float addrspace(3)* %x, align 4
  %b = load float, float addrspace(3)* %y, align 4
  %cmp = fcmp one float %a, %b
  br i1 %cmp, label %bb34, label %bb33

bb33:
  unreachable

bb34:
  unreachable
}

; Address offset is not a multiple of 4. This is a valid mubuf offset,
; but not smrd.
; The i32 load goes through a bitcast of an i8 pointer at byte offset 4095 and
; is marked align 1. Only the optimized IR is checked: after the branch, inside
; %if, the address is expected to be rebuilt as ptrtoint of %in plus 4095.

; OPT-LABEL: @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(
; OPT: br i1 %tmp0,
; OPT: if:
; OPT: %sunkaddr = ptrtoint i8 addrspace(2)* %in to i64
; OPT: %sunkaddr1 = add i64 %sunkaddr, 4095
define void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  ; Only use of %in.gep, reinterpreted as an i32 pointer.
  %bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)*
  %tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Intrinsic whose result feeds the branch condition in the tests of this file.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

; Attribute group #0 is applied to the intrinsic declaration and its call sites.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }