; blob: f3425ca9fdd5cbe2336374aeadf522f1598a583b
;
; Exercises address-computation sinking on AMDGPU: the opt invocations run
; -codegenprepare and are checked with the OPT* prefixes; the llc invocations
; check the selected instructions with the GCN/SI/CI/VI prefixes. Each mode is
; run for tahiti, bonaire and tonga.
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; A constant-offset GEP on %in (i64 7) is computed in %entry but only used by
; the load in %if. The OPT-CI checks expect no such GEP ahead of the branch and
; a ptrtoint after it; the OPT-VI checks expect the GEP to still be present.
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
; OPT-CI: ptrtoint

; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Byte offset 65535 on a global (addrspace(1)) i8 pointer. The OPT checks
; expect %in.gep to remain ahead of the branch; the GCN checks expect a
; buffer_load_sbyte whose trailing offset operand is an SGPR.
; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Byte offset 4095 on a global i8 pointer. The GCN checks expect the offset to
; appear as an immediate on the load (offset:4095) with a zero soffset operand.
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Byte offset 4096, one more than in the 4095 case. The GCN checks expect the
; load to take its offset from an SGPR operand instead of an immediate.
; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Stack (alloca) access at element index 1023 of a [512 x i32] array. The OPT
; checks expect no GEP on the array type ahead of the branch and a ptrtoint
; after it; the GCN checks expect the store and load to carry offset:4092.
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; OPT: ptrtoint

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Same shape as the index-1023 scratch test, but with element index 1024. The
; OPT checks expect %alloca.gep to stay ahead of the branch with no ptrtoint
; after it; the GCN checks expect VGPR-addressed (offen) store and load.
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint

; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^}}BB5_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Global load whose GEP index is a zero-extended i32 argument rather than a
; constant. The checks expect an addr64 buffer load for the CI prefix and a
; flat load for the VI prefix.
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^}}BB6_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
  %offset.ext = zext i32 %offset to i64
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from an addrspace(2) pointer at element index 7. The OPT checks expect
; no GEP on the addrspace(2) pointer ahead of the branch; the SI check expects
; an s_load_dword with immediate offset 0x7.
; OPT-LABEL: @test_sink_constant_small_offset_i32
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from an addrspace(2) pointer at element index 255. The OPT checks
; expect no GEP on the addrspace(2) pointer ahead of the branch; the SI check
; expects an s_load_dword with immediate offset 0xff.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from an addrspace(2) pointer at element index 256. The OPT-SI checks
; expect the GEP to remain ahead of the branch, while the OPT-CI and OPT-VI
; checks expect it to be gone. The SI checks expect 0x400 to be materialized
; in an SGPR that is then used as the s_load_dword offset.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400

; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from an addrspace(2) pointer at element index 4294967295. The OPT-SI
; checks expect the GEP to remain ahead of the branch; the OPT-CI checks
; expect it to be gone. The GCN checks expect a 64-bit scalar add of the byte
; offset (low word -4, high word 3) before the load.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from an addrspace(2) pointer at element index 17179869181. The OPT
; checks expect the GEP to remain ahead of the branch for every target; the
; GCN checks expect an s_add_u32 / s_addc_u32 pair before the load.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from an addrspace(2) pointer at element index 262143 (byte offset
; 0xffffc). The SI checks expect the offset in an SGPR; the CI and VI checks
; expect an immediate offset (0x3ffff and 0xffffc respectively).
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Load from an addrspace(2) pointer at element index 262144 (byte offset
; 0x100000). The OPT-SI and OPT-VI checks expect the GEP to remain ahead of
; the branch; the OPT-CI checks expect it to be gone. The SI and VI checks
; expect the offset in an SGPR; the CI check expects immediate 0x40000.
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}

; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

%struct.foo = type { [3 x float], [3 x float] }

; Two float loads from fields of an addrspace(3) %struct.foo whose GEPs live
; in %entry while the loads are in %bb32. The OPT check expects a ptrtoint of
; %ptr; the GCN checks expect one ds_read2_b32 using offset0:3 and offset1:5
; off the base register copied from the loaded SGPR.
; The destination register-range pattern below was previously written as
; [0-9+:[0-9]+ (a single malformed bracket class); it now spells out the
; intended "digits, colon, digits" range.
; OPT-LABEL: @sink_ds_address(
; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64

; GCN-LABEL: {{^}}sink_ds_address:
; GCN: s_load_dword [[SREG1:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
  %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
  %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
  br label %bb32

bb32:
  %a = load float, float addrspace(3)* %x, align 4
  %b = load float, float addrspace(3)* %y, align 4
  %cmp = fcmp one float %a, %b
  br i1 %cmp, label %bb34, label %bb33

bb33:
  unreachable

bb34:
  unreachable
}

; Address offset is not a multiple of 4. This is a valid mubuf offset,
; but not smrd.
;
; The i32 load is done with align 1 through a bitcast of an i8 GEP at byte
; offset 4095. The OPT checks expect the address to be recomputed inside %if
; as a ptrtoint of %in followed by an add of 4095.

; OPT-LABEL: @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(
; OPT: br i1 %tmp0,
; OPT: if:
; OPT: %sunkaddr = ptrtoint i8 addrspace(2)* %in to i64
; OPT: %sunkaddr1 = add i64 %sunkaddr, 4095
define void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %bitcast = bitcast i8 addrspace(2)* %in.gep to i32 addrspace(2)*
  %tmp1 = load i32, i32 addrspace(2)* %bitcast, align 1
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Intrinsic whose result feeds the branch condition in the tests above.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

; #0 is applied to the intrinsic declaration and to its call sites.
attributes #0 = { nounwind readnone }
; NOTE(review): no use of #1 is visible in this file - confirm before removing.
attributes #1 = { nounwind }