; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
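
; llvm.amdgcn.ubfe.i32(src, offset, width) is an unsigned bitfield extract:
; it returns (src >> offset) & ((1 << width) - 1), i.e. the `width` bits of
; `src` starting at bit `offset`, zero-extended to 32 bits. It normally
; selects to v_bfe_u32 (or s_bfe_u32 when the operands are uniform); the
; tests below also cover cases where the extract simplifies or folds away.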

; GCN-LABEL: {{^}}bfe_u32_arg_arg_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_arg_imm:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_imm_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_imm_arg_arg:
; GCN: v_bfe_u32
define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
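
; Note: a width of 0 selects no bits, so both kernels above fold to a store
; of constant 0 and no bfe instruction is expected.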

; GCN-LABEL: {{^}}bfe_u32_zextload_i8:
; GCN: buffer_load_ubyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32
; FIXME: Should be using s_add_i32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
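
; The 0xf8 mask checked above is SimplifyDemandedBits at work: the `and`
; originally keeps bits [7:0], but the bfe at offset 3 with width 8 only
; reads bits [10:3], so only bits [7:3] of the mask remain. The same
; narrowing yields the 0x80 mask in the offset 7 variant below.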

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; GCN: buffer_load_dword
; GCN: v_add_{{[iu]}}32
; GCN-NEXT: bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_1:
; GCN: buffer_load_dword
; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_4:
; GCN-NOT: lshl
; GCN-NOT: shr
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
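
; test_4 folds away completely: after the shl/lshr pair only bit 0 of %x can
; survive, so extracting bit 31 is known to be 0 and a constant 0 is stored.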

; GCN-LABEL: {{^}}bfe_u32_test_5:
; GCN: buffer_load_dword
; GCN-NOT: lshl
; GCN-NOT: shr
; GCN: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_6:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_7:
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_13:
; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_test_14:
; GCN-NOT: lshr
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}
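
; Worked example for the case above: 160 = 0b10100000, so bits [7:4] are
; 0b1010 and ubfe(160, 4, 4) = (160 >> 4) & 0xf = 10.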

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; GCN-NOT: {{[^@]}}bfe
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; GCN-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; GCN: buffer_load_dword [[ARG:v[0-9]+]]
; GCN: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; GCN-DAG: buffer_store_dword [[AND]]
; GCN-DAG: buffer_store_dword [[BFE]]
; GCN: s_endpgm
define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                                          i32 addrspace(1)* %out1,
                                                          i32 addrspace(1)* %in) #0 {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

; GCN-LABEL: {{^}}lshr_and:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}
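
; The 0x30006 immediate is the packed s_bfe_u32 source operand: width in
; bits [22:16] (3, from the `and` with 7) and offset in the low bits (6,
; from the lshr), i.e. (3 << 16) | 6. The s_bfe_u32 checks below use the
; same packing.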

; GCN-LABEL: {{^}}v_lshr_and:
; GCN: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; GCN: buffer_store_dword
define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}and_lshr:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}and_lshr2:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; GCN: buffer_store_dword
define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}shl_lshr:
; GCN: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; GCN: buffer_store_dword
define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}
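
; Here 0x150002 packs width 21 (0x15) with offset 2: shl by 9 followed by
; lshr by 11 reads the 21-bit field at bits [22:2] of %a.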

declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }