; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
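
; Most of the expected values below follow the hardware BFE_UINT formula:
;   bfe_u32(src, offset, width) = (src >> offset) & ((1 << width) - 1)
; with a width of 0 yielding 0.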

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
; SI: v_bfe_u32
; EG: BFE_UINT
define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
; SI: buffer_load_ubyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
  %load = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
; SI: buffer_load_dword
; SI: v_add_i32
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 255
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
; SI: buffer_load_dword
; SI: v_add_i32
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %load = load i32, i32 addrspace(1)* %in, align 4
  %add = add i32 %load, 1
  %ext = and i32 %add, 65535
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_1:
; SI: buffer_load_dword
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_2:
define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_3:
define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_4:
; SI-NOT: lshl
; SI-NOT: shr
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = lshr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_5:
; SI: buffer_load_dword
; SI-NOT: lshl
; SI-NOT: shr
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_6:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_7:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_13:
; XXX: Disabled check (no SI: prefix); the mnemonic would be v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = ashr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_test_14:
; SI-NOT: lshr
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = lshr i32 %x, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
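; bfe_u32(128, 7, 1) = (0x80 >> 7) & 1 = 1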
define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
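; bfe_u32(128, 0, 8) = 0x80 & 0xff = 0x80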
define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
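; bfe_u32(65536, 16, 8) = (0x10000 >> 16) & 0xff = 1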
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
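; bfe_u32(160, 4, 4) = (0xa0 >> 4) & 0xf = 10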
define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
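; bfe_u32(131070, 16, 16) = (0x1fffe >> 16) & 0xffff = 1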
define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
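; bfe_u32(160, 2, 30) = 0xa0 >> 2 = 40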
define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
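; bfe_u32(0xffffffff, 1, 7) = (0xffffffff >> 1) & 0x7f = 0x7f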
define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
; SI-NOT: {{[^@]}}bfe
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure that SimplifyDemandedBits doesn't cause the and to be
; reduced to the bits demanded by the bfe.
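; For example, with %src = 42, the full value 42 must reach the second store,
; even though the bfe only reads bits [3:2] of it.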

; XXX: The operand to v_bfe_u32 could also just directly be the load register.
; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
; SI: buffer_load_dword [[ARG:v[0-9]+]]
; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
; SI-DAG: buffer_store_dword [[AND]]
; SI-DAG: buffer_store_dword [[BFE]]
; SI: s_endpgm
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                            i32 addrspace(1)* %out1,
                                            i32 addrspace(1)* %in) nounwind {
  %src = load i32, i32 addrspace(1)* %in, align 4
  %and = and i32 %src, 63
  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
  store i32 %and, i32 addrspace(1)* %out1, align 4
  ret void
}

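; In the s_bfe_u32 checks below, the immediate packs the bit offset in its low
; bits and the field width in bits [22:16] (assuming the SI scalar BFE
; encoding), so 0x30006 encodes offset 6, width 3.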
; FUNC-LABEL: {{^}}lshr_and:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @lshr_and(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = lshr i32 %a, 6
  %c = and i32 %b, 7
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_lshr_and:
; SI: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
; SI: buffer_store_dword
define void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = lshr i32 %a, %b
  %d = and i32 %c, 7
  store i32 %d, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}and_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 448
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}and_lshr2:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
; SI: buffer_store_dword
define void @and_lshr2(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = and i32 %a, 511
  %c = lshr i32 %b, 6
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}

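; (%a << 9) >> 11 keeps 21 bits starting at bit 2 of %a, so the expected
; immediate is 0x150002 (width 0x15 = 21, offset 2).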
; FUNC-LABEL: {{^}}shl_lshr:
; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
; SI: buffer_store_dword
define void @shl_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
  %b = shl i32 %a, 9
  %c = lshr i32 %b, 11
  store i32 %c, i32 addrspace(1)* %out, align 8
  ret void
}