; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
; RUN: llc -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM %s

; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
; but with all 64-bit tests, and tests with loads dropped.

; Patterns:
; a) x & (1 << nbits) - 1
; b) x & ~(-1 << nbits)
; c) x & (-1 >> (32 - nbits))
; d) x << (32 - nbits) >> (32 - nbits)
; are equivalent.
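; For instance, with nbits = 8 all four forms reduce to masking with 0xff:
;   a) (1 << 8) - 1             = 0xff
;   b) ~(-1 << 8) = ~0xffffff00 = 0xff
;   c) -1 >> (32 - 8)           = 0xff   (logical shift of all-ones)
;   d) (x << 24) >> 24 keeps only the low 8 bits (logical shift right)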

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a0:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_a0:
; CM: ; %bb.0:
; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a1_indexzext:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_a1_indexzext:
; CM: ; %bb.0:
; CM-NEXT: ALU 0, @8, KC0[], KC1[]
; CM-NEXT: TEX 0 @6
; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT: ALU clause starting at 8:
; CM-NEXT: MOV * T0.X, 0.0,
; CM-NEXT: ALU clause starting at 9:
; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_a4_commutative:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_a4_commutative:
; CM: ; %bb.0:
; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b0:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_b0:
; CM: ; %bb.0:
; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b1_indexzext:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_b1_indexzext:
; CM: ; %bb.0:
; CM-NEXT: ALU 0, @8, KC0[], KC1[]
; CM-NEXT: TEX 0 @6
; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT: ALU clause starting at 8:
; CM-NEXT: MOV * T0.X, 0.0,
; CM-NEXT: ALU clause starting at 9:
; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_b4_commutative:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_b4_commutative:
; CM: ; %bb.0:
; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;

define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c0:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_c0:
; CM: ; %bb.0:
; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

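; Note: here the shift amount is a plain i8 and the 32 - %numlowbits subtraction
; happens in i8, so the checks below show the promoted amount being masked with
; 255 and the mask built with an explicit LSHR of -1 instead of BFE_UINT.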
define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c1_indexzext:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: SUB_INT * T0.W, literal.x, T0.X,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: LSHR * T0.W, literal.x, PV.W,
; EG-NEXT: -1(nan), 0(0.000000e+00)
; EG-NEXT: AND_INT T0.X, PV.W, KC0[2].Y,
; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_c1_indexzext:
; CM: ; %bb.0:
; CM-NEXT: ALU 0, @8, KC0[], KC1[]
; CM-NEXT: TEX 0 @6
; CM-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT: ALU clause starting at 8:
; CM-NEXT: MOV * T0.X, 0.0,
; CM-NEXT: ALU clause starting at 9:
; CM-NEXT: SUB_INT * T0.W, literal.x, T0.X,
; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT: AND_INT * T0.W, PV.W, literal.x,
; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; CM-NEXT: LSHR * T0.W, literal.x, PV.W,
; CM-NEXT: -1(nan), 0(0.000000e+00)
; CM-NEXT: AND_INT * T0.X, PV.W, KC0[2].Y,
; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = lshr i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_c4_commutative:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_c4_commutative:
; CM: ; %bb.0:
; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit.
; ---------------------------------------------------------------------------- ;

define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_d0:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
;
; CM-LABEL: bzhi32_d0:
; CM: ; %bb.0:
; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

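; Note: as in bzhi32_c1_indexzext, the i8 high-bit count is masked with 255 and
; the checks below show an explicit LSHL/LSHR pair rather than a single BFE_UINT.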
define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
; EG-LABEL: bzhi32_d1_indexzext:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: SUB_INT * T0.W, literal.x, T0.X,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: LSHL * T1.W, KC0[2].Y, PV.W,
; EG-NEXT: LSHR T0.X, PV.W, T0.W,
; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: bzhi32_d1_indexzext:
; CM: ; %bb.0:
; CM-NEXT: ALU 0, @8, KC0[], KC1[]
; CM-NEXT: TEX 0 @6
; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: Fetch clause starting at 6:
; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
; CM-NEXT: ALU clause starting at 8:
; CM-NEXT: MOV * T0.X, 0.0,
; CM-NEXT: ALU clause starting at 9:
; CM-NEXT: SUB_INT * T0.W, literal.x, T0.X,
; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; CM-NEXT: AND_INT * T0.W, PV.W, literal.x,
; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; CM-NEXT: LSHL * T1.W, KC0[2].Y, PV.W,
; CM-NEXT: LSHR * T0.X, PV.W, T0.W,
; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}