; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=EG %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=CM %s

; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
; but with all 64-bit tests, and tests with loads dropped.

; Patterns:
; a) x & (1 << nbits) - 1
; b) x & ~(-1 << nbits)
; c) x & (-1 >> (32 - y))
; d) x << (32 - y) >> (32 - y)
; are equivalent.

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; R600-LABEL: bzhi32_a0:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
  %onebit = shl i32 1, %numlowbits             ; 1 << nbits
  %mask = add nsw i32 %onebit, -1              ; (1 << nbits) - 1
  %masked = and i32 %mask, %val                ; keep only the low nbits of %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; R600-LABEL: bzhi32_a1_indexzext:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
  %conv = zext i8 %numlowbits to i32           ; bit count arrives as i8; widen it
  %onebit = shl i32 1, %conv                   ; 1 << nbits
  %mask = add nsw i32 %onebit, -1              ; (1 << nbits) - 1
  %masked = and i32 %mask, %val                ; keep only the low nbits of %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; R600-LABEL: bzhi32_a4_commutative:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
  %onebit = shl i32 1, %numlowbits             ; 1 << nbits
  %mask = add nsw i32 %onebit, -1              ; (1 << nbits) - 1
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

; R600-LABEL: bzhi32_b0:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
  %notmask = shl i32 -1, %numlowbits           ; -1 << nbits
  %mask = xor i32 %notmask, -1                 ; ~(-1 << nbits)
  %masked = and i32 %mask, %val                ; keep only the low nbits of %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; R600-LABEL: bzhi32_b1_indexzext:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
  %conv = zext i8 %numlowbits to i32           ; bit count arrives as i8; widen it
  %notmask = shl i32 -1, %conv                 ; -1 << nbits
  %mask = xor i32 %notmask, -1                 ; ~(-1 << nbits)
  %masked = and i32 %mask, %val                ; keep only the low nbits of %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; R600-LABEL: bzhi32_b4_commutative:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
  %notmask = shl i32 -1, %numlowbits           ; -1 << nbits
  %mask = xor i32 %notmask, -1                 ; ~(-1 << nbits)
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;

; R600-LABEL: bzhi32_c0:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
  %numhighbits = sub i32 32, %numlowbits       ; 32 - nbits
  %mask = lshr i32 -1, %numhighbits            ; -1 >> (32 - nbits)
  %masked = and i32 %mask, %val                ; keep only the low nbits of %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; R600-LABEL: bzhi32_c1_indexzext:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: SUB_INT {{\*?}} [[SUBR:T[0-9]+]].[[SUBC:[XYZW]]], literal.x, KC0[2].Z
; R600-NEXT: 32
; R600-NEXT: AND_INT {{\*?}} {{T[0-9]+}}.[[AND1C:[XYZW]]], {{T[0-9]+|PV}}.[[SUBC]], literal.x
; R600-NEXT: 255
; R600: LSHR {{\*?}} {{T[0-9]}}.[[LSHRC:[XYZW]]], literal.x, {{T[0-9]+|PV}}.[[AND1C]]
; R600-NEXT: -1
; R600-NEXT: AND_INT {{[* ]*}}[[RET]], {{T[0-9]+|PV}}.[[LSHRC]], KC0[2].Y
define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
  %numhighbits = sub i8 32, %numlowbits        ; (32 - nbits) computed in i8
  %sh_prom = zext i8 %numhighbits to i32       ; promoted shift amount
  %mask = lshr i32 -1, %sh_prom                ; -1 >> (32 - nbits)
  %masked = and i32 %mask, %val                ; keep only the low nbits of %val
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; R600-LABEL: bzhi32_c4_commutative:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
  %numhighbits = sub i32 32, %numlowbits       ; 32 - nbits
  %mask = lshr i32 -1, %numhighbits            ; -1 >> (32 - nbits)
  %masked = and i32 %val, %mask ; swapped order
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit.
; ---------------------------------------------------------------------------- ;

; R600-LABEL: bzhi32_d0:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: BFE_UINT {{\*?}} [[RET]], KC0[2].Y, 0.0, KC0[2].Z
define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
  %numhighbits = sub i32 32, %numlowbits       ; 32 - nbits
  %highbitscleared = shl i32 %val, %numhighbits  ; x << (32 - nbits)
  %masked = lshr i32 %highbitscleared, %numhighbits ; ... >> (32 - nbits)
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}

; R600-LABEL: bzhi32_d1_indexzext:
; EG: MEM_RAT_CACHELESS STORE_RAW [[RET:T[0-1]+\.[XYZW]]]
; CM: MEM_RAT_CACHELESS STORE_DWORD [[RET:T[0-1]+\.[XYZW]]]
; R600: SUB_INT {{\*?}} [[SUBR:T[0-9]+]].[[SUBC:[XYZW]]], literal.x, KC0[2].Z
; R600-NEXT: 32
; R600-NEXT: AND_INT {{\*?}} [[AND:T[0-9]+\.[XYZW]]], {{T[0-9]+|PV}}.[[SUBC]], literal.x
; R600-NEXT: 255
; R600: LSHL {{\*?}} {{T[0-9]}}.[[LSHLC:[XYZW]]], KC0[2].Y, {{T[0-9]+|PV}}.[[AND1C]]
; R600: LSHR {{[* ]*}}[[RET]], {{T[0-9]+|PV}}.[[LSHLC]], [[AND]]
define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
  %numhighbits = sub i8 32, %numlowbits        ; (32 - nbits) computed in i8
  %sh_prom = zext i8 %numhighbits to i32       ; promoted shift amount
  %highbitscleared = shl i32 %val, %sh_prom    ; x << (32 - nbits)
  %masked = lshr i32 %highbitscleared, %sh_prom ; ... >> (32 - nbits)
  store i32 %masked, i32 addrspace(1)* %out
  ret void
}