; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Make sure 64-bit BFE pattern does a 32-bit BFE on the relevant half.

; Extract the high bit of the low half.
; The IR shifts the loaded i64 right by 31 and masks with 1. The checks expect
; one dword load with no offset, a 32-bit shift by 31 producing the low result
; dword, and a zero move producing the high result dword.
; GCN-LABEL: {{^}}v_uextract_bit_31_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract the high bit of the high half.
; The IR shifts right by 63 and masks with 1. The checks expect one dword load
; at offset 4, a 32-bit shift by 31 of that dword, and a zero high result dword.
; GCN-LABEL: {{^}}v_uextract_bit_63_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 63
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bit 1 (shift right by 1, mask with 1). The checks expect one dword
; load with no offset and a 32-bit BFE with offset 1, width 1.
; GCN-LABEL: {{^}}v_uextract_bit_1_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bit 20 (shift right by 20, mask with 1). The checks expect one dword
; load with no offset and a 32-bit BFE with offset 20, width 1.
; GCN-LABEL: {{^}}v_uextract_bit_20_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 1
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bit 32 (shift right by 32, mask with 1). The checks expect one dword
; load at offset 4 and a plain 32-bit and with 1; no shift is expected.
; GCN-LABEL: {{^}}v_uextract_bit_32_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 32
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bit 33 (shift right by 33, mask with 1). The checks expect one dword
; load at offset 4 and a 32-bit BFE with offset 1, width 1.
; The store check refers only to variables defined in this function, so it
; verifies that the BFE result is the low dword that gets stored.
; GCN-LABEL: {{^}}v_uextract_bit_33_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 1
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bits 20-21 (shift right by 20, mask with 3). The checks expect one
; dword load with no offset and a 32-bit BFE with offset 20, width 2.
; GCN-LABEL: {{^}}v_uextract_bit_20_21_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 20, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bits 1-30 (shift right by 1, mask with 0x3fffffff, a 30-bit mask).
; The checks expect one dword load with no offset and a 32-bit BFE with
; offset 1, width 30.
; GCN-LABEL: {{^}}v_uextract_bit_1_30_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract bits 1-31 (shift right by 1, mask with 0x7fffffff, a 31-bit mask).
; The checks expect one dword load with no offset and just a 32-bit shift by 1;
; no separate mask or BFE is expected.
; GCN-LABEL: {{^}}v_uextract_bit_1_31_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 1, [[VAL]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 1
  %bit = and i64 %srl, 2147483647
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Spans the dword boundary, so requires full shift.
; The IR shifts right by 31 and masks with 3 (bits 31-32). The checks expect a
; two-dword load, a 64-bit shift by 31, and a 32-bit and with 3 on the low
; dword of the shift result.
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Two-bit extract from the high half. Note that, despite the function name, the
; IR shifts right by 33 and masks with 3, i.e. it extracts bits 33-34. The
; checks match that: one dword load at offset 4 and a 32-bit BFE with
; offset 1, width 2.
; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 3
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract a 30-bit field starting at bit 30 (shift right by 30, mask with
; 0x3fffffff), which spans both dwords. The checks expect a two-dword load, a
; 64-bit shift by 30, and a 32-bit and with 0x3fffffff on the low dword of the
; shift result.
; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}}
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 30
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract a 30-bit field starting at bit 33 (shift right by 33, mask with
; 0x3fffffff), which lies entirely in the high half. The checks expect one
; dword load at offset 4 and a 32-bit BFE with offset 1, width 30.
; The zero move binds its own variable (ZERO) so that it does not rebind BFE,
; and the store check refers only to variables defined in this function.
; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 1073741823
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; Extract a 32-bit field starting at bit 31 (shift right by 31, mask with
; 0xffffffff). The checks expect a two-dword load, a 64-bit shift by 31, and
; the high dword of the shift result overwritten with zero by the very next
; instruction; no and is expected.
; NOTE(review): the store targets %out rather than %out.gep, which leaves
; %out.gep unused. This looks unintentional, but changing it may alter the
; instruction order the checks depend on - confirm before touching.
; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
define void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %and = and i64 %srl, 4294967295
  store i64 %and, i64 addrspace(1)* %out
  ret void
}

; trunc applied before and mask.
; The IR shifts the i64 right by 31, truncates to i32, then masks with 1. The
; checks expect one dword load with no offset, a 32-bit shift by 31, and a
; single-dword store of the shift result.
; GCN-LABEL: {{^}}v_uextract_bit_31_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]]
; GCN: buffer_store_dword v[[SHIFT]]
define void @v_uextract_bit_31_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Shift right by 3, truncate to i32, mask with 1 (bit 3). The checks expect
; one dword load with no offset, a 32-bit BFE with offset 3, width 1, and a
; single-dword store of the BFE result.
; GCN-LABEL: {{^}}v_uextract_bit_3_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 3, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
define void @v_uextract_bit_3_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 3
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Shift right by 33, truncate to i32, mask with 1 (bit 33). The checks expect
; one dword load at offset 4, a 32-bit BFE with offset 1, width 1, and a
; single-dword store of the BFE result.
; GCN-LABEL: {{^}}v_uextract_bit_33_i64_trunc_i32:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_bfe_u32 [[BFE:v[0-9]+]], [[VAL]], 1, 1{{$}}
; GCN: buffer_store_dword [[BFE]]
define void @v_uextract_bit_33_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 1
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; Shift right by 31, truncate to i32, mask with 3 (bits 31-32, spanning both
; dwords). The checks expect a two-dword load, a 64-bit shift by 31, an
; in-place 32-bit and with 3 on the low dword as the very next instruction,
; no further mention of that register, and then a single-dword store of it.
; GCN-LABEL: {{^}}v_uextract_bit_31_32_i64_trunc_i32:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31
; GCN-NEXT: v_and_b32_e32 v[[SHRLO]], 3, v[[SHRLO]]
; GCN-NOT: v[[SHRLO]]
; GCN: buffer_store_dword v[[SHRLO]]
define void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 31
  %trunc = trunc i64 %srl to i32
  %bit = and i32 %trunc, 3
  store i32 %bit, i32 addrspace(1)* %out.gep
  ret void
}

; The and constant here is 4, a single bit that is not a run of low bits, so
; shift-then-and is not a bitfield extract. The checks expect a two-dword load,
; a zero move for the high result dword, a 32-bit shift by 20 of the low loaded
; dword, and a 32-bit and with 4.
; SHRHI and SHRLO are bound at their first use in this function so the checks
; do not depend on register numbers captured in an earlier function.
; GCN-LABEL: {{^}}and_not_mask_i64:
; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}}
; GCN: v_mov_b32_e32 v[[SHRHI:[0-9]+]], 0{{$}}
; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]]
; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]]
; GCN-NOT: v[[SHRLO]]
; GCN-NOT: v[[SHRHI]]
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
define void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 20
  %bit = and i64 %srl, 4
  store i64 %bit, i64 addrspace(1)* %out.gep
  ret void
}

; The instruction count is the same with/without hasOneUse, but
; keeping the 32-bit and has a smaller encoding size than the bfe.

; The shift result (right by 27) is stored on its own and also masked with 3,
; so the shift has two users. The checks expect a two-dword load, a 64-bit
; shift by 27, a 32-bit and with 3 on the low dword of the shift result, and
; two stores: first the full shift result, then the masked value paired with
; a zero high dword. Both stores are volatile and go to %out; %out.gep is
; computed but not used.
; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64:
; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27
; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]]
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}}
define void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 27
  %bit = and i64 %srl, 3
  store volatile i64 %srl, i64 addrspace(1)* %out
  store volatile i64 %bit, i64 addrspace(1)* %out
  ret void
}

; Multi-use shift where the field lies entirely in the high half: the IR shifts
; right by 34, stores that, and also stores the shift masked with 7. The checks
; expect one dword load at offset 4, a zero register plus a copy of it (one
; high dword per store), a 32-bit shift by 2 for the first store, and a 32-bit
; BFE with offset 2, width 3 for the second. Both stores are volatile and go
; to %out; %out.gep is computed but not used.
; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN: v_mov_b32_e32 v[[ZERO_SHR:[0-9]+]], 0{{$}}
; GCN: v_mov_b32_e32 v[[ZERO_BFE:[0-9]+]], v[[ZERO_SHR]]
; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]]
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[SHR]]:[[ZERO_SHR]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO_BFE]]{{\]}}
define void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 34
  %bit = and i64 %srl, 7
  store volatile i64 %srl, i64 addrspace(1)* %out
  store volatile i64 %bit, i64 addrspace(1)* %out
  ret void
}

; The shift result (right by 33) has two users: it is masked with 7 and stored
; as an i64, and its own upper 32 bits (a further shift by 32, then trunc) are
; stored as an i32. After a shift by 33 the upper 32 bits are all zero, so the
; checks expect one dword load at offset 4, a 32-bit BFE with offset 1,
; width 3, a zero register used as the high dword of the first store, and that
; same zero register as the value of the second, single-dword store.
; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64:
; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3
; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}}
; GCN: buffer_store_dword v[[ZERO]]
define void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x
  %out0.gep = getelementptr i64, i64 addrspace(1)* %out0, i32 %id.x
  %out1.gep = getelementptr i32, i32 addrspace(1)* %out1, i32 %id.x
  %ld.64 = load i64, i64 addrspace(1)* %in.gep
  %srl = lshr i64 %ld.64, 33
  %bit = and i64 %srl, 7
  store volatile i64 %bit, i64 addrspace(1)* %out0.gep

  %srl.srl32 = lshr i64 %srl, 32
  %srl.hi = trunc i64 %srl.srl32 to i32
  store volatile i32 %srl.hi, i32 addrspace(1)* %out1.gep
  ret void
}

; Work-item id intrinsic; the test functions in this file call it to compute
; the index used for their input and output element pointers.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0 is attached to the intrinsic declaration, #1 to the test functions.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }