; blob: 04ad3bcccd3f33ebee62871b006af8ed9b7a8479
; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; This particular case will actually be worse in terms of code size
; from sinking into both.

; 32-bit unsigned bitfield extract: the shift is defined once in the entry
; block and masked with a different constant in each successor. The opt
; checks expect the entry block to contain only the branch and each successor
; to receive its own copy of the lshr directly ahead of its mask. The llc
; checks expect one s_bfe_u32 per successor (0x80008 / 0x70008, i.e. an
; 8-bit and a 7-bit field at offset 8) and no separate shift instruction.

; OPT-LABEL: @sink_ubfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret


; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc1

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
; GCN: BB0_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008

; GCN: BB0_3:
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  ; Shared shift; both successors consume it.
  %shr = lshr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  ; Volatile store gives the block a side effect besides the mask.
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}

; Variant of the 32-bit case that uses an arithmetic shift (ashr) feeding the
; two masks. The opt checks expect the ashr to be duplicated into bb0 and bb1
; directly ahead of each mask, leaving only the branch in the entry block.
; For llc output only the function label is checked.

; OPT-LABEL: @sink_sbfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = ashr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = ashr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_sbfe_i32:
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  ; Shared arithmetic shift; both successors consume it.
  %shr = ashr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}


; 16-bit unsigned bitfield extract. The opt checks expect the lshr to be
; duplicated into both successors for every RUN configuration. The llc
; expectations differ per target: without -mcpu each successor gets a single
; s_bfe_u32 (0x80004 / 0x70004), while for tonga one s_bfe_u32 (0xc0004) is
; expected ahead of the branch with only an s_and_b32 left in each successor.

; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i16 %arg1, 4
; OPT-NEXT: %val0 = and i16 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i16 %arg1, 4
; OPT-NEXT: %val1 = and i16 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; For GFX8: since i16 is a legal type, we cannot sink lshr into BBs.

; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc1

; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff

; GCN: BB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f

; GCN: BB2_3:
; GCN: buffer_store_short
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
  ; Shared shift; both successors consume it.
  %shr = lshr i16 %arg1, 4
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i16 %shr, 255
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i16 %shr, 127
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i16 %phi, i16 addrspace(1)* %out
  ret void
}

; We don't really want to sink this one since it isn't reducible to a
; 32-bit BFE on one half of the integer.

; 64-bit extract whose field straddles the two 32-bit halves: a shift by 30
; followed by an 8-bit (or 7-bit) mask selects bits starting at bit 30 and
; running past bit 31. The opt checks still expect the lshr to be duplicated
; into both successors. The llc checks expect a single v_alignbit_b32 ahead
; of the branch and only a v_and_b32 of its result in each successor.

; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 30
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 30
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:

; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: s_cbranch_scc1 BB3_2
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: BB3_2:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]

; GCN: BB3_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  ; Shared shift; both successors consume it.
  %shr = lshr i64 %arg1, 30
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; 64-bit extract whose field lies entirely below bit 32: a shift by 15 with
; an 8-bit or 7-bit mask. The opt checks expect the lshr to be duplicated
; into both successors. The llc checks expect one 32-bit s_bfe_u32 per
; successor (0x8000f / 0x7000f, i.e. offset 15) after the branch.

; OPT-LABEL: @sink_ubfe_i64_low32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 15
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 15
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_low32:

; GCN: s_cbranch_scc1 BB4_2

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: BB4_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f

; GCN: BB4_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  ; Shared shift; both successors consume it.
  %shr = lshr i64 %arg1, 15
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; 64-bit extract whose field lies entirely at or above bit 32: a shift by 35
; with an 8-bit or 7-bit mask. The opt checks expect the lshr to be
; duplicated into both successors. The llc checks expect one 32-bit
; s_bfe_u32 per successor with offset 3 (35 - 32), encoded as 0x80003 and
; 0x70003.

; OPT-LABEL: @sink_ubfe_i64_high32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 35
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 35
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc1 BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: BB5_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003

; GCN: BB5_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  ; Shared shift; both successors consume it.
  %shr = lshr i64 %arg1, 35
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; Attribute group referenced as #0 by every kernel in this file.
attributes #0 = { nounwind }