; blob: e34441653e30cb2cb3d966caba81b8dda7fb9dee
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s

; Loads an i16 from addrspace(3) and inserts it into lane 0 of an otherwise
; undef <2 x i16>, then returns the vector. The gfx900 run expects the d16
; form of the LDS read; the fiji run only requires a plain ds_read_u16.
; GCN-LABEL: {{^}}load_local_lo_v2i16_undeflo:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> undef, i16 %load, i32 0
  ret <2 x i16> %build
}

; Inserts %reg into lane 0 of an undef vector and then overwrites that same
; lane with the value loaded from addrspace(3); lane 1 stays undef. The
; expected gfx900 output is therefore the same single d16 read as the
; undef-lo case.
; NOTE(review): both insertelements target index 0, so %reg never reaches the
; result despite the "reglo" name -- confirm this is intentional.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  ret <2 x i16> %build1
}

; Show that we get reasonable regalloc without physreg constraints.
; Same IR shape as load_local_lo_v2i16_reglo, but the vector is stored to an
; undef addrspace(1) pointer instead of being returned, so the result is not
; pinned to the return register.
; NOTE(review): both insertelements target index 0, so %reg is overwritten by
; the loaded value -- confirm this is intentional.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v0, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Inserts the loaded i16 into lane 0 of a zeroinitializer vector, so lane 1
; must be zero in the result. The gfx900 checks expect the zero to be
; materialized in v1 first, the d16 read to write v1, and a final copy into
; the return register v0.
; GCN-LABEL: {{^}}load_local_lo_v2i16_zerolo:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
  ret <2 x i16> %build
}

; Replaces lane 0 of the constant vector <half 0.0, half 2.0> with a half
; loaded from addrspace(3); lane 1 keeps the constant 2.0. The gfx900 checks
; expect the constant to be moved into v1 before the d16 read writes v1.
; GCN-LABEL: {{^}}load_local_lo_v2f16_fpimm:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 {
entry:
  %load = load half, half addrspace(3)* %in
  %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
  ret <2 x half> %build
}

; Reinterprets the i32 argument as <2 x half>, replaces lane 0 with a half
; loaded from addrspace(3) and stores the vector to an undef addrspace(1)
; pointer. Lane 1 of %reg is preserved, so the gfx900 checks expect the d16
; read to write directly into the register holding %reg (v1).
; GCN-LABEL: {{^}}load_local_lo_v2f16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half addrspace(3)* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Builds the vector from a scalar half argument placed in lane 1 and a half
; loaded from addrspace(3) placed in lane 0. Here the gfx900 checks expect a
; plain (non-d16) read followed by an explicit mask and shift-or to pack the
; two halves.
; GCN-LABEL: {{^}}load_local_lo_v2f16_reglo_vreg:

; GFX9: ds_read_u16 v
; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}
; GFX9: global_store_dword

; VI: ds_read_u16 v
define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
entry:
  %load = load half, half addrspace(3)* %in
  %build0 = insertelement <2 x half> undef, half %reg, i32 1
  %build1 = insertelement <2 x half> %build0, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Loads an i8 from addrspace(3), zero-extends it to i16 and inserts it into
; lane 0 of the i32 argument viewed as <2 x i16>. The gfx900 checks expect the
; extending load to fold into a single unsigned-byte d16 read.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u8_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u8 v
define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Places the scalar i16 argument in lane 1 of an undef vector and a
; zero-extended i8 load from addrspace(3) in lane 0. The gfx900 checks only
; require a plain (non-d16) unsigned-byte read before the store.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9: ds_read_u8 v
; GFX9: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u8 v
define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Loads an i8 from addrspace(3), sign-extends it to i16 and inserts it into
; lane 0 of the i32 argument viewed as <2 x i16>. The gfx900 checks expect the
; extending load to fold into a single signed-byte d16 read.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_i8_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_i8 v
define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Places the scalar i16 argument in lane 1 of an undef vector and a
; sign-extended i8 load from addrspace(3) in lane 0. The gfx900 checks expect
; a plain signed-byte read followed by an explicit mask and shift-or pack.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9: ds_read_i8 v
; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}

; VI: ds_read_i8 v
define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Loads an i16 from addrspace(1) at element index -2047 (-4094 bytes) and
; inserts it into lane 0 of the i32 argument viewed as <2 x i16>. The gfx900
; checks expect the whole byte offset to appear as the immediate of a d16
; global load. There are no fiji-specific checks for this function.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
  %load = load i16, i16 addrspace(1)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed counterpart of load_global_lo_v2i16_reglo_vreg: loads a half from
; addrspace(1) at element index -2047 (-4094 bytes) into lane 0 of the i32
; argument viewed as <2 x half>. The expected gfx900 instruction sequence is
; the same d16 global load with an immediate offset.
; GCN-LABEL: {{^}}load_global_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
  %load = load half, half addrspace(1)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Loads an i8 from addrspace(1) at byte offset -4095, zero-extends it to i16
; and inserts it into lane 0 of the i32 argument viewed as <2 x i16>. The
; gfx900 checks expect an unsigned-byte d16 global load carrying the offset
; as an immediate.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Loads an i8 from addrspace(1) at byte offset -4095, sign-extends it to i16
; and inserts it into lane 0 of the i32 argument viewed as <2 x i16>. The
; gfx900 checks expect a signed-byte d16 global load carrying the offset as
; an immediate.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Loads an i16 through an addrspace(4) pointer and inserts it into lane 0 of
; the i32 argument viewed as <2 x i16>. The gfx900 checks expect a d16 flat
; load; the fiji checks expect an ordinary ushort flat load combined with an
; OR.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2i16_reghi_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i16, i16 addrspace(4)* %in
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed counterpart of load_flat_lo_v2i16_reghi_vreg: loads a half
; through an addrspace(4) pointer into lane 0 of the i32 argument viewed as
; <2 x half>. Expected instructions match the i16 variant on both targets.
; GCN-LABEL: {{^}}load_flat_lo_v2f16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2f16_reghi_vreg(half addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half addrspace(4)* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Loads an i8 through an addrspace(4) pointer, zero-extends it to i16 and
; inserts it into lane 0 of the i32 argument viewed as <2 x i16>. The gfx900
; checks expect an unsigned-byte d16 flat load; the fiji checks expect a
; ubyte flat load combined with an OR.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_ubyte_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ubyte v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Loads an i8 through an addrspace(4) pointer, sign-extends it to i16 and
; inserts it into lane 0 of the i32 argument viewed as <2 x i16>. The gfx900
; checks expect a signed-byte d16 flat load; the fiji checks expect an sbyte
; flat load merged with an SDWA OR that selects the low word of the loaded
; value.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_sbyte_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_sbyte v{{[0-9]+}}
; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD

define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Loads an i16 through a default-address-space pointer at element index 2047
; (4094 bytes) and inserts it into lane 0 of the i32 argument viewed as
; <2 x i16>. The gfx900 checks expect a d16 buffer load using the pointer as
; a VGPR offset (offen) with the 4094-byte displacement as the immediate.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, v0, s[0:3], s4 offen offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg(i16* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16* %in, i64 2047
  %load = load i16, i16* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Places the scalar i16 argument in lane 1 of an undef vector and an i16
; loaded through a default-address-space pointer (element index 2047, i.e.
; 4094 bytes) in lane 0. The gfx900 checks expect a plain (non-d16) ushort
; buffer load followed by an explicit mask and shift-or pack.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ushort v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s4 offen offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9: v_and_b32
; GFX9: v_lshl_or_b32

; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg(i16* %in, i16 %reg) #0 {
entry:
  %gep = getelementptr inbounds i16, i16* %in, i64 2047
  %load = load i16, i16* %gep
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed counterpart of load_private_lo_v2i16_reglo_vreg: loads a half
; through a default-address-space pointer at element index 2047 (4094 bytes)
; into lane 0 of the i32 argument viewed as <2 x half>. Expected instructions
; match the i16 variant on both targets.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, v0, s[0:3], s4 offen offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg(half* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half* %in, i64 2047
  %load = load half, half* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Performs a volatile i16 load from the constant address 4094 (built with
; inttoptr; the %in argument is unused) and inserts it into lane 0 of the i32
; argument viewed as <2 x i16>. With no register address component, the
; checks expect the buffer load to use "off" with the address as the
; immediate offset.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff(i16* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Performs a volatile i16 load from the constant address 4094 (the %in
; argument is unused) and inserts it into lane 0 of the i32 argument viewed
; as <2 x i16>. The checks expect a buffer load using "off" with the address
; as the immediate offset.
; NOTE(review): apart from the function name, the IR and checks here are
; identical to load_private_lo_v2i16_reglo_vreg_nooff -- confirm whether a
; distinct "reghi" pattern was intended.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg_nooff(i16* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i16, i16* inttoptr (i32 4094 to i16*)
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed variant of the constant-address test: a volatile half load from
; address 4094 (the %in argument is unused) is inserted into lane 0 of the
; i32 argument viewed as <2 x half>. The checks expect a buffer load using
; "off" with the address as the immediate offset.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff(half* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load volatile half, half* inttoptr (i32 4094 to half*)
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Loads an i8 through a default-address-space pointer at byte offset 2047,
; zero-extends it to i16 and inserts it into lane 0 of the i32 argument
; viewed as <2 x i16>. The gfx900 checks expect an unsigned-byte d16 buffer
; load with the pointer as VGPR offset and 2047 as the immediate.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, v0, s[0:3], s4 offen offset:2047{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}}
define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8* %in, i64 2047
  %load = load i8, i8* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Loads an i8 through a default-address-space pointer at byte offset 2047,
; sign-extends it to i16 and inserts it into lane 0 of the i32 argument
; viewed as <2 x i16>. The gfx900 checks expect a signed-byte d16 buffer load
; with the pointer as VGPR offset and 2047 as the immediate.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_sbyte_d16 v1, v0, s[0:3], s4 offen offset:2047{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_sbyte v{{[0-9]+}}, v0, s[0:3], s4 offen offset:2047{{$}}
define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8* %in, i64 2047
  %load = load i8, i8* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Performs a volatile i8 load from the constant address 4094 (the %in
; argument is unused), zero-extends it to i16 and inserts it into lane 0 of
; the i32 argument viewed as <2 x i16>. The checks expect an unsigned-byte
; buffer load using "off" with the address as the immediate offset.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Performs a volatile i8 load from the constant address 4094 (the %in
; argument is unused), sign-extends it to i16 and inserts it into lane 0 of
; the i32 argument viewed as <2 x i16>. The checks expect a signed-byte
; buffer load using "off" with the address as the immediate offset.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Performs a volatile i8 load from the constant address 4094 (the %in
; argument is unused), zero-extends it to i16, reinterprets those bits as a
; half and inserts it into lane 0 of the i32 argument viewed as <2 x half>.
; The gfx900 checks expect the same unsigned-byte d16 buffer load as the
; integer-vector variant.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load volatile i8, i8* inttoptr (i32 4094 to i8*)
  %ext = zext i8 %load to i16
  %bc.ext = bitcast i16 %ext to half
  %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Loads an i16 from addrspace(2) at element index -2047 (-4094 bytes) and
; inserts it into lane 0 of the i32 argument viewed as <2 x i16>. The gfx900
; checks expect a d16 global load with the byte offset as the immediate; the
; fiji checks expect a ushort flat load.
; GCN-LABEL: {{^}}load_constant_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort
define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(2)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(2)* %in, i64 -2047
  %load = load i16, i16 addrspace(2)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half-typed counterpart of load_constant_lo_v2i16_reglo_vreg: loads a half
; from addrspace(2) at element index -2047 (-4094 bytes) into lane 0 of the
; i32 argument viewed as <2 x half>. Expected instructions match the i16
; variant on both targets.
; The label is anchored with {{^}} and a trailing colon like every other
; label in this file, so it can only match the function's own label line.
; GCN-LABEL: {{^}}load_constant_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort
define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(2)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(2)* %in, i64 -2047
  %load = load half, half addrspace(2)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Allocates two stack objects, does a volatile store of 123 into the first
; ([10 x i32]) and a volatile i16 load from element 2025 of the second
; ([4096 x i16]); the loaded value goes into lane 0 of the i32 argument
; viewed as <2 x i16>. The gfx900 checks expect the stack address to appear
; purely as an immediate offset ("off ... offset:4094") on a d16 buffer load
; that immediately follows the store.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094

; VI: buffer_load_ushort v
define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4
  %obj1 = alloca [4096 x i16], align 2
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32]* %obj0 to i32*
  store volatile i32 123, i32* %bc
  %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
  %load = load volatile i16, i16* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Allocates two stack objects, does a volatile store of 123 into the first
; ([10 x i32]) and a volatile i8 load from element 4051 of the second
; ([4096 x i8]); the byte is sign-extended to i16 and inserted into lane 0 of
; the i32 argument viewed as <2 x i16>. The gfx900 checks expect a
; signed-byte d16 buffer load addressed purely by immediate offset
; ("off ... offset:4095") right after the store.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095

; VI: buffer_load_sbyte v
define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4
  %obj1 = alloca [4096 x i8], align 2
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32]* %obj0 to i32*
  store volatile i32 123, i32* %bc
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
  %load = load volatile i8, i8* %gep
  %load.ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Allocates two stack objects, does a volatile store of 123 into the first
; ([10 x i32]) and a volatile i8 load from element 4051 of the second
; ([4096 x i8]); the byte is zero-extended to i16 and inserted into lane 0 of
; the i32 argument viewed as <2 x i16>. The gfx900 checks expect an
; unsigned-byte d16 buffer load addressed purely by immediate offset
; ("off ... offset:4095") right after the store.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095

; VI: buffer_load_ubyte v
define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4
  %obj1 = alloca [4096 x i8], align 2
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32]* %obj0 to i32*
  store volatile i32 123, i32* %bc
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
  %load = load volatile i8, i8* %gep
  %load.ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Attribute group #0, referenced by the test functions above.
attributes #0 = { nounwind }