; Source blob: b20f6ba55a7677316e550847ec032c511d2922ba
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Loads a single i8 from %in and stores it to %out (both addrspace(3)).
; FUNC-LABEL: {{^}}local_load_i8:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_u8

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
entry:
  %ld = load i8, i8 addrspace(3)* %in
  store i8 %ld, i8 addrspace(3)* %out
  ret void
}
18
; Loads a <2 x i8> from %in and stores it to %out; a 16-bit read is expected.
; FUNC-LABEL: {{^}}local_load_v2i8:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_u16

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <2 x i8>, <2 x i8> addrspace(3)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(3)* %out
  ret void
}
31
; Loads a <3 x i8> from %in and stores it to %out; a 32-bit read is expected.
; FUNC-LABEL: {{^}}local_load_v3i8:
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(3)* %out
  ret void
}
42
; Loads a <4 x i8> from %in and stores it to %out; a 32-bit read is expected.
; FUNC-LABEL: {{^}}local_load_v4i8:
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <4 x i8>, <4 x i8> addrspace(3)* %in
  store <4 x i8> %ld, <4 x i8> addrspace(3)* %out
  ret void
}
53
; Loads a <8 x i8> from %in and stores it to %out; a 64-bit read is expected.
; FUNC-LABEL: {{^}}local_load_v8i8:
; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <8 x i8>, <8 x i8> addrspace(3)* %in
  store <8 x i8> %ld, <8 x i8> addrspace(3)* %out
  ret void
}
65
; Loads a <16 x i8> from %in and stores it to %out. The checks tie the first
; and last registers written by the paired write to the range the paired read
; defined.
; FUNC-LABEL: {{^}}local_load_v16i8:
; GCN: ds_read2_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in
  store <16 x i8> %ld, <16 x i8> addrspace(3)* %out
  ret void
}
80
; Loads an i8, zero-extends it to i32, and stores the i32.
; FUNC-LABEL: {{^}}local_zextload_i8_to_i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_u8

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}
93
; Loads an i8, sign-extends it to i32, and stores the i32.
; FUNC-LABEL: {{^}}local_sextload_i8_to_i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_i8

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %ld = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %ld to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}
107
; Loads a <1 x i8>, zero-extends it to <1 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}
117
; Loads a <1 x i8>, sign-extends it to <1 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32:

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = sext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}
128
; Loads a <2 x i8>, zero-extends it to <2 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32:
; GCN: ds_read_u16

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = zext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}
139
; Loads a <2 x i8>, sign-extends it to <2 x i32>, and stores the result.
; The SI and VI check groups differ: the VI group expects a separate 16-bit
; shift feeding one of the bit-field extracts (see the FIXME below).
; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_u16
; FIXME: Need to optimize this sequence to avoid extra shift on VI.
; t23: i16 = srl t39, Constant:i32<8>
; t31: i32 = any_extend t23
; t33: i32 = sign_extend_inreg t31, ValueType:ch:i8

; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8

; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
; VI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8

; EG: LDS_USHORT_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = sext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}
165
; Loads a <3 x i8>, zero-extends it to <3 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32:
; GCN: ds_read_b32

; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, {{v[0-9]+}}
; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
  %ext = zext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}
182
; Loads a <3 x i8>, sign-extends it to <3 x i32>, and stores the result.
; The store is expected to be split into a 64-bit and a 32-bit write.
; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_b32

; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32

; GCN-DAG: ds_write_b64
; GCN-DAG: ds_write_b32

; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
  %ext = sext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}
207
; Loads a <4 x i8>, zero-extends it to <4 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = zext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}
223
; Loads a <4 x i8>, sign-extends it to <4 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_b32

; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = sext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}
240
; Loads a <8 x i8>, zero-extends it to <8 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32:

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = zext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}
257
; Loads a <8 x i8>, sign-extends it to <8 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32:

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = sext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}
276
; Loads a <16 x i8>, zero-extends it to <16 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32:

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %ext = zext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}
301
; Loads a <16 x i8>, sign-extends it to <16 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32:

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %ext = sext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}
330
; Loads a <32 x i8>, zero-extends it to <32 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32:

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %ext = zext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
  ret void
}
347
; Loads a <32 x i8>, sign-extends it to <32 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32:

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %ext = sext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
  ret void
}
364
; Loads a <64 x i8>, zero-extends it to <64 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32:

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
  %ext = zext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
  ret void
}
389
; Loads a <64 x i8>, sign-extends it to <64 x i32>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32:

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
  %ext = sext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
  ret void
}
414
; Loads an i8, zero-extends it to i64, and stores the i64. The checks expect
; the high half to be a register set to 0 and the low half to be the loaded
; byte, written together as one 64-bit pair.
; FUNC-LABEL: {{^}}local_zextload_i8_to_i64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; EG: 0.0
define amdgpu_kernel void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}
429
; Loads an i8, sign-extends it to i64, and stores the i64. The checks expect
; the high half to come from an arithmetic shift right by 31 of the loaded
; (already sign-extended) low half.
; FUNC-LABEL: {{^}}local_sextload_i8_to_i64:
; GCN: ds_read_i8 v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: LDS_UBYTE_READ_RET
; EG: ASHR
; TODO: why not 7?
; EG: 31
define amdgpu_kernel void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}
446
; Loads a <1 x i8>, zero-extends it to <1 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64:

; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; TODO: merge?
; EG: 0.0
define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = zext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}
459
; Loads a <1 x i8>, sign-extends it to <1 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64:

; EG: LDS_UBYTE_READ_RET
; EG: ASHR
; TODO: why not 7?
; EG: 31
define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = sext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}
472
; Loads a <2 x i8>, zero-extends it to <2 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = zext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}
482
; Loads a <2 x i8>, sign-extends it to <2 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64:

; EG: LDS_USHORT_READ_RET
; EG: BFE_INT
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = sext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}
494
; Loads a <4 x i8>, zero-extends it to <4 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}
504
; Loads a <4 x i8>, sign-extends it to <4 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:

; EG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}
514
; Loads a <8 x i8>, zero-extends it to <8 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}
525
; Loads a <8 x i8>, sign-extends it to <8 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: ASHR
; EG-DAG: ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}
545
; Loads a <16 x i8>, zero-extends it to <16 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %ext = zext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}
558
; Loads a <16 x i8>, sign-extends it to <16 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %ext = sext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}
571
; Loads a <32 x i8>, zero-extends it to <32 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %ext = zext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}
588
; Loads a <32 x i8>, sign-extends it to <32 x i64>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %ext = sext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}
605
; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
;   %ext = zext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
;   ret void
; }
613
; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
;   %ext = sext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
;   ret void
; }
621
; Loads an i8, zero-extends it to i16, and stores the i16. The write check
; reuses VAL so that the stored data register must be the one the byte read
; defined; the address register is matched by a throwaway pattern.
; FUNC-LABEL: {{^}}local_zextload_i8_to_i16:
; GCN: ds_read_u8 v[[VAL:[0-9]+]],
; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]

; EG: LDS_UBYTE_READ_RET
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(3)* %out
  ret void
}
634
; Loads an i8, sign-extends it to i16, and stores the i16. The write check
; requires the stored data register to be the one the byte read defined.
; FUNC-LABEL: {{^}}local_sextload_i8_to_i16:
; GCN: ds_read_i8 v[[VAL:[0-9]+]],
; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %a to i16
  store i16 %ext, i16 addrspace(3)* %out
  ret void
}
648
; Loads a <1 x i8>, zero-extends it to <1 x i16>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16:

; EG: LDS_UBYTE_READ_RET
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = zext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
  ret void
}
659
; Loads a <1 x i8>, sign-extends it to <1 x i16>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16:

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = sext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
  ret void
}
671
; Loads a <2 x i8>, zero-extends it to <2 x i16>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16:

; EG: LDS_USHORT_READ_RET
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = zext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
  ret void
}
682
; Loads a <2 x i8>, sign-extends it to <2 x i16>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16:

; EG: LDS_USHORT_READ_RET
; EG: BFE_INT
; EG: BFE_INT
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = sext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
  ret void
}
695
; Loads a <4 x i8>, zero-extends it to <4 x i16>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16:

; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = zext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
  ret void
}
707
; Loads a <4 x i8>, sign-extends it to <4 x i16>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:

; EG: LDS_READ_RET
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = sext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
  ret void
}
724
; Loads a <8 x i8>, zero-extends it to <8 x i16>, and stores the result.
; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = zext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
  ret void
}
739
; Loads a <8 x i8>, sign-extends it to <8 x i16>, and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = sext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
  ret void
}
763
; Loads <16 x i8> from addrspace(3), zero-extends each lane to i16 and stores
; the <16 x i16> result back to addrspace(3). The r600 run expects four reads
; followed by eight writes; the amdgcn runs match just the label.
; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
entry:
  %narrow = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %wide = zext <16 x i8> %narrow to <16 x i16>
  store <16 x i16> %wide, <16 x i16> addrspace(3)* %out
  ret void
}
784
; Loads <16 x i8> from addrspace(3), sign-extends each lane to i16 and stores
; the <16 x i16> result back to addrspace(3). The r600 run expects four reads,
; sixteen BFE_INT (in any order) and eight writes.
; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; TODO: codegen currently emits LSHR + BFE_INT here rather than a lone BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
entry:
  %narrow = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %wide = sext <16 x i8> %narrow to <16 x i16>
  store <16 x i16> %wide, <16 x i16> addrspace(3)* %out
  ret void
}
822
; Loads <32 x i8> from addrspace(3), zero-extends each lane to i16 and stores
; the <32 x i16> result back to addrspace(3). The r600 run expects eight reads
; followed by sixteen writes; the amdgcn runs match just the label.
; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
entry:
  %narrow = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %wide = zext <32 x i8> %narrow to <32 x i16>
  store <32 x i16> %wide, <32 x i16> addrspace(3)* %out
  ret void
}
855
856; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16:
Jan Vesely38814fa2016-08-27 19:09:43 +0000857
858; EG: LDS_READ_RET
859; EG: LDS_READ_RET
860; EG: LDS_READ_RET
861; EG: LDS_READ_RET
862; EG: LDS_READ_RET
863; EG: LDS_READ_RET
864; EG: LDS_READ_RET
865; EG: LDS_READ_RET
Jan Veselyf1705042017-01-20 21:24:26 +0000866; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
Jan Vesely38814fa2016-08-27 19:09:43 +0000867; EG-DAG: BFE_INT
868; EG-DAG: BFE_INT
869; EG-DAG: BFE_INT
870; EG-DAG: BFE_INT
871; EG-DAG: BFE_INT
872; EG-DAG: BFE_INT
873; EG-DAG: BFE_INT
874; EG-DAG: BFE_INT
875; EG-DAG: BFE_INT
876; EG-DAG: BFE_INT
877; EG-DAG: BFE_INT
878; EG-DAG: BFE_INT
879; EG-DAG: BFE_INT
880; EG-DAG: BFE_INT
881; EG-DAG: BFE_INT
882; EG-DAG: BFE_INT
883; EG-DAG: BFE_INT
884; EG-DAG: BFE_INT
885; EG-DAG: BFE_INT
886; EG-DAG: BFE_INT
887; EG-DAG: BFE_INT
888; EG-DAG: BFE_INT
889; EG-DAG: BFE_INT
890; EG-DAG: BFE_INT
Jan Veselyf1705042017-01-20 21:24:26 +0000891; EG-DAG: BFE_INT
892; EG-DAG: BFE_INT
893; EG-DAG: BFE_INT
894; EG-DAG: BFE_INT
Jan Vesely38814fa2016-08-27 19:09:43 +0000895; EG: LDS_WRITE
896; EG: LDS_WRITE
897; EG: LDS_WRITE
898; EG: LDS_WRITE
899; EG: LDS_WRITE
900; EG: LDS_WRITE
901; EG: LDS_WRITE
902; EG: LDS_WRITE
903; EG: LDS_WRITE
904; EG: LDS_WRITE
905; EG: LDS_WRITE
906; EG: LDS_WRITE
907; EG: LDS_WRITE
908; EG: LDS_WRITE
909; EG: LDS_WRITE
910; EG: LDS_WRITE
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000911define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000912 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
913 %ext = sext <32 x i8> %load to <32 x i16>
914 store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
915 ret void
916}
917
; Disabled test: the kernel below is commented out and its label uses the
; XFUNC spelling, which is not one of the check prefixes passed on the RUN
; lines, so nothing here is compiled or matched.
; NOTE(review): the reason for disabling is not recorded in this file; confirm
; that <64 x i8> extending loads are handled before re-enabling.
; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16:
; define amdgpu_kernel void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
;   %ext = zext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
;   ret void
; }
925
; Disabled test: the kernel below is commented out and its label uses the
; XFUNC spelling, which is not one of the check prefixes passed on the RUN
; lines, so nothing here is compiled or matched.
; NOTE(review): the reason for disabling is not recorded in this file; confirm
; that <64 x i8> extending loads are handled before re-enabling.
; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
; define amdgpu_kernel void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
;   %ext = sext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
;   ret void
; }
933
; Attribute group #0, referenced by the kernel definitions in this file:
; marks them as never unwinding.
attributes #0 = { nounwind }