blob: 72f5408675fccb8ec92f5a46b3aa3050d7583a3a [file] [log] [blame]
Matt Arsenault3f71c0e2017-11-29 00:55:57 +00001; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SICIVI,FUNC %s
2; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SICIVI,FUNC %s
3; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
Yaxun Liu35845f02017-11-10 02:03:28 +00004; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
Matt Arsenaultd1097a32016-06-02 19:54:26 +00005
6
7; FUNC-LABEL: {{^}}local_load_i8:
8; GCN-NOT: s_wqm_b64
Matt Arsenault3f71c0e2017-11-29 00:55:57 +00009; SICIVI: s_mov_b32 m0
10; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +000011; GCN: ds_read_u8
12
13; EG: LDS_UBYTE_READ_RET
; Kernel under test. Loads one i8 through the addrspace(3) pointer %in and stores it unchanged through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000014define amdgpu_kernel void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +000015entry:
16 %ld = load i8, i8 addrspace(3)* %in
17 store i8 %ld, i8 addrspace(3)* %out
18 ret void
19}
20
21; FUNC-LABEL: {{^}}local_load_v2i8:
22; GCN-NOT: s_wqm_b64
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000023; SICIVI: s_mov_b32 m0
24; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +000025; GCN: ds_read_u16
26
27; EG: LDS_USHORT_READ_RET
; Kernel under test. Loads a <2 x i8> value through the addrspace(3) pointer %in and stores it unchanged through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000028define amdgpu_kernel void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +000029entry:
30 %ld = load <2 x i8>, <2 x i8> addrspace(3)* %in
31 store <2 x i8> %ld, <2 x i8> addrspace(3)* %out
32 ret void
33}
34
35; FUNC-LABEL: {{^}}local_load_v3i8:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000036; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +000037; GCN: ds_read_b32
38
39; EG: LDS_READ_RET
; The r600 check above spells out the full LDS_READ_RET mnemonic, as the other r600 read checks in this file do, instead of relying on a substring match.
; Kernel under test. Loads a <3 x i8> value through the addrspace(3) pointer %in and stores it unchanged through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000040define amdgpu_kernel void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +000041entry:
42 %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
43 store <3 x i8> %ld, <3 x i8> addrspace(3)* %out
44 ret void
45}
46
47; FUNC-LABEL: {{^}}local_load_v4i8:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000048; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +000049; GCN: ds_read_b32
50
51; EG: LDS_READ_RET
; Kernel under test. Loads a <4 x i8> value through the addrspace(3) pointer %in and stores it unchanged through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000052define amdgpu_kernel void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +000053entry:
54 %ld = load <4 x i8>, <4 x i8> addrspace(3)* %in
55 store <4 x i8> %ld, <4 x i8> addrspace(3)* %out
56 ret void
57}
58
59; FUNC-LABEL: {{^}}local_load_v8i8:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000060; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +000061; GCN: ds_read_b64
62
63; EG: LDS_READ_RET
64; EG: LDS_READ_RET
; Kernel under test. Loads a <8 x i8> value through the addrspace(3) pointer %in and stores it unchanged through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000065define amdgpu_kernel void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +000066entry:
67 %ld = load <8 x i8>, <8 x i8> addrspace(3)* %in
68 store <8 x i8> %ld, <8 x i8> addrspace(3)* %out
69 ret void
70}
71
72; FUNC-LABEL: {{^}}local_load_v16i8:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000073; GFX9-NOT: m0
Tom Stellarde175d8a2016-08-26 21:36:47 +000074; GCN: ds_read2_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
75; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset1:1{{$}}
Matt Arsenaultd1097a32016-06-02 19:54:26 +000076
77; EG: LDS_READ_RET
78; EG: LDS_READ_RET
79; EG: LDS_READ_RET
80; EG: LDS_READ_RET
; Kernel under test. Loads a <16 x i8> value through the addrspace(3) pointer %in and stores it unchanged through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000081define amdgpu_kernel void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +000082entry:
83 %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in
84 store <16 x i8> %ld, <16 x i8> addrspace(3)* %out
85 ret void
86}
87
88; FUNC-LABEL: {{^}}local_zextload_i8_to_i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000089; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +000090; GCN-NOT: s_wqm_b64
Matt Arsenault3f71c0e2017-11-29 00:55:57 +000091; SICIVI: s_mov_b32 m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +000092; GCN: ds_read_u8
93
94; EG: LDS_UBYTE_READ_RET
; Kernel under test. Loads one i8 through the addrspace(3) pointer %in, zero-extends it to i32, and stores the result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +000095define amdgpu_kernel void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +000096 %a = load i8, i8 addrspace(3)* %in
97 %ext = zext i8 %a to i32
98 store i32 %ext, i32 addrspace(3)* %out
99 ret void
100}
101
102; FUNC-LABEL: {{^}}local_sextload_i8_to_i32:
103; GCN-NOT: s_wqm_b64
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000104; GFX9-NOT: m0
105; SICIVI: s_mov_b32 m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000106; GCN: ds_read_i8
107
108; EG: LDS_UBYTE_READ_RET
109; EG: BFE_INT
; Kernel under test. Loads one i8 through the addrspace(3) pointer %in, sign-extends it to i32, and stores the result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000110define amdgpu_kernel void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000111 %ld = load i8, i8 addrspace(3)* %in
112 %ext = sext i8 %ld to i32
113 store i32 %ext, i32 addrspace(3)* %out
114 ret void
115}
116
117; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:
Jan Vesely38814fa2016-08-27 19:09:43 +0000118
119; EG: LDS_UBYTE_READ_RET
; Kernel under test. Loads <1 x i8> through the addrspace(3) pointer %in, zero-extends the element to i32, and stores the <1 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000120define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000121 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
122 %ext = zext <1 x i8> %load to <1 x i32>
123 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
124 ret void
125}
126
127; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000128; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000129
130; EG: LDS_UBYTE_READ_RET
131; EG: BFE_INT
; Kernel under test. Loads <1 x i8> through the addrspace(3) pointer %in, sign-extends the element to i32, and stores the <1 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000132define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000133 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
134 %ext = sext <1 x i8> %load to <1 x i32>
135 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
136 ret void
137}
138
139; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000140; GFX9-NOT: m0
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000141; GCN: ds_read_u16
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000142
Jan Vesely38814fa2016-08-27 19:09:43 +0000143; EG: LDS_USHORT_READ_RET
; Kernel under test. Loads <2 x i8> through the addrspace(3) pointer %in, zero-extends every element to i32, and stores the <2 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000144define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000145 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
146 %ext = zext <2 x i8> %load to <2 x i32>
147 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
148 ret void
149}
150
151; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32:
152; GCN-NOT: s_wqm_b64
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000153; GFX9-NOT: m0
154; SICIVI: s_mov_b32 m0
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000155; GCN: ds_read_u16
Tom Stellard115a6152016-11-10 16:02:37 +0000156; FIXME: Need to optimize this sequence to avoid extra shift on VI.
Matt Arsenault7aad8fd2017-01-24 22:02:15 +0000157; t23: i16 = srl t39, Constant:i32<8>
Tom Stellard115a6152016-11-10 16:02:37 +0000158; t31: i32 = any_extend t23
159; t33: i32 = sign_extend_inreg t31, ValueType:ch:i8
160
161; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
162; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
163
164; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
165; VI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
166; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000167
Jan Vesely38814fa2016-08-27 19:09:43 +0000168; EG: LDS_USHORT_READ_RET
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000169; EG-DAG: BFE_INT
170; EG-DAG: BFE_INT
; Kernel under test. Loads <2 x i8> through the addrspace(3) pointer %in, sign-extends every element to i32, and stores the <2 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000171define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000172 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
173 %ext = sext <2 x i8> %load to <2 x i32>
174 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
175 ret void
176}
177
178; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000179; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000180; GCN: ds_read_b32
181
Tom Stellard115a6152016-11-10 16:02:37 +0000182; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
183; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, {{v[0-9]+}}
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000184; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
185; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
Jan Vesely38814fa2016-08-27 19:09:43 +0000186
187; EG: LDS_READ_RET
; Kernel under test. Loads <3 x i8> through the addrspace(3) pointer %in, zero-extends every element to i32, and stores the <3 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000188define amdgpu_kernel void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000189entry:
190 %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
191 %ext = zext <3 x i8> %ld to <3 x i32>
192 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
193 ret void
194}
195
196; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32:
197; GCN-NOT: s_wqm_b64
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000198; GFX9-NOT: m0
199; SICIVI: s_mov_b32 m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000200; GCN: ds_read_b32
201
202; GCN-DAG: v_bfe_i32
203; GCN-DAG: v_bfe_i32
204; GCN-DAG: v_bfe_i32
205; GCN-DAG: v_bfe_i32
206
207; GCN-DAG: ds_write_b64
208; GCN-DAG: ds_write_b32
209
Jan Vesely38814fa2016-08-27 19:09:43 +0000210; EG: LDS_READ_RET
211; EG-DAG: BFE_INT
212; EG-DAG: BFE_INT
213; EG-DAG: BFE_INT
; Kernel under test. Loads <3 x i8> through the addrspace(3) pointer %in, sign-extends every element to i32, and stores the <3 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000214define amdgpu_kernel void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000215entry:
216 %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
217 %ext = sext <3 x i8> %ld to <3 x i32>
218 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
219 ret void
220}
221
222; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32:
223; GCN-NOT: s_wqm_b64
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000224; GFX9-NOT: m0
225; SICIVI: s_mov_b32 m0
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000226; GCN: ds_read_b32
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000227
Jan Vesely38814fa2016-08-27 19:09:43 +0000228; EG: LDS_READ_RET
229; EG-DAG: BFE_UINT
230; EG-DAG: BFE_UINT
231; EG-DAG: BFE_UINT
; Kernel under test. Loads <4 x i8> through the addrspace(3) pointer %in, zero-extends every element to i32, and stores the <4 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000232define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000233 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
234 %ext = zext <4 x i8> %load to <4 x i32>
235 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
236 ret void
237}
238
239; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32:
240; GCN-NOT: s_wqm_b64
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000241; GFX9-NOT: m0
242; SICIVI: s_mov_b32 m0
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000243; GCN: ds_read_b32
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000244
Jan Vesely38814fa2016-08-27 19:09:43 +0000245; EG-DAG: LDS_READ_RET
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000246; EG-DAG: BFE_INT
247; EG-DAG: BFE_INT
248; EG-DAG: BFE_INT
249; EG-DAG: BFE_INT
; Kernel under test. Loads <4 x i8> through the addrspace(3) pointer %in, sign-extends every element to i32, and stores the <4 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000250define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000251 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
252 %ext = sext <4 x i8> %load to <4 x i32>
253 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
254 ret void
255}
256
257; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000258; SICIVI: s_mov_b32 m0
259; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000260
261; EG-DAG: LDS_READ_RET
262; EG-DAG: LDS_READ_RET
263; EG-DAG: BFE_UINT
264; EG-DAG: BFE_UINT
265; EG-DAG: BFE_UINT
266; EG-DAG: BFE_UINT
267; EG-DAG: BFE_UINT
268; EG-DAG: BFE_UINT
; Kernel under test. Loads <8 x i8> through the addrspace(3) pointer %in, zero-extends every element to i32, and stores the <8 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000269define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000270 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
271 %ext = zext <8 x i8> %load to <8 x i32>
272 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
273 ret void
274}
275
276; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000277; SICIVI: s_mov_b32 m0
278; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000279
280; EG-DAG: LDS_READ_RET
281; EG-DAG: LDS_READ_RET
282; EG-DAG: BFE_INT
283; EG-DAG: BFE_INT
284; EG-DAG: BFE_INT
285; EG-DAG: BFE_INT
286; EG-DAG: BFE_INT
287; EG-DAG: BFE_INT
288; EG-DAG: BFE_INT
289; EG-DAG: BFE_INT
; Kernel under test. Loads <8 x i8> through the addrspace(3) pointer %in, sign-extends every element to i32, and stores the <8 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000290define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000291 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
292 %ext = sext <8 x i8> %load to <8 x i32>
293 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
294 ret void
295}
296
297; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000298; SICIVI: s_mov_b32 m0
299; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000300
301; EG-DAG: LDS_READ_RET
302; EG-DAG: LDS_READ_RET
303; EG-DAG: LDS_READ_RET
304; EG-DAG: LDS_READ_RET
305; EG-DAG: BFE_UINT
306; EG-DAG: BFE_UINT
307; EG-DAG: BFE_UINT
308; EG-DAG: BFE_UINT
309; EG-DAG: BFE_UINT
310; EG-DAG: BFE_UINT
311; EG-DAG: BFE_UINT
312; EG-DAG: BFE_UINT
313; EG-DAG: BFE_UINT
314; EG-DAG: BFE_UINT
315; EG-DAG: BFE_UINT
316; EG-DAG: BFE_UINT
; Kernel under test. Loads <16 x i8> through the addrspace(3) pointer %in, zero-extends every element to i32, and stores the <16 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000317define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000318 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
319 %ext = zext <16 x i8> %load to <16 x i32>
320 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
321 ret void
322}
323
324; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000325; SICIVI: s_mov_b32 m0
326; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000327
328; EG-DAG: LDS_READ_RET
329; EG-DAG: LDS_READ_RET
330; EG-DAG: LDS_READ_RET
331; EG-DAG: LDS_READ_RET
332; EG-DAG: BFE_INT
333; EG-DAG: BFE_INT
334; EG-DAG: BFE_INT
335; EG-DAG: BFE_INT
336; EG-DAG: BFE_INT
337; EG-DAG: BFE_INT
338; EG-DAG: BFE_INT
339; EG-DAG: BFE_INT
340; EG-DAG: BFE_INT
341; EG-DAG: BFE_INT
342; EG-DAG: BFE_INT
343; EG-DAG: BFE_INT
344; EG-DAG: BFE_INT
345; EG-DAG: BFE_INT
346; EG-DAG: BFE_INT
347; EG-DAG: BFE_INT
; Kernel under test. Loads <16 x i8> through the addrspace(3) pointer %in, sign-extends every element to i32, and stores the <16 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000348define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000349 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
350 %ext = sext <16 x i8> %load to <16 x i32>
351 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
352 ret void
353}
354
355; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000356; SICIVI: s_mov_b32 m0
357; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000358
359; EG-DAG: LDS_READ_RET
360; EG-DAG: LDS_READ_RET
361; EG-DAG: LDS_READ_RET
362; EG-DAG: LDS_READ_RET
363; EG-DAG: LDS_READ_RET
364; EG-DAG: LDS_READ_RET
365; EG-DAG: LDS_READ_RET
366; EG-DAG: LDS_READ_RET
; Kernel under test. Loads <32 x i8> through the addrspace(3) pointer %in, zero-extends every element to i32, and stores the <32 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000367define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000368 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
369 %ext = zext <32 x i8> %load to <32 x i32>
370 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
371 ret void
372}
373
374; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000375; SICIVI: s_mov_b32 m0
376; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000377
378; EG-DAG: LDS_READ_RET
379; EG-DAG: LDS_READ_RET
380; EG-DAG: LDS_READ_RET
381; EG-DAG: LDS_READ_RET
382; EG-DAG: LDS_READ_RET
383; EG-DAG: LDS_READ_RET
384; EG-DAG: LDS_READ_RET
385; EG-DAG: LDS_READ_RET
; Kernel under test. Loads <32 x i8> through the addrspace(3) pointer %in, sign-extends every element to i32, and stores the <32 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000386define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000387 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
388 %ext = sext <32 x i8> %load to <32 x i32>
389 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
390 ret void
391}
392
393; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000394; SICIVI: s_mov_b32 m0
395; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000396
397; EG-DAG: LDS_READ_RET
398; EG-DAG: LDS_READ_RET
399; EG-DAG: LDS_READ_RET
400; EG-DAG: LDS_READ_RET
401; EG-DAG: LDS_READ_RET
402; EG-DAG: LDS_READ_RET
403; EG-DAG: LDS_READ_RET
404; EG-DAG: LDS_READ_RET
405; EG-DAG: LDS_READ_RET
406; EG-DAG: LDS_READ_RET
407; EG-DAG: LDS_READ_RET
408; EG-DAG: LDS_READ_RET
409; EG-DAG: LDS_READ_RET
410; EG-DAG: LDS_READ_RET
411; EG-DAG: LDS_READ_RET
412; EG-DAG: LDS_READ_RET
; Kernel under test. Loads <64 x i8> through the addrspace(3) pointer %in, zero-extends every element to i32, and stores the <64 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000413define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000414 %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
415 %ext = zext <64 x i8> %load to <64 x i32>
416 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
417 ret void
418}
419
420; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000421; SICIVI: s_mov_b32 m0
422; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000423
424; EG-DAG: LDS_READ_RET
425; EG-DAG: LDS_READ_RET
426; EG-DAG: LDS_READ_RET
427; EG-DAG: LDS_READ_RET
428; EG-DAG: LDS_READ_RET
429; EG-DAG: LDS_READ_RET
430; EG-DAG: LDS_READ_RET
431; EG-DAG: LDS_READ_RET
432; EG-DAG: LDS_READ_RET
433; EG-DAG: LDS_READ_RET
434; EG-DAG: LDS_READ_RET
435; EG-DAG: LDS_READ_RET
436; EG-DAG: LDS_READ_RET
437; EG-DAG: LDS_READ_RET
438; EG-DAG: LDS_READ_RET
439; EG-DAG: LDS_READ_RET
; Kernel under test. Loads <64 x i8> through the addrspace(3) pointer %in, sign-extends every element to i32, and stores the <64 x i32> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000440define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000441 %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
442 %ext = sext <64 x i8> %load to <64 x i32>
443 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
444 ret void
445}
446
447; FUNC-LABEL: {{^}}local_zextload_i8_to_i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000448; SICIVI: s_mov_b32 m0
449; GFX9-NOT: m0
450
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000451; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
452; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
453; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]
Jan Vesely38814fa2016-08-27 19:09:43 +0000454
455; EG: LDS_UBYTE_READ_RET
456; EG: MOV {{.*}}, literal
457; EG: 0.0
; Kernel under test. Loads one i8 through the addrspace(3) pointer %in, zero-extends it to i64, and stores the result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000458define amdgpu_kernel void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000459 %a = load i8, i8 addrspace(3)* %in
460 %ext = zext i8 %a to i64
461 store i64 %ext, i64 addrspace(3)* %out
462 ret void
463}
464
465; FUNC-LABEL: {{^}}local_sextload_i8_to_i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000466; SICIVI: s_mov_b32 m0
467; GFX9-NOT: m0
468
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000469; GCN: ds_read_i8 v[[LO:[0-9]+]],
470; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
471
472; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
Jan Vesely38814fa2016-08-27 19:09:43 +0000473
474; EG: LDS_UBYTE_READ_RET
475; EG: ASHR
476; TODO: why not 7?
477; EG: 31
; Kernel under test. Loads one i8 through the addrspace(3) pointer %in, sign-extends it to i64, and stores the result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000478define amdgpu_kernel void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000479 %a = load i8, i8 addrspace(3)* %in
480 %ext = sext i8 %a to i64
481 store i64 %ext, i64 addrspace(3)* %out
482 ret void
483}
484
485; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000486; SICIVI: s_mov_b32 m0
487; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000488
489; EG: LDS_UBYTE_READ_RET
490; EG: MOV {{.*}}, literal
491; TODO: merge?
492; EG: 0.0
; Kernel under test. Loads <1 x i8> through the addrspace(3) pointer %in, zero-extends the element to i64, and stores the <1 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000493define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000494 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
495 %ext = zext <1 x i8> %load to <1 x i64>
496 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
497 ret void
498}
499
500; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000501; SICIVI: s_mov_b32 m0
502; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000503
504; EG: LDS_UBYTE_READ_RET
505; EG: ASHR
506; TODO: why not 7?
507; EG: 31
; Kernel under test. Loads <1 x i8> through the addrspace(3) pointer %in, sign-extends the element to i64, and stores the <1 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000508define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000509 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
510 %ext = sext <1 x i8> %load to <1 x i64>
511 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
512 ret void
513}
514
515; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000516; SICIVI: s_mov_b32 m0
517; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000518
519; EG: LDS_USHORT_READ_RET
; Kernel under test. Loads <2 x i8> through the addrspace(3) pointer %in, zero-extends every element to i64, and stores the <2 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000520define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000521 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
522 %ext = zext <2 x i8> %load to <2 x i64>
523 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
524 ret void
525}
526
527; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000528; SICIVI: s_mov_b32 m0
529; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000530
531; EG: LDS_USHORT_READ_RET
532; EG: BFE_INT
533; EG: BFE_INT
; Kernel under test. Loads <2 x i8> through the addrspace(3) pointer %in, sign-extends every element to i64, and stores the <2 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000534define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000535 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
536 %ext = sext <2 x i8> %load to <2 x i64>
537 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
538 ret void
539}
540
541; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000542; SICIVI: s_mov_b32 m0
543; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000544
545; EG: LDS_READ_RET
; Kernel under test. Loads <4 x i8> through the addrspace(3) pointer %in, zero-extends every element to i64, and stores the <4 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000546define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000547 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
548 %ext = zext <4 x i8> %load to <4 x i64>
549 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
550 ret void
551}
552
553; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000554; SICIVI: s_mov_b32 m0
555; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000556
557; EG: LDS_READ_RET
; Kernel under test. Loads <4 x i8> through the addrspace(3) pointer %in, sign-extends every element to i64, and stores the <4 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000558define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000559 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
560 %ext = sext <4 x i8> %load to <4 x i64>
561 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
562 ret void
563}
564
565; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000566; SICIVI: s_mov_b32 m0
567; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000568
569; EG: LDS_READ_RET
570; EG: LDS_READ_RET
; Kernel under test. Loads <8 x i8> through the addrspace(3) pointer %in, zero-extends every element to i64, and stores the <8 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000571define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000572 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
573 %ext = zext <8 x i8> %load to <8 x i64>
574 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
575 ret void
576}
577
578; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000579; SICIVI: s_mov_b32 m0
580; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000581
582; EG: LDS_READ_RET
583; EG: LDS_READ_RET
584; EG-DAG: ASHR
585; EG-DAG: ASHR
586; EG-DAG: BFE_INT
587; EG-DAG: BFE_INT
588; EG-DAG: BFE_INT
589; EG-DAG: BFE_INT
590; EG-DAG: BFE_INT
591; EG-DAG: BFE_INT
592; EG-DAG: BFE_INT
; Kernel under test. Loads <8 x i8> through the addrspace(3) pointer %in, sign-extends every element to i64, and stores the <8 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000593define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000594 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
595 %ext = sext <8 x i8> %load to <8 x i64>
596 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
597 ret void
598}
599
600; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000601; SICIVI: s_mov_b32 m0
602; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000603
604; EG: LDS_READ_RET
605; EG: LDS_READ_RET
606; EG: LDS_READ_RET
607; EG: LDS_READ_RET
; Kernel under test. Loads <16 x i8> through the addrspace(3) pointer %in, zero-extends every element to i64, and stores the <16 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000608define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000609 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
610 %ext = zext <16 x i8> %load to <16 x i64>
611 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
612 ret void
613}
614
615; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000616; SICIVI: s_mov_b32 m0
617; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000618
619; EG: LDS_READ_RET
620; EG: LDS_READ_RET
621; EG: LDS_READ_RET
622; EG: LDS_READ_RET
; Kernel under test. Loads <16 x i8> through the addrspace(3) pointer %in, sign-extends every element to i64, and stores the <16 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000623define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000624 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
625 %ext = sext <16 x i8> %load to <16 x i64>
626 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
627 ret void
628}
629
630; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000631; SICIVI: s_mov_b32 m0
632; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000633
634; EG: LDS_READ_RET
635; EG: LDS_READ_RET
636; EG: LDS_READ_RET
637; EG: LDS_READ_RET
638; EG: LDS_READ_RET
639; EG: LDS_READ_RET
640; EG: LDS_READ_RET
641; EG: LDS_READ_RET
; Kernel under test. Loads <32 x i8> through the addrspace(3) pointer %in, zero-extends every element to i64, and stores the <32 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000642define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000643 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
644 %ext = zext <32 x i8> %load to <32 x i64>
645 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
646 ret void
647}
648
649; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000650; SICIVI: s_mov_b32 m0
651; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000652
653; EG: LDS_READ_RET
654; EG: LDS_READ_RET
655; EG: LDS_READ_RET
656; EG: LDS_READ_RET
657; EG: LDS_READ_RET
658; EG: LDS_READ_RET
659; EG: LDS_READ_RET
660; EG: LDS_READ_RET
; Kernel under test. Loads <32 x i8> through the addrspace(3) pointer %in, sign-extends every element to i64, and stores the <32 x i64> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000661define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000662 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
663 %ext = sext <32 x i8> %load to <32 x i64>
664 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
665 ret void
666}
667
668; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64:
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000669; define amdgpu_kernel void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000670; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
671; %ext = zext <64 x i8> %load to <64 x i64>
672; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
673; ret void
674; }
675
676; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000677; define amdgpu_kernel void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000678; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
679; %ext = sext <64 x i8> %load to <64 x i64>
680; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
681; ret void
682; }
683
684; FUNC-LABEL: {{^}}local_zextload_i8_to_i16:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000685; SICIVI: s_mov_b32 m0
686; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000687; GCN: ds_read_u8 v[[VAL:[0-9]+]],
688; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
Jan Vesely38814fa2016-08-27 19:09:43 +0000689
690; EG: LDS_UBYTE_READ_RET
691; EG: LDS_SHORT_WRITE
; The ds_write_b16 check above reuses the VAL register captured by the ds_read_u8 check as the data operand, so the stored value is tied to the loaded one (same form as the check for local_sextload_i8_to_i16).
; Kernel under test. Loads one i8 through the addrspace(3) pointer %in, zero-extends it to i16, and stores the result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000692define amdgpu_kernel void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000693 %a = load i8, i8 addrspace(3)* %in
694 %ext = zext i8 %a to i16
695 store i16 %ext, i16 addrspace(3)* %out
696 ret void
697}
698
699; FUNC-LABEL: {{^}}local_sextload_i8_to_i16:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000700; SICIVI: s_mov_b32 m0
701; GFX9-NOT: m0
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000702; GCN: ds_read_i8 v[[VAL:[0-9]+]],
703; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]
Jan Vesely38814fa2016-08-27 19:09:43 +0000704
705; EG: LDS_UBYTE_READ_RET
706; EG: BFE_INT
707; EG: LDS_SHORT_WRITE
; Kernel under test. Loads one i8 through the addrspace(3) pointer %in, sign-extends it to i16, and stores the result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000708define amdgpu_kernel void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000709 %a = load i8, i8 addrspace(3)* %in
710 %ext = sext i8 %a to i16
711 store i16 %ext, i16 addrspace(3)* %out
712 ret void
713}
714
715; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000716; SICIVI: s_mov_b32 m0
717; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000718
719; EG: LDS_UBYTE_READ_RET
720; EG: LDS_SHORT_WRITE
; Kernel under test. Loads <1 x i8> through the addrspace(3) pointer %in, zero-extends the element to i16, and stores the <1 x i16> result through the addrspace(3) pointer %out.
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000721define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000722 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
723 %ext = zext <1 x i8> %load to <1 x i16>
724 store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
725 ret void
726}
727
; Sign-extending load of <1 x i8> from addrspace(3), stored back to
; addrspace(3) as <1 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; an unsigned byte read, a BFE_INT, then a short write.
; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %widened = sext <1 x i8> %src to <1 x i16>
  store <1 x i16> %widened, <1 x i16> addrspace(3)* %out
  ret void
}
741
; Zero-extending load of <2 x i8> from addrspace(3), stored back to
; addrspace(3) as <2 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; one unsigned short read and one write.
; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_USHORT_READ_RET
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %widened = zext <2 x i8> %src to <2 x i16>
  store <2 x i16> %widened, <2 x i16> addrspace(3)* %out
  ret void
}
754
; Sign-extending load of <2 x i8> from addrspace(3), stored back to
; addrspace(3) as <2 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; one unsigned short read, two BFE_INT, then one write.
; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_USHORT_READ_RET
; EG: BFE_INT
; EG: BFE_INT
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %widened = sext <2 x i8> %src to <2 x i16>
  store <2 x i16> %widened, <2 x i16> addrspace(3)* %out
  ret void
}
769
; Zero-extending load of <4 x i8> from addrspace(3), stored back to
; addrspace(3) as <4 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; one read followed by two writes.
; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %widened = zext <4 x i8> %src to <4 x i16>
  store <4 x i16> %widened, <4 x i16> addrspace(3)* %out
  ret void
}
783
; Sign-extending load of <4 x i8> from addrspace(3), stored back to
; addrspace(3) as <4 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; one read, four BFE_INT (in any order), then two writes.
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %widened = sext <4 x i8> %src to <4 x i16>
  store <4 x i16> %widened, <4 x i16> addrspace(3)* %out
  ret void
}
802
; Zero-extending load of <8 x i8> from addrspace(3), stored back to
; addrspace(3) as <8 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; two reads followed by four writes.
; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %widened = zext <8 x i8> %src to <8 x i16>
  store <8 x i16> %widened, <8 x i16> addrspace(3)* %out
  ret void
}
819
; Sign-extending load of <8 x i8> from addrspace(3), stored back to
; addrspace(3) as <8 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; two reads, eight BFE_INT (in any order), then four writes.
; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %widened = sext <8 x i8> %src to <8 x i16>
  store <8 x i16> %widened, <8 x i16> addrspace(3)* %out
  ret void
}
845
; Zero-extending load of <16 x i8> from addrspace(3), stored back to
; addrspace(3) as <16 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; four reads followed by eight writes.
; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %widened = zext <16 x i8> %src to <16 x i16>
  store <16 x i16> %widened, <16 x i16> addrspace(3)* %out
  ret void
}
868
; Sign-extending load of <16 x i8> from addrspace(3), stored back to
; addrspace(3) as <16 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; four reads, sixteen BFE_INT (in any order), then eight writes.
; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %widened = sext <16 x i8> %src to <16 x i16>
  store <16 x i16> %widened, <16 x i16> addrspace(3)* %out
  ret void
}
908
; Zero-extending load of <32 x i8> from addrspace(3), stored back to
; addrspace(3) as <32 x i16>.
; Only the m0 handling is checked on the GCN runs; the Evergreen run expects
; eight reads followed by sixteen writes.
; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
entry:
  %src = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %widened = zext <32 x i8> %src to <32 x i16>
  store <32 x i16> %widened, <32 x i16> addrspace(3)* %out
  ret void
}
943
944; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000945; SICIVI: s_mov_b32 m0
946; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000947
948; EG: LDS_READ_RET
949; EG: LDS_READ_RET
950; EG: LDS_READ_RET
951; EG: LDS_READ_RET
952; EG: LDS_READ_RET
953; EG: LDS_READ_RET
954; EG: LDS_READ_RET
955; EG: LDS_READ_RET
Jan Veselyf1705042017-01-20 21:24:26 +0000956; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
Jan Vesely38814fa2016-08-27 19:09:43 +0000957; EG-DAG: BFE_INT
958; EG-DAG: BFE_INT
959; EG-DAG: BFE_INT
960; EG-DAG: BFE_INT
961; EG-DAG: BFE_INT
962; EG-DAG: BFE_INT
963; EG-DAG: BFE_INT
964; EG-DAG: BFE_INT
965; EG-DAG: BFE_INT
966; EG-DAG: BFE_INT
967; EG-DAG: BFE_INT
968; EG-DAG: BFE_INT
969; EG-DAG: BFE_INT
970; EG-DAG: BFE_INT
971; EG-DAG: BFE_INT
972; EG-DAG: BFE_INT
973; EG-DAG: BFE_INT
974; EG-DAG: BFE_INT
975; EG-DAG: BFE_INT
976; EG-DAG: BFE_INT
977; EG-DAG: BFE_INT
978; EG-DAG: BFE_INT
979; EG-DAG: BFE_INT
980; EG-DAG: BFE_INT
Jan Veselyf1705042017-01-20 21:24:26 +0000981; EG-DAG: BFE_INT
982; EG-DAG: BFE_INT
983; EG-DAG: BFE_INT
984; EG-DAG: BFE_INT
Jan Vesely38814fa2016-08-27 19:09:43 +0000985; EG: LDS_WRITE
986; EG: LDS_WRITE
987; EG: LDS_WRITE
988; EG: LDS_WRITE
989; EG: LDS_WRITE
990; EG: LDS_WRITE
991; EG: LDS_WRITE
992; EG: LDS_WRITE
993; EG: LDS_WRITE
994; EG: LDS_WRITE
995; EG: LDS_WRITE
996; EG: LDS_WRITE
997; EG: LDS_WRITE
998; EG: LDS_WRITE
999; EG: LDS_WRITE
1000; EG: LDS_WRITE
Matt Arsenault3dbeefa2017-03-21 21:39:51 +00001001define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +00001002 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
1003 %ext = sext <32 x i8> %load to <32 x i16>
1004 store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
1005 ret void
1006}
1007
1008; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16:
Matt Arsenault3dbeefa2017-03-21 21:39:51 +00001009; define amdgpu_kernel void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +00001010; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
1011; %ext = zext <64 x i8> %load to <64 x i16>
1012; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
1013; ret void
1014; }
1015
1016; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
Matt Arsenault3dbeefa2017-03-21 21:39:51 +00001017; define amdgpu_kernel void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +00001018; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
1019; %ext = sext <64 x i8> %load to <64 x i16>
1020; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
1021; ret void
1022; }
1023
; Attribute group #0, referenced by the kernels defined above; it carries only
; the nounwind attribute.
1024attributes #0 = { nounwind }