blob: 898d35d60a21fd002d3652c21078ffcc50a92b18 [file] [log] [blame]
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Testing for ds_read/write_b128
; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
Matt Arsenaultd1097a32016-06-02 19:54:26 +00009
; Scalar i8 round trip: one byte is read from %in and written unchanged to
; %out, both pointers being in addrspace(3).
; FUNC-LABEL: {{^}}local_load_i8:
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
; GCN: ds_read_u8

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
entry:
  %byte = load i8, i8 addrspace(3)* %in
  store i8 %byte, i8 addrspace(3)* %out
  ret void
}
23
; <2 x i8> round trip between two addrspace(3) pointers; the amdgcn checks
; expect a single 16-bit read.
; FUNC-LABEL: {{^}}local_load_v2i8:
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
; GCN: ds_read_u16

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
entry:
  %pair = load <2 x i8>, <2 x i8> addrspace(3)* %in
  store <2 x i8> %pair, <2 x i8> addrspace(3)* %out
  ret void
}
37
; <3 x i8> round trip between two addrspace(3) pointers; the amdgcn checks
; expect the three bytes to be fetched with one 32-bit read.
; The r600 (redwood) check below spells out the full LDS_READ_RET mnemonic,
; as the other dword read checks in this file do, instead of relying on a
; truncated substring match.
; FUNC-LABEL: {{^}}local_load_v3i8:
; GFX9-NOT: m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(3)* %out
  ret void
}
49
; <4 x i8> round trip between two addrspace(3) pointers; one 32-bit read is
; expected on every checked target.
; FUNC-LABEL: {{^}}local_load_v4i8:
; GFX9-NOT: m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
entry:
  %quad = load <4 x i8>, <4 x i8> addrspace(3)* %in
  store <4 x i8> %quad, <4 x i8> addrspace(3)* %out
  ret void
}
61
; <8 x i8> round trip between two addrspace(3) pointers; amdgcn is checked for
; one 64-bit read, r600 for two dword reads.
; FUNC-LABEL: {{^}}local_load_v8i8:
; GFX9-NOT: m0
; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
entry:
  %bytes = load <8 x i8>, <8 x i8> addrspace(3)* %in
  store <8 x i8> %bytes, <8 x i8> addrspace(3)* %out
  ret void
}
74
; <16 x i8> round trip between two addrspace(3) pointers. The amdgcn checks
; tie the registers written by the paired 64-bit read (LO/HI) to the operands
; of the paired 64-bit write.
; FUNC-LABEL: {{^}}local_load_v16i8:
; GFX9-NOT: m0
; GCN: ds_read2_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
entry:
  %bytes = load <16 x i8>, <16 x i8> addrspace(3)* %in
  store <16 x i8> %bytes, <16 x i8> addrspace(3)* %out
  ret void
}
90
; Reads an i8 from addrspace(3), zero-extends it to i32 and stores the result
; to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_i8_to_i32:
; GFX9-NOT: m0
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0
; GCN: ds_read_u8

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(3)* %out
  ret void
}
104
; Reads an i8 from addrspace(3), sign-extends it to i32 and stores the result
; to addrspace(3). amdgcn is checked for a signed byte read; r600 for an
; unsigned byte read followed by a signed bitfield extract.
; FUNC-LABEL: {{^}}local_sextload_i8_to_i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_i8

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(3)* %out
  ret void
}
119
; Single-element vector form of the i8 -> i32 zero-extending load: reads
; <1 x i8> from addrspace(3), zero-extends to <1 x i32> and stores the result.
; The gfx900 run is additionally required not to reference m0, matching the
; guard carried by the sign-extending sibling of this test.
; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32:
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
define amdgpu_kernel void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}
129
; Single-element vector form of the i8 -> i32 sign-extending load: reads
; <1 x i8> from addrspace(3), sign-extends to <1 x i32> and stores the result.
; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32:
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %narrow = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %wide = sext <1 x i8> %narrow to <1 x i32>
  store <1 x i32> %wide, <1 x i32> addrspace(3)* %out
  ret void
}
141
; Reads <2 x i8> from addrspace(3), zero-extends each lane to i32 and stores
; the <2 x i32> result; a single 16-bit read is expected.
; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32:
; GFX9-NOT: m0
; GCN: ds_read_u16

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %narrow = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = zext <2 x i8> %narrow to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(3)* %out
  ret void
}
153
; Reads <2 x i8> from addrspace(3), sign-extends each lane to i32 and stores
; the <2 x i32> result. The two amdgcn generations are checked for different
; extract sequences (see the FIXME below for the extra shift on VI).
; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_u16
; FIXME: Need to optimize this sequence to avoid extra shift on VI.
; t23: i16 = srl t39, Constant:i32<8>
; t31: i32 = any_extend t23
; t33: i32 = sign_extend_inreg t31, ValueType:ch:i8

; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8

; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
; VI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8

; EG: LDS_USHORT_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %narrow = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = sext <2 x i8> %narrow to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(3)* %out
  ret void
}
180
; Reads <3 x i8> from addrspace(3), zero-extends each lane to i32 and stores
; the <3 x i32> result; the bytes are expected to come from one 32-bit read
; and then be separated with shift/extract/mask instructions.
; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32:
; GFX9-NOT: m0
; GCN: ds_read_b32

; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, {{v[0-9]+}}
; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %narrow = load <3 x i8>, <3 x i8> addrspace(3)* %in
  %wide = zext <3 x i8> %narrow to <3 x i32>
  store <3 x i32> %wide, <3 x i32> addrspace(3)* %out
  ret void
}
198
; Reads <3 x i8> from addrspace(3), sign-extends each lane to i32 and stores
; the <3 x i32> result; the store is checked as one 64-bit plus one 32-bit
; write.
; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_b32

; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32
; GCN-DAG: v_bfe_i32

; GCN-DAG: ds_write_b64
; GCN-DAG: ds_write_b32

; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 {
entry:
  %narrow = load <3 x i8>, <3 x i8> addrspace(3)* %in
  %wide = sext <3 x i8> %narrow to <3 x i32>
  store <3 x i32> %wide, <3 x i32> addrspace(3)* %out
  ret void
}
224
; Reads <4 x i8> from addrspace(3), zero-extends each lane to i32 and stores
; the <4 x i32> result.
; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %narrow = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = zext <4 x i8> %narrow to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(3)* %out
  ret void
}
241
; Reads <4 x i8> from addrspace(3), sign-extends each lane to i32 and stores
; the <4 x i32> result.
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0
; GCN: ds_read_b32

; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %narrow = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = sext <4 x i8> %narrow to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(3)* %out
  ret void
}
259
; Reads <8 x i8> from addrspace(3), zero-extends each lane to i32 and stores
; the <8 x i32> result.
; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %narrow = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %wide = zext <8 x i8> %narrow to <8 x i32>
  store <8 x i32> %wide, <8 x i32> addrspace(3)* %out
  ret void
}
278
; Reads <8 x i8> from addrspace(3), sign-extends each lane to i32 and stores
; the <8 x i32> result.
; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %narrow = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %wide = sext <8 x i8> %narrow to <8 x i32>
  store <8 x i32> %wide, <8 x i32> addrspace(3)* %out
  ret void
}
299
; Reads <16 x i8> from addrspace(3), zero-extends each lane to i32 and stores
; the <16 x i32> result.
; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
; EG-DAG: BFE_UINT
define amdgpu_kernel void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %narrow = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %wide = zext <16 x i8> %narrow to <16 x i32>
  store <16 x i32> %wide, <16 x i32> addrspace(3)* %out
  ret void
}
326
; Reads <16 x i8> from addrspace(3), sign-extends each lane to i32 and stores
; the <16 x i32> result.
; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %narrow = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %wide = sext <16 x i8> %narrow to <16 x i32>
  store <16 x i32> %wide, <16 x i32> addrspace(3)* %out
  ret void
}
357
; Reads <32 x i8> from addrspace(3), zero-extends each lane to i32 and stores
; the <32 x i32> result; r600 is checked for eight dword reads.
; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %narrow = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %wide = zext <32 x i8> %narrow to <32 x i32>
  store <32 x i32> %wide, <32 x i32> addrspace(3)* %out
  ret void
}
376
; Reads <32 x i8> from addrspace(3), sign-extends each lane to i32 and stores
; the <32 x i32> result; r600 is checked for eight dword reads.
; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %narrow = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %wide = sext <32 x i8> %narrow to <32 x i32>
  store <32 x i32> %wide, <32 x i32> addrspace(3)* %out
  ret void
}
395
; Reads <64 x i8> from addrspace(3), zero-extends each lane to i32 and stores
; the <64 x i32> result; r600 is checked for sixteen dword reads.
; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
  %narrow = load <64 x i8>, <64 x i8> addrspace(3)* %in
  %wide = zext <64 x i8> %narrow to <64 x i32>
  store <64 x i32> %wide, <64 x i32> addrspace(3)* %out
  ret void
}
422
; Reads <64 x i8> from addrspace(3), sign-extends each lane to i32 and stores
; the <64 x i32> result; r600 is checked for sixteen dword reads.
; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
  %narrow = load <64 x i8>, <64 x i8> addrspace(3)* %in
  %wide = sext <64 x i8> %narrow to <64 x i32>
  store <64 x i32> %wide, <64 x i32> addrspace(3)* %out
  ret void
}
449
; Reads an i8 from addrspace(3), zero-extends it to i64 and stores the result.
; The amdgcn checks require the 64-bit write to pair the loaded byte (LO) with
; a register that was set to zero (HI).
; FUNC-LABEL: {{^}}local_zextload_i8_to_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]],
; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]

; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; EG: 0.0
define amdgpu_kernel void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = zext i8 %byte to i64
  store i64 %wide, i64 addrspace(3)* %out
  ret void
}
467
; Reads an i8 from addrspace(3), sign-extends it to i64 and stores the result.
; The amdgcn checks require the high half (HI) to be produced by an arithmetic
; shift right by 31 of the loaded value (LO).
; FUNC-LABEL: {{^}}local_sextload_i8_to_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_i8 v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: LDS_UBYTE_READ_RET
; EG: ASHR
; TODO: why not 7?
; EG: 31
define amdgpu_kernel void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = sext i8 %byte to i64
  store i64 %wide, i64 addrspace(3)* %out
  ret void
}
487
; Single-element vector form of the i8 -> i64 zero-extending load: reads
; <1 x i8> from addrspace(3) and stores the <1 x i64> zero-extension.
; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: MOV {{.*}}, literal
; TODO: merge?
; EG: 0.0
define amdgpu_kernel void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %narrow = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %wide = zext <1 x i8> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(3)* %out
  ret void
}
502
; Single-element vector form of the i8 -> i64 sign-extending load: reads
; <1 x i8> from addrspace(3) and stores the <1 x i64> sign-extension.
; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: ASHR
; TODO: why not 7?
; EG: 31
define amdgpu_kernel void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %narrow = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %wide = sext <1 x i8> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(3)* %out
  ret void
}
517
; Reads <2 x i8> from addrspace(3), zero-extends each lane to i64 and stores
; the <2 x i64> result.
; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_USHORT_READ_RET
define amdgpu_kernel void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %narrow = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = zext <2 x i8> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(3)* %out
  ret void
}
529
; Reads <2 x i8> from addrspace(3), sign-extends each lane to i64 and stores
; the <2 x i64> result.
; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_USHORT_READ_RET
; EG: BFE_INT
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %narrow = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = sext <2 x i8> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(3)* %out
  ret void
}
543
; Reads <4 x i8> from addrspace(3), zero-extends each lane to i64 and stores
; the <4 x i64> result.
; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %narrow = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = zext <4 x i8> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(3)* %out
  ret void
}
555
; Reads <4 x i8> from addrspace(3), sign-extends each lane to i64 and stores
; the <4 x i64> result.
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %narrow = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = sext <4 x i8> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(3)* %out
  ret void
}
567
; Reads <8 x i8> from addrspace(3), zero-extends each lane to i64 and stores
; the <8 x i64> result.
; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %narrow = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %wide = zext <8 x i8> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(3)* %out
  ret void
}
580
; Reads <8 x i8> from addrspace(3), sign-extends each lane to i64 and stores
; the <8 x i64> result.
; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: ASHR
; EG-DAG: ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %narrow = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %wide = sext <8 x i8> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(3)* %out
  ret void
}
602
; Reads <16 x i8> from addrspace(3), zero-extends each lane to i64 and stores
; the <16 x i64> result.
; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %narrow = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %wide = zext <16 x i8> %narrow to <16 x i64>
  store <16 x i64> %wide, <16 x i64> addrspace(3)* %out
  ret void
}
617
; Reads <16 x i8> from addrspace(3), sign-extends each lane to i64 and stores
; the <16 x i64> result.
; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %narrow = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %wide = sext <16 x i8> %narrow to <16 x i64>
  store <16 x i64> %wide, <16 x i64> addrspace(3)* %out
  ret void
}
632
; Reads <32 x i8> from addrspace(3), zero-extends each lane to i64 and stores
; the <32 x i64> result.
; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %narrow = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %wide = zext <32 x i8> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(3)* %out
  ret void
}
651
; Reads <32 x i8> from addrspace(3), sign-extends each lane to i64 and stores
; the <32 x i64> result.
; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %narrow = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %wide = sext <32 x i8> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(3)* %out
  ret void
}
670
671; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64:
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000672; define amdgpu_kernel void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000673; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
674; %ext = zext <64 x i8> %load to <64 x i64>
675; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
676; ret void
677; }
678
679; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64:
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000680; define amdgpu_kernel void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000681; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
682; %ext = sext <64 x i8> %load to <64 x i64>
683; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
684; ret void
685; }
686
; Reads an i8 from addrspace(3), zero-extends it to i16 and stores the result.
; The write check reuses the VAL capture from the read as the data operand, so
; the test verifies that the loaded register is what gets stored. Previously
; the write line re-defined VAL on the address operand and left the data
; operand unchecked; this now mirrors the sign-extending twin of this test.
; FUNC-LABEL: {{^}}local_zextload_i8_to_i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
; GCN: ds_read_u8 v[[VAL:[0-9]+]],
; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]

; EG: LDS_UBYTE_READ_RET
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %a = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(3)* %out
  ret void
}
701
; Reads an i8 from addrspace(3), sign-extends it to i16 and stores the result.
; The amdgcn checks require the register written by the signed byte read (VAL)
; to be the data operand of the 16-bit write.
; FUNC-LABEL: {{^}}local_sextload_i8_to_i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0
; GCN: ds_read_i8 v[[VAL:[0-9]+]],
; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]]

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = sext i8 %byte to i16
  store i16 %wide, i16 addrspace(3)* %out
  ret void
}
717
; Single-element vector form of the i8 -> i16 zero-extending load: reads
; <1 x i8> from addrspace(3) and stores the <1 x i16> zero-extension.
; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %narrow = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %wide = zext <1 x i8> %narrow to <1 x i16>
  store <1 x i16> %wide, <1 x i16> addrspace(3)* %out
  ret void
}
730
; Single-element vector variant: loads <1 x i8> from addrspace(3),
; sign-extends it to <1 x i16> and stores the result to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_UBYTE_READ_RET
; EG: BFE_INT
; EG: LDS_SHORT_WRITE
define amdgpu_kernel void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(3)* %in
  %ext = sext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(3)* %out
  ret void
}

; Loads <2 x i8> from addrspace(3), zero-extends each element to i16 and
; stores the <2 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_USHORT_READ_RET
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = zext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
  ret void
}

; Loads <2 x i8> from addrspace(3), sign-extends each element to i16 and
; stores the <2 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_USHORT_READ_RET
; EG: BFE_INT
; EG: BFE_INT
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %ext = sext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(3)* %out
  ret void
}

; Loads <4 x i8> from addrspace(3), zero-extends each element to i16 and
; stores the <4 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = zext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
  ret void
}

; Loads <4 x i8> from addrspace(3), sign-extends each element to i16 and
; stores the <4 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %ext = sext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(3)* %out
  ret void
}

; Loads <8 x i8> from addrspace(3), zero-extends each element to i16 and
; stores the <8 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = zext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
  ret void
}

; Loads <8 x i8> from addrspace(3), sign-extends each element to i16 and
; stores the <8 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(3)* %in
  %ext = sext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(3)* %out
  ret void
}

; Loads <16 x i8> from addrspace(3), zero-extends each element to i16 and
; stores the <16 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %ext = zext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
  ret void
}

; Loads <16 x i8> from addrspace(3), sign-extends each element to i16 and
; stores the <16 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(3)* %in
  %ext = sext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(3)* %out
  ret void
}

; Loads <32 x i8> from addrspace(3), zero-extends each element to i16 and
; stores the <32 x i16> result to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
  %ext = zext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
  ret void
}

947; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000948; SICIVI: s_mov_b32 m0
949; GFX9-NOT: m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000950
951; EG: LDS_READ_RET
952; EG: LDS_READ_RET
953; EG: LDS_READ_RET
954; EG: LDS_READ_RET
955; EG: LDS_READ_RET
956; EG: LDS_READ_RET
957; EG: LDS_READ_RET
958; EG: LDS_READ_RET
Jan Veselyf1705042017-01-20 21:24:26 +0000959; TODO: these do LSHR + BFE_INT, instead of just BFE_INT/ASHR
Jan Vesely38814fa2016-08-27 19:09:43 +0000960; EG-DAG: BFE_INT
961; EG-DAG: BFE_INT
962; EG-DAG: BFE_INT
963; EG-DAG: BFE_INT
964; EG-DAG: BFE_INT
965; EG-DAG: BFE_INT
966; EG-DAG: BFE_INT
967; EG-DAG: BFE_INT
968; EG-DAG: BFE_INT
969; EG-DAG: BFE_INT
970; EG-DAG: BFE_INT
971; EG-DAG: BFE_INT
972; EG-DAG: BFE_INT
973; EG-DAG: BFE_INT
974; EG-DAG: BFE_INT
975; EG-DAG: BFE_INT
976; EG-DAG: BFE_INT
977; EG-DAG: BFE_INT
978; EG-DAG: BFE_INT
979; EG-DAG: BFE_INT
980; EG-DAG: BFE_INT
981; EG-DAG: BFE_INT
982; EG-DAG: BFE_INT
983; EG-DAG: BFE_INT
Jan Veselyf1705042017-01-20 21:24:26 +0000984; EG-DAG: BFE_INT
985; EG-DAG: BFE_INT
986; EG-DAG: BFE_INT
987; EG-DAG: BFE_INT
Jan Vesely38814fa2016-08-27 19:09:43 +0000988; EG: LDS_WRITE
989; EG: LDS_WRITE
990; EG: LDS_WRITE
991; EG: LDS_WRITE
992; EG: LDS_WRITE
993; EG: LDS_WRITE
994; EG: LDS_WRITE
995; EG: LDS_WRITE
996; EG: LDS_WRITE
997; EG: LDS_WRITE
998; EG: LDS_WRITE
999; EG: LDS_WRITE
1000; EG: LDS_WRITE
1001; EG: LDS_WRITE
1002; EG: LDS_WRITE
1003; EG: LDS_WRITE
Matt Arsenault3dbeefa2017-03-21 21:39:51 +00001004define amdgpu_kernel void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +00001005 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in
1006 %ext = sext <32 x i8> %load to <32 x i16>
1007 store <32 x i16> %ext, <32 x i16> addrspace(3)* %out
1008 ret void
1009}
1010
; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16:
; define amdgpu_kernel void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
;   %ext = zext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16:
; define amdgpu_kernel void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(3)* %in
;   %ext = sext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(3)* %out
;   ret void
; }

; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load.
; The kernel copies a <16 x i8> between two addrspace(3) pointers, with both
; the load and the store marked align 16.
; FUNC-LABEL: {{^}}local_v16i8_to_128:

; SI-NOT: ds_read_b128
; SI-NOT: ds_write_b128

; CIVI: ds_read_b128
; CIVI: ds_write_b128

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_v16i8_to_128(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) {
  %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in, align 16
  store <16 x i8> %ld, <16 x i8> addrspace(3)* %out, align 16
  ret void
}

; Attribute group referenced as #0 by the kernels in this file.
attributes #0 = { nounwind }