; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX89,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Copies a scalar i16 within local memory (addrspace(3)): the value loaded
; from %in is stored unchanged to %out. The amdgcn runs expect ds_read_u16;
; the r600 run expects a 16-bit LDS read followed by a 16-bit LDS write.
; FUNC-LABEL: {{^}}local_load_i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_u16 v{{[0-9]+}}

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
entry:
  %ld = load i16, i16 addrspace(3)* %in
  store i16 %ld, i16 addrspace(3)* %out
  ret void
}

; Copies a <2 x i16> within local memory (addrspace(3)). The amdgcn runs
; expect the two halves to be fetched together with ds_read_b32.
; FUNC-LABEL: {{^}}local_load_v2i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b32

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
  ret void
}

; Copies a <3 x i16> within local memory (addrspace(3)). The amdgcn runs
; expect one ds_read_b64 for the load and a b32 + b16 pair for the store.
; FUNC-LABEL: {{^}}local_load_v3i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b16

; EG-DAG: LDS_USHORT_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
  ret void
}

; Copies a <4 x i16> within local memory (addrspace(3)). The amdgcn runs
; expect ds_read_b64; the r600 run expects two LDS_READ_RET.
; FUNC-LABEL: {{^}}local_load_v4i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
  ret void
}

; Copies a <8 x i16> within local memory (addrspace(3)). The amdgcn runs
; expect a single paired read (ds_read2_b64 with offset1:1).
; FUNC-LABEL: {{^}}local_load_v8i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
  ret void
}

; Copies a <16 x i16> within local memory (addrspace(3)). The amdgcn runs
; expect two ds_read2_b64 (in either order) covering slots 0/3 and 1/2.
; FUNC-LABEL: {{^}}local_load_v16i16:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}


; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
  ret void
}

; Loads an i16 from local memory (addrspace(3)), zero-extends it to i32 and
; stores the result to %out. The amdgcn runs expect the unsigned 16-bit
; read (ds_read_u16) followed by a 32-bit write.
; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_u16
; GCN: ds_write_b32

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; Loads an i16 from local memory (addrspace(3)), sign-extends it to i32 and
; stores the result to %out. The amdgcn runs expect the signed 16-bit read
; (ds_read_i16) and no s_wqm_b64; the r600 run expects an unsigned read
; followed by BFE_INT with a literal 16.
; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
; GCN-NOT: s_wqm_b64

; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_i16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; Single-element vector form of the zero-extending load: <1 x i16> from local
; memory (addrspace(3)) is widened to <1 x i32> and stored to %out. The
; amdgcn runs expect ds_read_u16.
; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_u16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; Single-element vector form of the sign-extending load: <1 x i16> from local
; memory (addrspace(3)) is widened to <1 x i32> and stored to %out. The
; amdgcn runs expect ds_read_i16; the r600 run expects BFE_INT with a
; literal 16 after an unsigned read.
; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_i16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; Loads <2 x i16> from local memory (addrspace(3)), zero-extends each lane to
; i32 and stores <2 x i32> to %out. The amdgcn runs expect a single
; ds_read_b32 for the load and no s_wqm_b64.
; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; Loads <2 x i16> from local memory (addrspace(3)), sign-extends each lane to
; i32 and stores <2 x i32> to %out. The amdgcn runs expect ds_read_b32 and no
; s_wqm_b64; the r600 run expects one BFE_INT per lane.
; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b32

; EG: LDS_READ_RET
; EG: BFE_INT
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; Loads <3 x i16> from local memory (addrspace(3)), zero-extends each lane to
; i32 and stores <3 x i32> to %out. The amdgcn runs expect ds_read_b64 for
; the load and a b32 + b64 pair of writes for the three-lane result.
; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b64

; EG: LDS_READ_RET
define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; Loads <3 x i16> from local memory (addrspace(3)), sign-extends each lane to
; i32 and stores <3 x i32> to %out. The amdgcn runs expect ds_read_b64 plus
; b32 + b64 writes; the r600 run expects three BFE_INT (one per lane).
; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b64

; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; Loads <4 x i16> from local memory (addrspace(3)), zero-extends each lane to
; i32 and stores <4 x i32> to %out. The amdgcn runs expect ds_read_b64 and
; no s_wqm_b64.
; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; Loads <4 x i16> from local memory (addrspace(3)), sign-extends each lane to
; i32 and stores <4 x i32> to %out. The amdgcn runs expect ds_read_b64 and
; no s_wqm_b64; the r600 run expects four BFE_INT (one per lane).
; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
; GCN-NOT: s_wqm_b64
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; Loads <8 x i16> from local memory (addrspace(3)), zero-extends each lane to
; i32 and stores <8 x i32> to %out. The amdgcn runs expect the load to be a
; single ds_read2_b64 with offset1:1.
; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}

; Loads <8 x i16> from local memory (addrspace(3)), sign-extends each lane to
; i32 and stores <8 x i32> to %out. The amdgcn runs expect a single
; ds_read2_b64 with offset1:1; the r600 run expects eight BFE_INT.
; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}

; Loads <16 x i16> from local memory (addrspace(3)), zero-extends each lane
; to i32 and stores <16 x i32> to %out. The amdgcn runs expect two paired
; reads (slots 0/1 and 2/3, any order) followed by four ds_write2_b64.
; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}

; GCN: ds_write2_b64
; GCN: ds_write2_b64
; GCN: ds_write2_b64
; GCN: ds_write2_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}

; Loads <16 x i16> from local memory (addrspace(3)), sign-extends each lane
; to i32 and stores <16 x i32> to %out. The amdgcn runs expect two paired
; reads (slots 0/1 and 2/3, any order); the r600 run expects sixteen BFE_INT.
; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}

407; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000408; GFX9-NOT: m0
409; SICIVI: s_mov_b32 m0
410
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000411; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
412; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
413; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
414; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
Jan Vesely38814fa2016-08-27 19:09:43 +0000415
416; EG: LDS_READ_RET
417; EG: LDS_READ_RET
418; EG: LDS_READ_RET
419; EG: LDS_READ_RET
420; EG: LDS_READ_RET
421; EG: LDS_READ_RET
422; EG: LDS_READ_RET
423; EG: LDS_READ_RET
424; EG: LDS_READ_RET
425; EG: LDS_READ_RET
426; EG: LDS_READ_RET
427; EG: LDS_READ_RET
428; EG: LDS_READ_RET
429; EG: LDS_READ_RET
430; EG: LDS_READ_RET
431; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000432define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000433 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
434 %ext = zext <32 x i16> %load to <32 x i32>
435 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
436 ret void
437}
438
439; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000440; GFX9-NOT: m0
441; SICIVI: s_mov_b32 m0
442
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000443; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
444; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
445; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000446; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000447; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
448; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
449; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
450; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
451; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
452; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
453; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
454; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
Jan Vesely38814fa2016-08-27 19:09:43 +0000455
456; EG: LDS_READ_RET
457; EG: LDS_READ_RET
458; EG: LDS_READ_RET
459; EG: LDS_READ_RET
460; EG: LDS_READ_RET
461; EG: LDS_READ_RET
462; EG: LDS_READ_RET
463; EG: LDS_READ_RET
464; EG: LDS_READ_RET
465; EG: LDS_READ_RET
466; EG: LDS_READ_RET
467; EG: LDS_READ_RET
468; EG: LDS_READ_RET
469; EG: LDS_READ_RET
470; EG: LDS_READ_RET
471; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000472define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000473 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
474 %ext = sext <32 x i16> %load to <32 x i32>
475 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
476 ret void
477}
478
479; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000480; GFX9-NOT: m0
481; SICIVI: s_mov_b32 m0
482
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000483; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000484; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
485; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
486; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
487; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000488; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000489; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000490; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11
491; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
492; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
493; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27
494; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25
495; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23
496; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21
497; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19
498; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17
499; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
500; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
501; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
502; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
503; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
504; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
505; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
506; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
Jan Vesely38814fa2016-08-27 19:09:43 +0000507
508; EG: LDS_READ_RET
509; EG: LDS_READ_RET
510; EG: LDS_READ_RET
511; EG: LDS_READ_RET
512; EG: LDS_READ_RET
513; EG: LDS_READ_RET
514; EG: LDS_READ_RET
515; EG: LDS_READ_RET
516; EG: LDS_READ_RET
517; EG: LDS_READ_RET
518; EG: LDS_READ_RET
519; EG: LDS_READ_RET
520; EG: LDS_READ_RET
521; EG: LDS_READ_RET
522; EG: LDS_READ_RET
523; EG: LDS_READ_RET
524; EG: LDS_READ_RET
525; EG: LDS_READ_RET
526; EG: LDS_READ_RET
527; EG: LDS_READ_RET
528; EG: LDS_READ_RET
529; EG: LDS_READ_RET
530; EG: LDS_READ_RET
531; EG: LDS_READ_RET
532; EG: LDS_READ_RET
533; EG: LDS_READ_RET
534; EG: LDS_READ_RET
535; EG: LDS_READ_RET
536; EG: LDS_READ_RET
537; EG: LDS_READ_RET
538; EG: LDS_READ_RET
539; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000540define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000541 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
542 %ext = zext <64 x i16> %load to <64 x i32>
543 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
544 ret void
545}
546
547; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000548; GFX9-NOT: m0
549; SICIVI: s_mov_b32 m0
Jan Vesely38814fa2016-08-27 19:09:43 +0000550
551; EG: LDS_READ_RET
552; EG: LDS_READ_RET
553; EG: LDS_READ_RET
554; EG: LDS_READ_RET
555; EG: LDS_READ_RET
556; EG: LDS_READ_RET
557; EG: LDS_READ_RET
558; EG: LDS_READ_RET
559; EG: LDS_READ_RET
560; EG: LDS_READ_RET
561; EG: LDS_READ_RET
562; EG: LDS_READ_RET
563; EG: LDS_READ_RET
564; EG: LDS_READ_RET
565; EG: LDS_READ_RET
566; EG: LDS_READ_RET
567; EG: LDS_READ_RET
568; EG: LDS_READ_RET
569; EG: LDS_READ_RET
570; EG: LDS_READ_RET
571; EG: LDS_READ_RET
572; EG: LDS_READ_RET
573; EG: LDS_READ_RET
574; EG: LDS_READ_RET
575; EG: LDS_READ_RET
576; EG: LDS_READ_RET
577; EG: LDS_READ_RET
578; EG: LDS_READ_RET
579; EG: LDS_READ_RET
580; EG: LDS_READ_RET
581; EG: LDS_READ_RET
582; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000583define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000584 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
585 %ext = sext <64 x i16> %load to <64 x i32>
586 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
587 ret void
588}
589
; Loads an i16 from local memory (addrspace(3)), zero-extends it to i64 and
; stores the result to %out. The amdgcn runs expect the low half to come from
; ds_read_u16, the high half to be a materialised 0, and both to be written
; as one register pair with ds_write_b64.
; FUNC-LABEL: {{^}}local_zextload_i16_to_i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: LDS_WRITE
define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Loads an i16 from local memory (addrspace(3)), sign-extends it to i64 and
; stores the result to %out. The SI run expects a signed 16-bit read, while
; the GFX8/GFX9 runs currently expect an unsigned read plus v_bfe_i32 (see
; the FIXME below). In all amdgcn runs the high half is the low half shifted
; right arithmetically by 31.
; FUNC-LABEL: {{^}}local_sextload_i16_to_i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0

; FIXME: Need to optimize this sequence to avoid an extra shift.
; t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32
; t28: i64 = any_extend t25
; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16
; SI: ds_read_i16 v[[LO:[0-9]+]],
; GFX89: ds_read_u16 v[[ULO:[0-9]+]]
; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG-DAG: LDS_WRITE
; EG-DAG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Single-element vector form of the i16 -> i64 zero-extending load from local
; memory (addrspace(3)). On amdgcn only the m0 setup is checked; the r600 run
; expects an unsigned 16-bit LDS read and an LDS write.
; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: LDS_WRITE
define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Single-element vector form of the i16 -> i64 sign-extending load from local
; memory (addrspace(3)). On amdgcn only the m0 setup is checked; the r600 run
; expects an unsigned read, BFE_INT with a literal 16, and LDS writes.
; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG-DAG: LDS_WRITE
; EG-DAG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Loads <2 x i16> from local memory (addrspace(3)), zero-extends each lane to
; i64 and stores <2 x i64> to %out. On amdgcn only the m0 setup is checked;
; the r600 run expects an LDS_READ_RET.
; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Loads <2 x i16> from local memory (addrspace(3)), sign-extends each lane to
; i64 and stores <2 x i64> to %out. On amdgcn only the m0 setup is checked;
; the r600 run expects an LDS_READ_RET plus BFE_INT and ASHR.
; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: ASHR
define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Test case: zero-extending load of <4 x i16> from address space 3, widened to
; <4 x i64> and stored back to address space 3.
; The gfx900 run must not mention m0; the runs using the SICIVI prefix expect
; an s_mov_b32 into m0. The r600 run expects two LDS_READ_RET instructions.
; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}
719
; Test case: sign-extending load of <4 x i16> from address space 3, widened to
; <4 x i64> and stored back to address space 3.
; The gfx900 run must not mention m0; the runs using the SICIVI prefix expect
; an s_mov_b32 into m0. The r600 run expects two LDS_READ_RET instructions
; followed, in any order, by two BFE_INT and two ASHR.
; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}
737
; Test case: zero-extending load of <8 x i16> from address space 3, widened to
; <8 x i64> and stored back to address space 3.
; The gfx900 run must not mention m0; the runs using the SICIVI prefix expect
; an s_mov_b32 into m0. The r600 run expects four LDS_READ_RET instructions.
; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}
753
; Test case: sign-extending load of <8 x i16> from address space 3, widened to
; <8 x i64> and stored back to address space 3.
; The gfx900 run must not mention m0; the runs using the SICIVI prefix expect
; an s_mov_b32 into m0. The r600 run expects four LDS_READ_RET instructions
; followed, in any order, by four BFE_INT and four ASHR.
; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}
777
; Test case: zero-extending load of <16 x i16> from address space 3, widened to
; <16 x i64> and stored back to address space 3.
; The gfx900 run must not mention m0; the runs using the SICIVI prefix expect
; an s_mov_b32 into m0. The r600 run expects eight LDS_READ_RET instructions.
; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}
797
; Test case: sign-extending load of <16 x i16> from address space 3, widened to
; <16 x i64> and stored back to address space 3.
; The gfx900 run must not mention m0; the runs using the SICIVI prefix expect
; an s_mov_b32 into m0. The r600 run expects eight LDS_READ_RET instructions
; followed, in any order, by eight BFE_INT and eight ASHR.
; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64:
; GFX9-NOT: m0
; SICIVI: s_mov_b32 m0


; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = sext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}
833
834; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000835; GFX9-NOT: m0
836; SICIVI: s_mov_b32 m0
837
Jan Vesely38814fa2016-08-27 19:09:43 +0000838
839; EG: LDS_READ_RET
840; EG: LDS_READ_RET
841; EG: LDS_READ_RET
842; EG: LDS_READ_RET
843; EG: LDS_READ_RET
844; EG: LDS_READ_RET
845; EG: LDS_READ_RET
846; EG: LDS_READ_RET
847; EG: LDS_READ_RET
848; EG: LDS_READ_RET
849; EG: LDS_READ_RET
850; EG: LDS_READ_RET
851; EG: LDS_READ_RET
852; EG: LDS_READ_RET
853; EG: LDS_READ_RET
854; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000855define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000856 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
857 %ext = zext <32 x i16> %load to <32 x i64>
858 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
859 ret void
860}
861
862; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64:
Matt Arsenault3f71c0e2017-11-29 00:55:57 +0000863; GFX9-NOT: m0
864; SICIVI: s_mov_b32 m0
865
Jan Vesely38814fa2016-08-27 19:09:43 +0000866
867; EG: LDS_READ_RET
868; EG: LDS_READ_RET
869; EG: LDS_READ_RET
870; EG: LDS_READ_RET
871; EG: LDS_READ_RET
872; EG: LDS_READ_RET
873; EG: LDS_READ_RET
874; EG: LDS_READ_RET
875; EG: LDS_READ_RET
876; EG: LDS_READ_RET
877; EG: LDS_READ_RET
878; EG: LDS_READ_RET
879; EG: LDS_READ_RET
880; EG: LDS_READ_RET
881; EG: LDS_READ_RET
882; EG: LDS_READ_RET
883; EG-DAG: BFE_INT
884; EG-DAG: BFE_INT
885; EG-DAG: ASHR
886; EG-DAG: ASHR
887; EG-DAG: BFE_INT
888; EG-DAG: BFE_INT
889; EG-DAG: ASHR
890; EG-DAG: ASHR
891; EG-DAG: BFE_INT
892; EG-DAG: BFE_INT
893; EG-DAG: ASHR
894; EG-DAG: ASHR
895; EG-DAG: BFE_INT
896; EG-DAG: BFE_INT
897; EG-DAG: ASHR
898; EG-DAG: ASHR
899; EG-DAG: BFE_INT
900; EG-DAG: BFE_INT
901; EG-DAG: ASHR
902; EG-DAG: ASHR
903; EG-DAG: BFE_INT
904; EG-DAG: BFE_INT
905; EG-DAG: ASHR
906; EG-DAG: ASHR
907; EG-DAG: BFE_INT
908; EG-DAG: BFE_INT
909; EG-DAG: ASHR
910; EG-DAG: ASHR
911; EG-DAG: BFE_INT
912; EG-DAG: BFE_INT
913; EG-DAG: ASHR
914; EG-DAG: ASHR
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000915define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000916 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
917 %ext = sext <32 x i16> %load to <32 x i64>
918 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
919 ret void
920}
921
; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
;   %ext = zext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
;   ret void
; }
929
; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
;   %ext = sext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
;   ret void
; }
937
; Attribute group #0, referenced by the kernel definitions above.
attributes #0 = { nounwind }