; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Load an i16 from addrspace(3) and store it back to addrspace(3) unchanged.
; FUNC-LABEL: {{^}}local_load_i16:
; GCN: ds_read_u16 v{{[0-9]+}}

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) {
entry:
  %ld = load i16, i16 addrspace(3)* %in
  store i16 %ld, i16 addrspace(3)* %out
  ret void
}

; Load a <2 x i16> from addrspace(3) and store it back to addrspace(3) unchanged.
; FUNC-LABEL: {{^}}local_load_v2i16:
; GCN: ds_read_b32

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(3)* %out
  ret void
}

; Load a <3 x i16> from addrspace(3) and store it back to addrspace(3) unchanged.
; FUNC-LABEL: {{^}}local_load_v3i16:
; GCN: ds_read_b64
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b16

; EG-DAG: LDS_USHORT_READ_RET
; EG-DAG: LDS_READ_RET
define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(3)* %out
  ret void
}

; Load a <4 x i16> from addrspace(3) and store it back to addrspace(3) unchanged.
; FUNC-LABEL: {{^}}local_load_v4i16:
; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(3)* %out
  ret void
}

; Load an <8 x i16> from addrspace(3) and store it back to addrspace(3) unchanged.
; FUNC-LABEL: {{^}}local_load_v8i16:
; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(3)* %out
  ret void
}

; Load a <16 x i16> from addrspace(3) and store it back to addrspace(3) unchanged.
; FUNC-LABEL: {{^}}local_load_v16i16:
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}}


; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(3)* %out
  ret void
}

; Zero-extend an i16 loaded from addrspace(3) to i32 and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_i16_to_i32:
; GCN: ds_read_u16
; GCN: ds_write_b32

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; Sign-extend an i16 loaded from addrspace(3) to i32 and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_i16_to_i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_i16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(3)* %out
  ret void
}

; Zero-extend a <1 x i16> loaded from addrspace(3) to <1 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32:
; GCN: ds_read_u16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; Sign-extend a <1 x i16> loaded from addrspace(3) to <1 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32:
; GCN: ds_read_i16

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(3)* %out
  ret void
}

; Zero-extend a <2 x i16> loaded from addrspace(3) to <2 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; Sign-extend a <2 x i16> loaded from addrspace(3) to <2 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
; EG: BFE_INT
; EG: BFE_INT
define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(3)* %out
  ret void
}

; Zero-extend a <3 x i16> loaded from addrspace(3) to <3 x i32> and store it to addrspace(3).
; NOTE(review): the doubled `local_local_` prefix differs from the sibling
; `local_` test names; kept as-is so the label check and the symbol stay in sync.
; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32:
; GCN: ds_read_b64
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b64

; EG: LDS_READ_RET
define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; Sign-extend a <3 x i16> loaded from addrspace(3) to <3 x i32> and store it to addrspace(3).
; NOTE(review): the doubled `local_local_` prefix differs from the sibling
; `local_` test names; kept as-is so the label check and the symbol stay in sync.
; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32:
; GCN: ds_read_b64
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b64

; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(3)* %out
  ret void
}

; Zero-extend a <4 x i16> loaded from addrspace(3) to <4 x i32> and store it to addrspace(3).
; NOTE(review): the doubled `local_local_` prefix differs from the sibling
; `local_` test names; kept as-is so the label check and the symbol stay in sync.
; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; Sign-extend a <4 x i16> loaded from addrspace(3) to <4 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32:
; GCN-NOT: s_wqm_b64
; GCN: s_mov_b32 m0
; GCN: ds_read_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(3)* %out
  ret void
}

; Zero-extend an <8 x i16> loaded from addrspace(3) to <8 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32:
; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}

; Sign-extend an <8 x i16> loaded from addrspace(3) to <8 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32:
; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(3)* %out
  ret void
}

; Zero-extend a <16 x i16> loaded from addrspace(3) to <16 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32:
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}

; GCN: ds_write2_b64
; GCN: ds_write2_b64
; GCN: ds_write2_b64
; GCN: ds_write2_b64

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}

; Sign-extend a <16 x i16> loaded from addrspace(3) to <16 x i32> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32:

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(3)* %out
  ret void
}

350; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32:
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000351; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
352; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
353; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
354; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
Jan Vesely38814fa2016-08-27 19:09:43 +0000355
356; EG: LDS_READ_RET
357; EG: LDS_READ_RET
358; EG: LDS_READ_RET
359; EG: LDS_READ_RET
360; EG: LDS_READ_RET
361; EG: LDS_READ_RET
362; EG: LDS_READ_RET
363; EG: LDS_READ_RET
364; EG: LDS_READ_RET
365; EG: LDS_READ_RET
366; EG: LDS_READ_RET
367; EG: LDS_READ_RET
368; EG: LDS_READ_RET
369; EG: LDS_READ_RET
370; EG: LDS_READ_RET
371; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000372define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000373 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
374 %ext = zext <32 x i16> %load to <32 x i32>
375 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
376 ret void
377}
378
379; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32:
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000380; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
381; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
382; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000383; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000384; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
385; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
386; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
387; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
388; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
389; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
390; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
391; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
Jan Vesely38814fa2016-08-27 19:09:43 +0000392
393; EG: LDS_READ_RET
394; EG: LDS_READ_RET
395; EG: LDS_READ_RET
396; EG: LDS_READ_RET
397; EG: LDS_READ_RET
398; EG: LDS_READ_RET
399; EG: LDS_READ_RET
400; EG: LDS_READ_RET
401; EG: LDS_READ_RET
402; EG: LDS_READ_RET
403; EG: LDS_READ_RET
404; EG: LDS_READ_RET
405; EG: LDS_READ_RET
406; EG: LDS_READ_RET
407; EG: LDS_READ_RET
408; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000409define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000410 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
411 %ext = sext <32 x i16> %load to <32 x i32>
412 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out
413 ret void
414}
415
416; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32:
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000417; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000418; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
419; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3
420; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5
421; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000422; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9
Matt Arsenault327bb5a2016-07-01 22:47:50 +0000423; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13
Tom Stellardc2ff0eb2016-08-29 19:15:22 +0000424; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11
425; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
426; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
427; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27
428; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25
429; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23
430; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21
431; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19
432; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17
433; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
434; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13
435; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11
436; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9
437; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
438; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
439; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
440; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
Jan Vesely38814fa2016-08-27 19:09:43 +0000441
442; EG: LDS_READ_RET
443; EG: LDS_READ_RET
444; EG: LDS_READ_RET
445; EG: LDS_READ_RET
446; EG: LDS_READ_RET
447; EG: LDS_READ_RET
448; EG: LDS_READ_RET
449; EG: LDS_READ_RET
450; EG: LDS_READ_RET
451; EG: LDS_READ_RET
452; EG: LDS_READ_RET
453; EG: LDS_READ_RET
454; EG: LDS_READ_RET
455; EG: LDS_READ_RET
456; EG: LDS_READ_RET
457; EG: LDS_READ_RET
458; EG: LDS_READ_RET
459; EG: LDS_READ_RET
460; EG: LDS_READ_RET
461; EG: LDS_READ_RET
462; EG: LDS_READ_RET
463; EG: LDS_READ_RET
464; EG: LDS_READ_RET
465; EG: LDS_READ_RET
466; EG: LDS_READ_RET
467; EG: LDS_READ_RET
468; EG: LDS_READ_RET
469; EG: LDS_READ_RET
470; EG: LDS_READ_RET
471; EG: LDS_READ_RET
472; EG: LDS_READ_RET
473; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000474define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000475 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
476 %ext = zext <64 x i16> %load to <64 x i32>
477 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
478 ret void
479}
480
481; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32:
Jan Vesely38814fa2016-08-27 19:09:43 +0000482
483; EG: LDS_READ_RET
484; EG: LDS_READ_RET
485; EG: LDS_READ_RET
486; EG: LDS_READ_RET
487; EG: LDS_READ_RET
488; EG: LDS_READ_RET
489; EG: LDS_READ_RET
490; EG: LDS_READ_RET
491; EG: LDS_READ_RET
492; EG: LDS_READ_RET
493; EG: LDS_READ_RET
494; EG: LDS_READ_RET
495; EG: LDS_READ_RET
496; EG: LDS_READ_RET
497; EG: LDS_READ_RET
498; EG: LDS_READ_RET
499; EG: LDS_READ_RET
500; EG: LDS_READ_RET
501; EG: LDS_READ_RET
502; EG: LDS_READ_RET
503; EG: LDS_READ_RET
504; EG: LDS_READ_RET
505; EG: LDS_READ_RET
506; EG: LDS_READ_RET
507; EG: LDS_READ_RET
508; EG: LDS_READ_RET
509; EG: LDS_READ_RET
510; EG: LDS_READ_RET
511; EG: LDS_READ_RET
512; EG: LDS_READ_RET
513; EG: LDS_READ_RET
514; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000515define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000516 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
517 %ext = sext <64 x i16> %load to <64 x i32>
518 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out
519 ret void
520}
521
; Zero-extend an i16 loaded from addrspace(3) to i64 and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_i16_to_i64:
; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: LDS_WRITE
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Sign-extend an i16 loaded from addrspace(3) to i64 and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_i16_to_i64:
; FIXME: Need to optimize this sequence to avoid an extra shift.
; t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32
; t28: i64 = any_extend t25
; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16
; SI: ds_read_i16 v[[LO:[0-9]+]],
; VI: ds_read_u16 v[[ULO:[0-9]+]]
; VI: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]]

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG-DAG: LDS_WRITE
; EG-DAG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 {
  %a = load i16, i16 addrspace(3)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Zero-extend a <1 x i16> loaded from addrspace(3) to <1 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64:

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: LDS_WRITE
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Sign-extend a <1 x i16> loaded from addrspace(3) to <1 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64:

; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z
; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]]
; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP
; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y
; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal
; EG-DAG: LDS_WRITE
; EG-DAG: 16
; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]]
define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(3)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Zero-extend a <2 x i16> loaded from addrspace(3) to <2 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64:

; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Sign-extend a <2 x i16> loaded from addrspace(3) to <2 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64:

; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: ASHR
define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Zero-extend a <4 x i16> loaded from addrspace(3) to <4 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Sign-extend a <4 x i16> loaded from addrspace(3) to <4 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Zero-extend an <8 x i16> loaded from addrspace(3) to <8 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; Sign-extend an <8 x i16> loaded from addrspace(3) to <8 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
; EG-DAG: BFE_INT
; EG-DAG: BFE_INT
; EG-DAG: ASHR
; EG-DAG: ASHR
define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(3)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; Zero-extend a <16 x i16> loaded from addrspace(3) to <16 x i64> and store it to addrspace(3).
; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64:

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; Kernel test: load a <16 x i16> vector through the addrspace(3) pointer %in,
; sign-extend every element to i64, and store the <16 x i64> result through %out.
; Checks exist only for the r600/redwood run (the EG prefix): eight ordered
; LDS_READ_RET matches, followed by a DAG-suffixed group of eight BFE_INT and
; eight ASHR matches whose relative order is left unconstrained.
699; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64:
Jan Vesely38814fa2016-08-27 19:09:43 +0000700
701; EG: LDS_READ_RET
702; EG: LDS_READ_RET
703; EG: LDS_READ_RET
704; EG: LDS_READ_RET
705; EG: LDS_READ_RET
706; EG: LDS_READ_RET
707; EG: LDS_READ_RET
708; EG: LDS_READ_RET
709; EG-DAG: BFE_INT
710; EG-DAG: BFE_INT
711; EG-DAG: ASHR
712; EG-DAG: ASHR
713; EG-DAG: BFE_INT
714; EG-DAG: BFE_INT
715; EG-DAG: ASHR
716; EG-DAG: ASHR
717; EG-DAG: BFE_INT
718; EG-DAG: BFE_INT
719; EG-DAG: ASHR
720; EG-DAG: ASHR
721; EG-DAG: BFE_INT
722; EG-DAG: BFE_INT
723; EG-DAG: ASHR
724; EG-DAG: ASHR
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000725define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000726 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in
727 %ext = sext <16 x i16> %load to <16 x i64>
728 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
729 ret void
730}
731
; Kernel test: load a <32 x i16> vector through the addrspace(3) pointer %in,
; zero-extend every element to i64, and store the <32 x i64> result through %out.
; Checks exist only for the r600/redwood run (the EG prefix): sixteen
; LDS_READ_RET matches, in order.
732; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64:
Jan Vesely38814fa2016-08-27 19:09:43 +0000733
734; EG: LDS_READ_RET
735; EG: LDS_READ_RET
736; EG: LDS_READ_RET
737; EG: LDS_READ_RET
738; EG: LDS_READ_RET
739; EG: LDS_READ_RET
740; EG: LDS_READ_RET
741; EG: LDS_READ_RET
742; EG: LDS_READ_RET
743; EG: LDS_READ_RET
744; EG: LDS_READ_RET
745; EG: LDS_READ_RET
746; EG: LDS_READ_RET
747; EG: LDS_READ_RET
748; EG: LDS_READ_RET
749; EG: LDS_READ_RET
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000750define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000751 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
752 %ext = zext <32 x i16> %load to <32 x i64>
753 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
754 ret void
755}
756
; Kernel test: load a <32 x i16> vector through the addrspace(3) pointer %in,
; sign-extend every element to i64, and store the <32 x i64> result through %out.
; Checks exist only for the r600/redwood run (the EG prefix): sixteen ordered
; LDS_READ_RET matches, followed by a DAG-suffixed group of sixteen BFE_INT and
; sixteen ASHR matches whose relative order is left unconstrained.
757; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64:
Jan Vesely38814fa2016-08-27 19:09:43 +0000758
759; EG: LDS_READ_RET
760; EG: LDS_READ_RET
761; EG: LDS_READ_RET
762; EG: LDS_READ_RET
763; EG: LDS_READ_RET
764; EG: LDS_READ_RET
765; EG: LDS_READ_RET
766; EG: LDS_READ_RET
767; EG: LDS_READ_RET
768; EG: LDS_READ_RET
769; EG: LDS_READ_RET
770; EG: LDS_READ_RET
771; EG: LDS_READ_RET
772; EG: LDS_READ_RET
773; EG: LDS_READ_RET
774; EG: LDS_READ_RET
775; EG-DAG: BFE_INT
776; EG-DAG: BFE_INT
777; EG-DAG: ASHR
778; EG-DAG: ASHR
779; EG-DAG: BFE_INT
780; EG-DAG: BFE_INT
781; EG-DAG: ASHR
782; EG-DAG: ASHR
783; EG-DAG: BFE_INT
784; EG-DAG: BFE_INT
785; EG-DAG: ASHR
786; EG-DAG: ASHR
787; EG-DAG: BFE_INT
788; EG-DAG: BFE_INT
789; EG-DAG: ASHR
790; EG-DAG: ASHR
791; EG-DAG: BFE_INT
792; EG-DAG: BFE_INT
793; EG-DAG: ASHR
794; EG-DAG: ASHR
795; EG-DAG: BFE_INT
796; EG-DAG: BFE_INT
797; EG-DAG: ASHR
798; EG-DAG: ASHR
799; EG-DAG: BFE_INT
800; EG-DAG: BFE_INT
801; EG-DAG: ASHR
802; EG-DAG: ASHR
803; EG-DAG: BFE_INT
804; EG-DAG: BFE_INT
805; EG-DAG: ASHR
806; EG-DAG: ASHR
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000807define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000808 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in
809 %ext = sext <32 x i16> %load to <32 x i64>
810 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
811 ret void
812}
813
; Disabled test: the <64 x i16> to <64 x i64> zero-extending variant is kept
; only as commented-out IR, and its label line uses the XFUNC prefix, which
; none of the RUN lines at the top of the file pass to FileCheck.
; NOTE(review): the reason it is disabled is not recorded here -- confirm
; before re-enabling.
814; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64:
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000815; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000816; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
817; %ext = zext <64 x i16> %load to <64 x i64>
818; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
819; ret void
820; }
821
; Disabled test: the <64 x i16> to <64 x i64> sign-extending variant is kept
; only as commented-out IR, and its label line uses the XFUNC prefix, which
; none of the RUN lines at the top of the file pass to FileCheck.
; NOTE(review): the reason it is disabled is not recorded here -- confirm
; before re-enabling.
822; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64:
Matt Arsenault3dbeefa2017-03-21 21:39:51 +0000823; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 {
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000824; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in
825; %ext = sext <64 x i16> %load to <64 x i64>
826; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out
827; ret void
828; }
829
; Attribute group 0: referenced by the kernel definitions in this file whose
; signature ends with this group number; it contains only nounwind.
830attributes #0 = { nounwind }