; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s

7; FUNC-LABEL: {{^}}constant_load_i32:
8; GCN: s_load_dword s{{[0-9]+}}
9
10; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; Kernel: read a single i32 through the addrspace(4) pointer %in and write it
; unchanged through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
entry:
  %val = load i32, i32 addrspace(4)* %in
  store i32 %val, i32 addrspace(1)* %out
  ret void
}
17
18; FUNC-LABEL: {{^}}constant_load_v2i32:
19; GCN: s_load_dwordx2
20
21; EG: VTX_READ_64
; Kernel: read a <2 x i32> vector through the addrspace(4) pointer %in and
; write it unchanged through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
entry:
  %val = load <2 x i32>, <2 x i32> addrspace(4)* %in
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out
  ret void
}
28
29; FUNC-LABEL: {{^}}constant_load_v3i32:
30; GCN: s_load_dwordx4
31
32; EG: VTX_READ_128
; Kernel: read a <3 x i32> vector through the addrspace(4) pointer %in and
; write it unchanged through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(4)* %in) #0 {
entry:
  %val = load <3 x i32>, <3 x i32> addrspace(4)* %in
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out
  ret void
}
39
40; FUNC-LABEL: {{^}}constant_load_v4i32:
41; GCN: s_load_dwordx4
42
43; EG: VTX_READ_128
; Kernel: read a <4 x i32> vector through the addrspace(4) pointer %in and
; write it unchanged through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
entry:
  %val = load <4 x i32>, <4 x i32> addrspace(4)* %in
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out
  ret void
}
50
51; FUNC-LABEL: {{^}}constant_load_v8i32:
52; GCN: s_load_dwordx8
53
54; EG: VTX_READ_128
55; EG: VTX_READ_128
; Kernel: read an <8 x i32> vector through the addrspace(4) pointer %in and
; write it unchanged through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
entry:
  %val = load <8 x i32>, <8 x i32> addrspace(4)* %in
  store <8 x i32> %val, <8 x i32> addrspace(1)* %out
  ret void
}
62
63; FUNC-LABEL: {{^}}constant_load_v16i32:
64; GCN: s_load_dwordx16
65
66; EG: VTX_READ_128
67; EG: VTX_READ_128
68; EG: VTX_READ_128
69; EG: VTX_READ_128
; Kernel: read a <16 x i32> vector through the addrspace(4) pointer %in and
; write it unchanged through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
entry:
  %val = load <16 x i32>, <16 x i32> addrspace(4)* %in
  store <16 x i32> %val, <16 x i32> addrspace(1)* %out
  ret void
}
76
77; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64:
78; GCN-DAG: s_load_dword s[[SLO:[0-9]+]],
79; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}}
80; GCN: store_dwordx2
81
Matt Arsenault327bb5a2016-07-01 22:47:50 +000082; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
83; EG: CF_END
84; EG: VTX_READ_32
; Kernel: load an i32 through the addrspace(4) pointer %in, zero-extend it to
; i64, and store the widened value through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
  %narrow = load i32, i32 addrspace(4)* %in
  %wide = zext i32 %narrow to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
91
92; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64:
93; GCN: s_load_dword s[[SLO:[0-9]+]]
94; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31
95; GCN: store_dwordx2
96
Matt Arsenault327bb5a2016-07-01 22:47:50 +000097; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
98; EG: CF_END
99; EG: VTX_READ_32
100; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000101; EG: 31
; Kernel: load an i32 through the addrspace(4) pointer %in, sign-extend it to
; i64, and store the widened value through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(4)* %in) #0 {
  %narrow = load i32, i32 addrspace(4)* %in
  %wide = sext i32 %narrow to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
108
109; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
110; GCN: s_load_dword
111; GCN: store_dwordx2
; Kernel: load a <1 x i32> through the addrspace(4) pointer %in, zero-extend it
; to <1 x i64>, and store the result through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
  %narrow = load <1 x i32>, <1 x i32> addrspace(4)* %in
  %wide = zext <1 x i32> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(1)* %out
  ret void
}
118
119; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64:
120; GCN: s_load_dword s[[LO:[0-9]+]]
121; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
122; GCN: store_dwordx2
; Kernel: load a <1 x i32> through the addrspace(4) pointer %in, sign-extend it
; to <1 x i64>, and store the result through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(4)* %in) #0 {
  %narrow = load <1 x i32>, <1 x i32> addrspace(4)* %in
  %wide = sext <1 x i32> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(1)* %out
  ret void
}
129
130; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
131; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
132; GCN: store_dwordx4
; Kernel: load a <2 x i32> through the addrspace(4) pointer %in, zero-extend
; each lane to i64, and store the <2 x i64> through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
  %narrow = load <2 x i32>, <2 x i32> addrspace(4)* %in
  %wide = zext <2 x i32> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
  ret void
}
139
140; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64:
141; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
142
143; GCN-DAG: s_ashr_i32
144; GCN-DAG: s_ashr_i32
145
146; GCN: store_dwordx4
; Kernel: load a <2 x i32> through the addrspace(4) pointer %in, sign-extend
; each lane to i64, and store the <2 x i64> through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(4)* %in) #0 {
  %narrow = load <2 x i32>, <2 x i32> addrspace(4)* %in
  %wide = sext <2 x i32> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
  ret void
}
153
154; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64:
155; GCN: s_load_dwordx4
156
157; GCN: store_dwordx4
158; GCN: store_dwordx4
; Kernel: load a <4 x i32> through the addrspace(4) pointer %in, zero-extend
; each lane to i64, and store the <4 x i64> through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
  %narrow = load <4 x i32>, <4 x i32> addrspace(4)* %in
  %wide = zext <4 x i32> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(1)* %out
  ret void
}
165
166; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64:
167; GCN: s_load_dwordx4
168
169; GCN: s_ashr_i32
170; GCN: s_ashr_i32
171; GCN: s_ashr_i32
172; GCN: s_ashr_i32
173
174; GCN: store_dwordx4
175; GCN: store_dwordx4
; Kernel: load a <4 x i32> through the addrspace(4) pointer %in, sign-extend
; each lane to i64, and store the <4 x i64> through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(4)* %in) #0 {
  %narrow = load <4 x i32>, <4 x i32> addrspace(4)* %in
  %wide = sext <4 x i32> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(1)* %out
  ret void
}
182
; Zero-extending load of <8 x i32> to <8 x i64>. The <8 x i64> result is
; 64 bytes, so four dwordx4 stores are expected: buffer stores on the
; non-HSA run lines, flat or global stores on the HSA run lines.
; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64:
; GCN: s_load_dwordx8

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(4)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}
201
202; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64:
203; GCN: s_load_dwordx8
204
205; GCN: s_ashr_i32
206; GCN: s_ashr_i32
207; GCN: s_ashr_i32
208; GCN: s_ashr_i32
209; GCN: s_ashr_i32
210; GCN: s_ashr_i32
211; GCN: s_ashr_i32
212; GCN: s_ashr_i32
213
214; GCN-NOHSA-DAG: buffer_store_dwordx4
215; GCN-NOHSA-DAG: buffer_store_dwordx4
216; GCN-NOHSA-DAG: buffer_store_dwordx4
217; GCN-NOHSA-DAG: buffer_store_dwordx4
218
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000219; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
220; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
221; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
222; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel: load an <8 x i32> through the addrspace(4) pointer %in, sign-extend
; each lane to i64, and store the <8 x i64> through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(4)* %in) #0 {
  %narrow = load <8 x i32>, <8 x i32> addrspace(4)* %in
  %wide = sext <8 x i32> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(1)* %out
  ret void
}
229
230; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64:
231; GCN: s_load_dwordx16
232
233
234; GCN-DAG: s_ashr_i32
235
236; GCN: store_dwordx4
237; GCN: store_dwordx4
238; GCN: store_dwordx4
239; GCN: store_dwordx4
240; GCN: store_dwordx4
241; GCN: store_dwordx4
242; GCN: store_dwordx4
243; GCN: store_dwordx4
; Kernel: load a <16 x i32> through the addrspace(4) pointer %in, sign-extend
; each lane to i64, and store the <16 x i64> through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
  %narrow = load <16 x i32>, <16 x i32> addrspace(4)* %in
  %wide = sext <16 x i32> %narrow to <16 x i64>
  store <16 x i64> %wide, <16 x i64> addrspace(1)* %out
  ret void
}
250
; Zero-extending load of <16 x i32> to <16 x i64>. The <16 x i64> result is
; 128 bytes, so eight dwordx4 stores are expected: buffer stores on the
; non-HSA run lines, flat or global stores on the HSA run lines.
; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64:
; GCN: s_load_dwordx16

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
; GCN-HSA: {{flat|global}}_store_dwordx4
define amdgpu_kernel void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(4)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(4)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}
277
278; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64:
279
280; GCN: s_load_dwordx16
Marek Olsak355a8642016-08-05 21:23:29 +0000281; GCN-DAG: s_load_dwordx16
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000282
Marek Olsak355a8642016-08-05 21:23:29 +0000283; GCN-NOHSA-DAG: buffer_store_dwordx4
284; GCN-NOHSA-DAG: buffer_store_dwordx4
285; GCN-NOHSA-DAG: buffer_store_dwordx4
286; GCN-NOHSA-DAG: buffer_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000287
Marek Olsak355a8642016-08-05 21:23:29 +0000288; GCN-NOHSA-DAG: buffer_store_dwordx4
289; GCN-NOHSA-DAG: buffer_store_dwordx4
290; GCN-NOHSA-DAG: buffer_store_dwordx4
291; GCN-NOHSA-DAG: buffer_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000292
Marek Olsak355a8642016-08-05 21:23:29 +0000293; GCN-NOHSA-DAG: buffer_store_dwordx4
294; GCN-NOHSA-DAG: buffer_store_dwordx4
295; GCN-NOHSA-DAG: buffer_store_dwordx4
296; GCN-NOHSA-DAG: buffer_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000297
Marek Olsak355a8642016-08-05 21:23:29 +0000298; GCN-NOHSA-DAG: buffer_store_dwordx4
299; GCN-NOHSA-DAG: buffer_store_dwordx4
300; GCN-NOHSA-DAG: buffer_store_dwordx4
301; GCN-NOHSA-DAG: buffer_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000302
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000303; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
304; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
305; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
306; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000307
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000308; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
309; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
310; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
311; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000312
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000313; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
314; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
315; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
316; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000317
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000318; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
319; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
320; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
321; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000322
; Kernel: load a <32 x i32> through the addrspace(4) pointer %in, sign-extend
; each lane to i64, and store the <32 x i64> through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
  %narrow = load <32 x i32>, <32 x i32> addrspace(4)* %in
  %wide = sext <32 x i32> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(1)* %out
  ret void
}
329
330; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64:
331; GCN: s_load_dwordx16
332; GCN: s_load_dwordx16
333
334; GCN-NOHSA-DAG: buffer_store_dwordx4
335; GCN-NOHSA-DAG: buffer_store_dwordx4
336; GCN-NOHSA-DAG: buffer_store_dwordx4
337; GCN-NOHSA-DAG: buffer_store_dwordx4
338
339; GCN-NOHSA-DAG: buffer_store_dwordx4
340; GCN-NOHSA-DAG: buffer_store_dwordx4
341; GCN-NOHSA-DAG: buffer_store_dwordx4
342; GCN-NOHSA-DAG: buffer_store_dwordx4
343
344; GCN-NOHSA-DAG: buffer_store_dwordx4
345; GCN-NOHSA-DAG: buffer_store_dwordx4
346; GCN-NOHSA-DAG: buffer_store_dwordx4
347; GCN-NOHSA-DAG: buffer_store_dwordx4
348
349; GCN-NOHSA-DAG: buffer_store_dwordx4
350; GCN-NOHSA-DAG: buffer_store_dwordx4
351; GCN-NOHSA-DAG: buffer_store_dwordx4
352; GCN-NOHSA-DAG: buffer_store_dwordx4
353
354
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000355; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
356; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
357; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
358; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000359
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000360; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
361; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
362; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
363; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000364
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000365; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
366; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
367; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
368; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000369
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000370; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
371; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
372; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
373; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel: load a <32 x i32> through the addrspace(4) pointer %in, zero-extend
; each lane to i64, and store the <32 x i64> through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
  %narrow = load <32 x i32>, <32 x i32> addrspace(4)* %in
  %wide = zext <32 x i32> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(1)* %out
  ret void
}
380
Stanislav Mekhanoshin44451b32018-08-31 22:43:36 +0000381; FUNC-LABEL: {{^}}constant_load_v32i32:
382; GCN: s_load_dwordx16
383; GCN: s_load_dwordx16
384
385; GCN-NOHSA-DAG: buffer_store_dwordx4
386; GCN-NOHSA-DAG: buffer_store_dwordx4
387; GCN-NOHSA-DAG: buffer_store_dwordx4
388; GCN-NOHSA-DAG: buffer_store_dwordx4
389
390; GCN-NOHSA-DAG: buffer_store_dwordx4
391; GCN-NOHSA-DAG: buffer_store_dwordx4
392; GCN-NOHSA-DAG: buffer_store_dwordx4
393; GCN-NOHSA-DAG: buffer_store_dwordx4
394
395; GCN-NOHSA-DAG: buffer_store_dwordx4
396; GCN-NOHSA-DAG: buffer_store_dwordx4
397; GCN-NOHSA-DAG: buffer_store_dwordx4
398; GCN-NOHSA-DAG: buffer_store_dwordx4
399
400; GCN-NOHSA-DAG: buffer_store_dwordx4
401; GCN-NOHSA-DAG: buffer_store_dwordx4
402; GCN-NOHSA-DAG: buffer_store_dwordx4
403; GCN-NOHSA-DAG: buffer_store_dwordx4
404
405; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
406; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
407; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
408; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
409
410; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
411; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
412; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
413; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
414
415; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
416; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
417; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
418; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
419
420; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
421; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
422; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
423; GCN-HSA-DAG: {{flat|global}}_store_dwordx4
; Kernel: read a <32 x i32> vector through the addrspace(4) pointer %in and
; write it unchanged through the addrspace(1) pointer %out.
define amdgpu_kernel void @constant_load_v32i32(<32 x i32> addrspace(1)* %out, <32 x i32> addrspace(4)* %in) #0 {
  %val = load <32 x i32>, <32 x i32> addrspace(4)* %in
  store <32 x i32> %val, <32 x i32> addrspace(1)* %out
  ret void
}
429
; Attribute group #0, referenced by every kernel definition in this file.
attributes #0 = { nounwind }