; blob: 5df32c1e3120a60518a8f76f1bd215a446f82bd0
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s


; Scalar i32 loaded from global memory (addrspace(1)) and stored to %out.
; The checks expect a buffer dword load on the non-HSA amdgcn runs, a flat
; dword load on the amdhsa run, and a 32-bit vertex read on the r600 run.
; FUNC-LABEL: {{^}}global_load_i32:
; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define amdgpu_kernel void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(1)* %in
  store i32 %ld, i32 addrspace(1)* %out
  ret void
}
18
; <2 x i32> global load copied to %out. The checks expect a single dwordx2
; load (buffer on non-HSA, flat on HSA) and a 64-bit vertex read on r600.
; FUNC-LABEL: {{^}}global_load_v2i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64
define amdgpu_kernel void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
  ret void
}
30
; <3 x i32> global load copied to %out. The checks expect the three-element
; vector to be fetched with a four-dword instruction (dwordx4 on amdgcn,
; a 128-bit vertex read on r600).
; FUNC-LABEL: {{^}}global_load_v3i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
  ret void
}
42
; <4 x i32> global load copied to %out. The checks expect one dwordx4 load
; on amdgcn (buffer or flat) and one 128-bit vertex read on r600.
; FUNC-LABEL: {{^}}global_load_v4i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
  ret void
}
54
; <8 x i32> global load copied to %out. The checks expect the 32-byte value
; to be fetched as two four-dword loads on every target.
; FUNC-LABEL: {{^}}global_load_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
  ret void
}
69
; <16 x i32> global load copied to %out. The checks expect four four-dword
; loads on every target.
; FUNC-LABEL: {{^}}global_load_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define amdgpu_kernel void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
  ret void
}
91
; i32 global load zero-extended to i64. On amdgcn the checks capture the
; loaded register as LO, expect a v_mov_b32 of 0 into HI, and require the
; dwordx2 store to use the LO..HI register pair. On r600 only the two-channel
; raw store is checked.
; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_dword v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
define amdgpu_kernel void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}
107
; i32 global load sign-extended to i64. On amdgcn the checks expect the high
; half to be produced by an arithmetic shift right by 31 of the loaded
; register, and the dwordx2 store to use the resulting LO..HI pair. On r600
; an ASHR by the literal 31 is expected.
; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}


; EG: MEM_RAT
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; EG: 31
define amdgpu_kernel void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %ld = load i32, i32 addrspace(1)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}
126
; <1 x i32> global load zero-extended to <1 x i64>. Only the access widths
; are checked (dword load, dwordx2 store); no r600 checks are attached.
; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword
; GCN-NOHSA: buffer_store_dwordx2

; GCN-HSA: flat_load_dword
; GCN-HSA: flat_store_dwordx2
define amdgpu_kernel void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}
139
; <1 x i32> global load sign-extended to <1 x i64>. The checks mirror the
; scalar sign-extension case: shift the loaded register right by 31 to form
; the high half, then store the LO..HI pair with a dwordx2 store.
; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define amdgpu_kernel void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}
152
; <2 x i32> global load zero-extended to <2 x i64>. The checks expect one
; dwordx2 load followed by one dwordx4 store on both amdgcn configurations.
; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_load_dwordx2
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}
165
; <2 x i32> global load sign-extended to <2 x i64>. The checks expect one
; dwordx2 load, then (in any order) two arithmetic right shifts and one
; dwordx4 store.
; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}
181
; <4 x i32> global load zero-extended to <4 x i64>. The checks expect one
; dwordx4 load followed by two dwordx4 stores.
; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}
196
; <4 x i32> global load sign-extended to <4 x i64>. The checks expect one
; dwordx4 load, then (in any order) four arithmetic right shifts and two
; dwordx4 stores.
; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}
217
; <8 x i32> global load zero-extended to <8 x i64>. The checks expect two
; dwordx4 loads followed by four dwordx4 stores (matched in any order) on
; both amdgcn configurations.
; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}
240
; <8 x i32> global load sign-extended to <8 x i64>. The checks expect two
; dwordx4 loads, then (in any order) eight arithmetic right shifts and four
; dwordx4 stores.
; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}
272
; <16 x i32> global load sign-extended to <16 x i64>. The checks expect four
; dwordx4 loads, then sixteen arithmetic right shifts interleaved (order-
; independent matching) with dwordx4 stores; four stores per configuration
; are checked here.
; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4


; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}
318
; <16 x i32> global load zero-extended to <16 x i64>. The checks expect four
; dwordx4 loads followed by eight dwordx4 stores, in order, on both amdgcn
; configurations.
; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}
353
; <32 x i32> global load sign-extended to <32 x i64>. The checks expect eight
; dwordx4 loads, thirty-two arithmetic right shifts (matched in any order),
; and sixteen dwordx4 stores on both amdgcn configurations.
; NOTE(review): the eighth non-HSA load uses the -DAG form while the seven
; before it are ordered checks; presumably deliberate to tolerate scheduling
; differences - confirm before normalizing.
; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:

; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA-DAG: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32
; GCN-DAG: v_ashrrev_i32

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4

define amdgpu_kernel void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}
453
; <32 x i32> global load zero-extended to <32 x i64>. The checks expect eight
; dwordx4 loads followed by sixteen dwordx4 stores (stores matched in any
; order) on both amdgcn configurations.
; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4


; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4


; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define amdgpu_kernel void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}
520
; Attribute group #0, attached to every kernel visible in this file.
attributes #0 = { nounwind }