; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FIXME: r600 is broken because the bigger testcases spill registers, which is not implemented.
8
9; FUNC-LABEL: {{^}}global_load_i16:
10; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
11; GCN-HSA: flat_load_ushort
12
13; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; Copies a single i16 from %in to %out (both addrspace(1)) with no extension.
define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %val = load i16, i16 addrspace(1)* %in
  store i16 %val, i16 addrspace(1)* %out
  ret void
}
20
21; FUNC-LABEL: {{^}}global_load_v2i16:
22; GCN-NOHSA: buffer_load_dword v
23; GCN-HSA: flat_load_dword v
24
25; EG: VTX_READ_32
; Copies a <2 x i16> vector from %in to %out (both addrspace(1)) unchanged.
define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %in
  store <2 x i16> %vec, <2 x i16> addrspace(1)* %out
  ret void
}
32
33; FUNC-LABEL: {{^}}global_load_v3i16:
34; GCN-NOHSA: buffer_load_dwordx2 v
35; GCN-HSA: flat_load_dwordx2 v
36
37; EG-DAG: VTX_READ_32
38; EG-DAG: VTX_READ_16
; Copies a <3 x i16> vector (a non-power-of-two element count) from %in to
; %out, both in addrspace(1).
define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %vec = load <3 x i16>, <3 x i16> addrspace(1)* %in
  store <3 x i16> %vec, <3 x i16> addrspace(1)* %out
  ret void
}
45
46; FUNC-LABEL: {{^}}global_load_v4i16:
47; GCN-NOHSA: buffer_load_dwordx2
48; GCN-HSA: flat_load_dwordx2
49
50; EG: VTX_READ_64
; Copies a <4 x i16> vector from %in to %out (both addrspace(1)) unchanged.
define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %vec = load <4 x i16>, <4 x i16> addrspace(1)* %in
  store <4 x i16> %vec, <4 x i16> addrspace(1)* %out
  ret void
}
57
58; FUNC-LABEL: {{^}}global_load_v8i16:
59; GCN-NOHSA: buffer_load_dwordx4
60; GCN-HSA: flat_load_dwordx4
61
62; EG: VTX_READ_128
; Copies an <8 x i16> vector from %in to %out (both addrspace(1)) unchanged.
define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
entry:
  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in
  store <8 x i16> %vec, <8 x i16> addrspace(1)* %out
  ret void
}
69
70; FUNC-LABEL: {{^}}global_load_v16i16:
71; GCN-NOHSA: buffer_load_dwordx4
72; GCN-NOHSA: buffer_load_dwordx4
73
74; GCN-HSA: flat_load_dwordx4
75; GCN-HSA: flat_load_dwordx4
76
77; EG: VTX_READ_128
78; EG: VTX_READ_128
; Copies a <16 x i16> vector from %in to %out (both addrspace(1)) unchanged.
define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
entry:
  %vec = load <16 x i16>, <16 x i16> addrspace(1)* %in
  store <16 x i16> %vec, <16 x i16> addrspace(1)* %out
  ret void
}
85
86; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
87; GCN-NOHSA: buffer_load_ushort
88; GCN-NOHSA: buffer_store_dword
89
90; GCN-HSA: flat_load_ushort
91; GCN-HSA: flat_store_dword
92
93; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; Loads an i16 from %in, zero-extends it to i32 and stores the result to %out.
define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %narrow = load i16, i16 addrspace(1)* %in
  %wide = zext i16 %narrow to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
100
; FUNC-LABEL: {{^}}global_sextload_i16_to_i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_store_dword

; The register patterns below accept multi-digit register numbers and escape
; the '.' so that it only matches the literal dot before the channel letter.
; EG: VTX_READ_16 [[DST:T[0-9]+\.[XYZW]]], [[DST]]
; EG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
; EG: 16

; Loads an i16 from %in, sign-extends it to i32 and stores the result to %out.
define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}
117
118; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i32:
119; GCN-NOHSA: buffer_load_ushort
120; GCN-HSA: flat_load_ushort
; Loads a <1 x i16>, zero-extends its single lane to i32 and stores the
; resulting <1 x i32> to %out.
define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %narrow = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %wide = zext <1 x i16> %narrow to <1 x i32>
  store <1 x i32> %wide, <1 x i32> addrspace(1)* %out
  ret void
}
127
128; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i32:
129; GCN-NOHSA: buffer_load_sshort
130; GCN-HSA: flat_load_sshort
; Loads a <1 x i16>, sign-extends its single lane to i32 and stores the
; resulting <1 x i32> to %out.
define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %narrow = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %wide = sext <1 x i16> %narrow to <1 x i32>
  store <1 x i32> %wide, <1 x i32> addrspace(1)* %out
  ret void
}
137
138; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i32:
139; GCN-NOHSA: buffer_load_ushort
140; GCN-NOHSA: buffer_load_ushort
141; GCN-HSA: flat_load_ushort
142; GCN-HSA: flat_load_ushort
; Loads a <2 x i16>, zero-extends each lane to i32 and stores the <2 x i32>.
define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %narrow = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = zext <2 x i16> %narrow to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
149
; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_load_sshort

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_load_sshort

; The register patterns below accept multi-digit register numbers and escape
; the '.' so that it only matches the literal dot before the channel letter.
; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; EG-DAG: 16
; EG-DAG: 16

; Loads a <2 x i16>, sign-extends each lane to i32 and stores the <2 x i32>.
define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}
169
170; FUNC-LABEL: {{^}}global_global_zextload_v3i16_to_v3i32:
171; GCN-NOHSA: buffer_load_dwordx2
172; GCN-HSA: flat_load_dwordx2
; Loads a <3 x i16>, zero-extends each lane to i32 and stores the <3 x i32>.
; NOTE(review): the doubled "global_global_" prefix looks like a typo; it is
; kept because the FUNC-LABEL check for this function uses the same name.
define void @global_global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %narrow = load <3 x i16>, <3 x i16> addrspace(1)* %in
  %wide = zext <3 x i16> %narrow to <3 x i32>
  store <3 x i32> %wide, <3 x i32> addrspace(1)* %out
  ret void
}
180
181; FUNC-LABEL: {{^}}global_global_sextload_v3i16_to_v3i32:
182; GCN-NOHSA: buffer_load_dwordx2
183; GCN-HSA: flat_load_dwordx2
; Loads a <3 x i16>, sign-extends each lane to i32 and stores the <3 x i32>.
; NOTE(review): the doubled "global_global_" prefix looks like a typo; it is
; kept because the FUNC-LABEL check for this function uses the same name.
define void @global_global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %narrow = load <3 x i16>, <3 x i16> addrspace(1)* %in
  %wide = sext <3 x i16> %narrow to <3 x i32>
  store <3 x i32> %wide, <3 x i32> addrspace(1)* %out
  ret void
}
191
192; FUNC-LABEL: {{^}}global_global_zextload_v4i16_to_v4i32:
193; GCN-NOHSA: buffer_load_ushort
194; GCN-NOHSA: buffer_load_ushort
195; GCN-NOHSA: buffer_load_ushort
196; GCN-NOHSA: buffer_load_ushort
197
198; GCN-HSA: flat_load_ushort
199; GCN-HSA: flat_load_ushort
200; GCN-HSA: flat_load_ushort
201; GCN-HSA: flat_load_ushort
202
203; EG: VTX_READ_16
204; EG: VTX_READ_16
205; EG: VTX_READ_16
206; EG: VTX_READ_16
; Loads a <4 x i16>, zero-extends each lane to i32 and stores the <4 x i32>.
; NOTE(review): the doubled "global_global_" prefix looks like a typo; it is
; kept because the FUNC-LABEL check for this function uses the same name.
define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %narrow = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = zext <4 x i16> %narrow to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
213
; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_load_sshort

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_load_sshort

; The register patterns below accept multi-digit register numbers and escape
; the '.' so that it only matches the literal dot before the channel letter.
; EG-DAG: VTX_READ_16 [[DST_X:T[0-9]+\.[XYZW]]], [[DST_X]]
; EG-DAG: VTX_READ_16 [[DST_Y:T[0-9]+\.[XYZW]]], [[DST_Y]]
; EG-DAG: VTX_READ_16 [[DST_Z:T[0-9]+\.[XYZW]]], [[DST_Z]]
; EG-DAG: VTX_READ_16 [[DST_W:T[0-9]+\.[XYZW]]], [[DST_W]]
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_X]], 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Y]], 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_Z]], 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST_W]], 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16

; Loads a <4 x i16>, sign-extends each lane to i32 and stores the <4 x i32>.
define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}
243
244; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i32:
245; GCN-NOHSA: buffer_load_ushort
246; GCN-NOHSA: buffer_load_ushort
247; GCN-NOHSA: buffer_load_ushort
248; GCN-NOHSA: buffer_load_ushort
249; GCN-NOHSA: buffer_load_ushort
250; GCN-NOHSA: buffer_load_ushort
251; GCN-NOHSA: buffer_load_ushort
252; GCN-NOHSA: buffer_load_ushort
253
254; GCN-HSA: flat_load_ushort
255; GCN-HSA: flat_load_ushort
256; GCN-HSA: flat_load_ushort
257; GCN-HSA: flat_load_ushort
258; GCN-HSA: flat_load_ushort
259; GCN-HSA: flat_load_ushort
260; GCN-HSA: flat_load_ushort
261; GCN-HSA: flat_load_ushort
; Loads an <8 x i16>, zero-extends each lane to i32 and stores the <8 x i32>.
define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %narrow = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %wide = zext <8 x i16> %narrow to <8 x i32>
  store <8 x i32> %wide, <8 x i32> addrspace(1)* %out
  ret void
}
268
269; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i32:
270; GCN-NOHSA: buffer_load_sshort
271; GCN-NOHSA: buffer_load_sshort
272; GCN-NOHSA: buffer_load_sshort
273; GCN-NOHSA: buffer_load_sshort
274; GCN-NOHSA: buffer_load_sshort
275; GCN-NOHSA: buffer_load_sshort
276; GCN-NOHSA: buffer_load_sshort
277; GCN-NOHSA: buffer_load_sshort
278
279; GCN-HSA: flat_load_sshort
280; GCN-HSA: flat_load_sshort
281; GCN-HSA: flat_load_sshort
282; GCN-HSA: flat_load_sshort
283; GCN-HSA: flat_load_sshort
284; GCN-HSA: flat_load_sshort
285; GCN-HSA: flat_load_sshort
286; GCN-HSA: flat_load_sshort
; Loads an <8 x i16>, sign-extends each lane to i32 and stores the <8 x i32>.
define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %narrow = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %wide = sext <8 x i16> %narrow to <8 x i32>
  store <8 x i32> %wide, <8 x i32> addrspace(1)* %out
  ret void
}
293
294; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i32:
295; GCN-NOHSA: buffer_load_ushort
296; GCN-NOHSA: buffer_load_ushort
297; GCN-NOHSA: buffer_load_ushort
298; GCN-NOHSA: buffer_load_ushort
299; GCN-NOHSA: buffer_load_ushort
300; GCN-NOHSA: buffer_load_ushort
301; GCN-NOHSA: buffer_load_ushort
302; GCN-NOHSA: buffer_load_ushort
303; GCN-NOHSA: buffer_load_ushort
304; GCN-NOHSA: buffer_load_ushort
305; GCN-NOHSA: buffer_load_ushort
306; GCN-NOHSA: buffer_load_ushort
307; GCN-NOHSA: buffer_load_ushort
308; GCN-NOHSA: buffer_load_ushort
309; GCN-NOHSA: buffer_load_ushort
310; GCN-NOHSA: buffer_load_ushort
311
312; GCN-HSA: flat_load_ushort
313; GCN-HSA: flat_load_ushort
314; GCN-HSA: flat_load_ushort
315; GCN-HSA: flat_load_ushort
316; GCN-HSA: flat_load_ushort
317; GCN-HSA: flat_load_ushort
318; GCN-HSA: flat_load_ushort
319; GCN-HSA: flat_load_ushort
320; GCN-HSA: flat_load_ushort
321; GCN-HSA: flat_load_ushort
322; GCN-HSA: flat_load_ushort
323; GCN-HSA: flat_load_ushort
324; GCN-HSA: flat_load_ushort
325; GCN-HSA: flat_load_ushort
326; GCN-HSA: flat_load_ushort
327; GCN-HSA: flat_load_ushort
; Loads a <16 x i16>, zero-extends each lane to i32 and stores the <16 x i32>.
define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %narrow = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %wide = zext <16 x i16> %narrow to <16 x i32>
  store <16 x i32> %wide, <16 x i32> addrspace(1)* %out
  ret void
}
334
335; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i32:
; Loads a <16 x i16>, sign-extends each lane to i32 and stores the <16 x i32>.
; NOTE(review): only the function label is checked for this test; no
; instruction patterns are verified.
define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %narrow = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %wide = sext <16 x i16> %narrow to <16 x i32>
  store <16 x i32> %wide, <16 x i32> addrspace(1)* %out
  ret void
}
342
343; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i32:
344; GCN-NOHSA: buffer_load_ushort
345; GCN-NOHSA: buffer_load_ushort
346; GCN-NOHSA: buffer_load_ushort
347; GCN-NOHSA: buffer_load_ushort
348; GCN-NOHSA: buffer_load_ushort
349; GCN-NOHSA: buffer_load_ushort
350; GCN-NOHSA: buffer_load_ushort
351; GCN-NOHSA: buffer_load_ushort
352; GCN-NOHSA: buffer_load_ushort
353; GCN-NOHSA: buffer_load_ushort
354; GCN-NOHSA: buffer_load_ushort
355; GCN-NOHSA: buffer_load_ushort
356; GCN-NOHSA: buffer_load_ushort
357; GCN-NOHSA: buffer_load_ushort
358; GCN-NOHSA: buffer_load_ushort
359; GCN-NOHSA: buffer_load_ushort
360; GCN-NOHSA: buffer_load_ushort
361; GCN-NOHSA: buffer_load_ushort
362; GCN-NOHSA: buffer_load_ushort
363; GCN-NOHSA: buffer_load_ushort
364; GCN-NOHSA: buffer_load_ushort
365; GCN-NOHSA: buffer_load_ushort
366; GCN-NOHSA: buffer_load_ushort
367; GCN-NOHSA: buffer_load_ushort
368; GCN-NOHSA: buffer_load_ushort
369; GCN-NOHSA: buffer_load_ushort
370; GCN-NOHSA: buffer_load_ushort
371; GCN-NOHSA: buffer_load_ushort
372; GCN-NOHSA: buffer_load_ushort
373; GCN-NOHSA: buffer_load_ushort
374; GCN-NOHSA: buffer_load_ushort
375; GCN-NOHSA: buffer_load_ushort
376
377; GCN-HSA: flat_load_ushort
378; GCN-HSA: flat_load_ushort
379; GCN-HSA: flat_load_ushort
380; GCN-HSA: flat_load_ushort
381; GCN-HSA: flat_load_ushort
382; GCN-HSA: flat_load_ushort
383; GCN-HSA: flat_load_ushort
384; GCN-HSA: flat_load_ushort
385; GCN-HSA: flat_load_ushort
386; GCN-HSA: flat_load_ushort
387; GCN-HSA: flat_load_ushort
388; GCN-HSA: flat_load_ushort
389; GCN-HSA: flat_load_ushort
390; GCN-HSA: flat_load_ushort
391; GCN-HSA: flat_load_ushort
392; GCN-HSA: flat_load_ushort
393; GCN-HSA: flat_load_ushort
394; GCN-HSA: flat_load_ushort
395; GCN-HSA: flat_load_ushort
396; GCN-HSA: flat_load_ushort
397; GCN-HSA: flat_load_ushort
398; GCN-HSA: flat_load_ushort
399; GCN-HSA: flat_load_ushort
400; GCN-HSA: flat_load_ushort
401; GCN-HSA: flat_load_ushort
402; GCN-HSA: flat_load_ushort
403; GCN-HSA: flat_load_ushort
404; GCN-HSA: flat_load_ushort
405; GCN-HSA: flat_load_ushort
406; GCN-HSA: flat_load_ushort
407; GCN-HSA: flat_load_ushort
408; GCN-HSA: flat_load_ushort
; Loads a <32 x i16>, zero-extends each lane to i32 and stores the <32 x i32>.
define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %narrow = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %wide = zext <32 x i16> %narrow to <32 x i32>
  store <32 x i32> %wide, <32 x i32> addrspace(1)* %out
  ret void
}
415
416; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i32:
417; GCN-NOHSA: buffer_load_sshort
418; GCN-NOHSA: buffer_load_sshort
419; GCN-NOHSA: buffer_load_sshort
420; GCN-NOHSA: buffer_load_sshort
421; GCN-NOHSA: buffer_load_sshort
422; GCN-NOHSA: buffer_load_sshort
423; GCN-NOHSA: buffer_load_sshort
424; GCN-NOHSA: buffer_load_sshort
425; GCN-NOHSA: buffer_load_sshort
426; GCN-NOHSA: buffer_load_sshort
427; GCN-NOHSA: buffer_load_sshort
428; GCN-NOHSA: buffer_load_sshort
429; GCN-NOHSA: buffer_load_sshort
430; GCN-NOHSA: buffer_load_sshort
431; GCN-NOHSA: buffer_load_sshort
432; GCN-NOHSA: buffer_load_sshort
433; GCN-NOHSA: buffer_load_sshort
434; GCN-NOHSA: buffer_load_sshort
435; GCN-NOHSA: buffer_load_sshort
436; GCN-NOHSA: buffer_load_sshort
437; GCN-NOHSA: buffer_load_sshort
438; GCN-NOHSA: buffer_load_sshort
439; GCN-NOHSA: buffer_load_sshort
440; GCN-NOHSA: buffer_load_sshort
441; GCN-NOHSA: buffer_load_sshort
442; GCN-NOHSA: buffer_load_sshort
443; GCN-NOHSA: buffer_load_sshort
444; GCN-NOHSA: buffer_load_sshort
445; GCN-NOHSA: buffer_load_sshort
446; GCN-NOHSA: buffer_load_sshort
447; GCN-NOHSA: buffer_load_sshort
448; GCN-NOHSA: buffer_load_sshort
449
450; GCN-HSA: flat_load_sshort
451; GCN-HSA: flat_load_sshort
452; GCN-HSA: flat_load_sshort
453; GCN-HSA: flat_load_sshort
454; GCN-HSA: flat_load_sshort
455; GCN-HSA: flat_load_sshort
456; GCN-HSA: flat_load_sshort
457; GCN-HSA: flat_load_sshort
458; GCN-HSA: flat_load_sshort
459; GCN-HSA: flat_load_sshort
460; GCN-HSA: flat_load_sshort
461; GCN-HSA: flat_load_sshort
462; GCN-HSA: flat_load_sshort
463; GCN-HSA: flat_load_sshort
464; GCN-HSA: flat_load_sshort
465; GCN-HSA: flat_load_sshort
466; GCN-HSA: flat_load_sshort
467; GCN-HSA: flat_load_sshort
468; GCN-HSA: flat_load_sshort
469; GCN-HSA: flat_load_sshort
470; GCN-HSA: flat_load_sshort
471; GCN-HSA: flat_load_sshort
472; GCN-HSA: flat_load_sshort
473; GCN-HSA: flat_load_sshort
474; GCN-HSA: flat_load_sshort
475; GCN-HSA: flat_load_sshort
476; GCN-HSA: flat_load_sshort
477; GCN-HSA: flat_load_sshort
478; GCN-HSA: flat_load_sshort
479; GCN-HSA: flat_load_sshort
480; GCN-HSA: flat_load_sshort
481; GCN-HSA: flat_load_sshort
; Loads a <32 x i16>, sign-extends each lane to i32 and stores the <32 x i32>.
define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %narrow = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %wide = sext <32 x i16> %narrow to <32 x i32>
  store <32 x i32> %wide, <32 x i32> addrspace(1)* %out
  ret void
}
488
489; FUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i32:
490; GCN-NOHSA: buffer_load_ushort
491; GCN-NOHSA: buffer_load_ushort
492; GCN-NOHSA: buffer_load_ushort
493; GCN-NOHSA: buffer_load_ushort
494; GCN-NOHSA: buffer_load_ushort
495; GCN-NOHSA: buffer_load_ushort
496; GCN-NOHSA: buffer_load_ushort
497; GCN-NOHSA: buffer_load_ushort
498; GCN-NOHSA: buffer_load_ushort
499; GCN-NOHSA: buffer_load_ushort
500; GCN-NOHSA: buffer_load_ushort
501; GCN-NOHSA: buffer_load_ushort
502; GCN-NOHSA: buffer_load_ushort
503; GCN-NOHSA: buffer_load_ushort
504; GCN-NOHSA: buffer_load_ushort
505; GCN-NOHSA: buffer_load_ushort
506; GCN-NOHSA: buffer_load_ushort
507; GCN-NOHSA: buffer_load_ushort
508; GCN-NOHSA: buffer_load_ushort
509; GCN-NOHSA: buffer_load_ushort
510; GCN-NOHSA: buffer_load_ushort
511; GCN-NOHSA: buffer_load_ushort
512; GCN-NOHSA: buffer_load_ushort
513; GCN-NOHSA: buffer_load_ushort
514; GCN-NOHSA: buffer_load_ushort
515; GCN-NOHSA: buffer_load_ushort
516; GCN-NOHSA: buffer_load_ushort
517; GCN-NOHSA: buffer_load_ushort
518; GCN-NOHSA: buffer_load_ushort
519; GCN-NOHSA: buffer_load_ushort
520; GCN-NOHSA: buffer_load_ushort
521; GCN-NOHSA: buffer_load_ushort
522; GCN-NOHSA: buffer_load_ushort
523; GCN-NOHSA: buffer_load_ushort
524; GCN-NOHSA: buffer_load_ushort
525; GCN-NOHSA: buffer_load_ushort
526; GCN-NOHSA: buffer_load_ushort
527; GCN-NOHSA: buffer_load_ushort
528; GCN-NOHSA: buffer_load_ushort
529; GCN-NOHSA: buffer_load_ushort
530; GCN-NOHSA: buffer_load_ushort
531; GCN-NOHSA: buffer_load_ushort
532; GCN-NOHSA: buffer_load_ushort
533; GCN-NOHSA: buffer_load_ushort
534; GCN-NOHSA: buffer_load_ushort
535; GCN-NOHSA: buffer_load_ushort
536; GCN-NOHSA: buffer_load_ushort
537; GCN-NOHSA: buffer_load_ushort
538; GCN-NOHSA: buffer_load_ushort
539; GCN-NOHSA: buffer_load_ushort
540; GCN-NOHSA: buffer_load_ushort
541; GCN-NOHSA: buffer_load_ushort
542; GCN-NOHSA: buffer_load_ushort
543; GCN-NOHSA: buffer_load_ushort
544; GCN-NOHSA: buffer_load_ushort
545; GCN-NOHSA: buffer_load_ushort
546; GCN-NOHSA: buffer_load_ushort
547; GCN-NOHSA: buffer_load_ushort
548; GCN-NOHSA: buffer_load_ushort
549; GCN-NOHSA: buffer_load_ushort
550; GCN-NOHSA: buffer_load_ushort
551; GCN-NOHSA: buffer_load_ushort
552; GCN-NOHSA: buffer_load_ushort
553; GCN-NOHSA: buffer_load_ushort
554
555; GCN-HSA: flat_load_ushort
556; GCN-HSA: flat_load_ushort
557; GCN-HSA: flat_load_ushort
558; GCN-HSA: flat_load_ushort
559; GCN-HSA: flat_load_ushort
560; GCN-HSA: flat_load_ushort
561; GCN-HSA: flat_load_ushort
562; GCN-HSA: flat_load_ushort
563; GCN-HSA: flat_load_ushort
564; GCN-HSA: flat_load_ushort
565; GCN-HSA: flat_load_ushort
566; GCN-HSA: flat_load_ushort
567; GCN-HSA: flat_load_ushort
568; GCN-HSA: flat_load_ushort
569; GCN-HSA: flat_load_ushort
570; GCN-HSA: flat_load_ushort
571; GCN-HSA: flat_load_ushort
572; GCN-HSA: flat_load_ushort
573; GCN-HSA: flat_load_ushort
574; GCN-HSA: flat_load_ushort
575; GCN-HSA: flat_load_ushort
576; GCN-HSA: flat_load_ushort
577; GCN-HSA: flat_load_ushort
578; GCN-HSA: flat_load_ushort
579; GCN-HSA: flat_load_ushort
580; GCN-HSA: flat_load_ushort
581; GCN-HSA: flat_load_ushort
582; GCN-HSA: flat_load_ushort
583; GCN-HSA: flat_load_ushort
584; GCN-HSA: flat_load_ushort
585; GCN-HSA: flat_load_ushort
586; GCN-HSA: flat_load_ushort
587; GCN-HSA: flat_load_ushort
588; GCN-HSA: flat_load_ushort
589; GCN-HSA: flat_load_ushort
590; GCN-HSA: flat_load_ushort
591; GCN-HSA: flat_load_ushort
592; GCN-HSA: flat_load_ushort
593; GCN-HSA: flat_load_ushort
594; GCN-HSA: flat_load_ushort
595; GCN-HSA: flat_load_ushort
596; GCN-HSA: flat_load_ushort
597; GCN-HSA: flat_load_ushort
598; GCN-HSA: flat_load_ushort
599; GCN-HSA: flat_load_ushort
600; GCN-HSA: flat_load_ushort
601; GCN-HSA: flat_load_ushort
602; GCN-HSA: flat_load_ushort
603; GCN-HSA: flat_load_ushort
604; GCN-HSA: flat_load_ushort
605; GCN-HSA: flat_load_ushort
606; GCN-HSA: flat_load_ushort
607; GCN-HSA: flat_load_ushort
608; GCN-HSA: flat_load_ushort
609; GCN-HSA: flat_load_ushort
610; GCN-HSA: flat_load_ushort
611; GCN-HSA: flat_load_ushort
612; GCN-HSA: flat_load_ushort
613; GCN-HSA: flat_load_ushort
614; GCN-HSA: flat_load_ushort
615; GCN-HSA: flat_load_ushort
616; GCN-HSA: flat_load_ushort
617; GCN-HSA: flat_load_ushort
618; GCN-HSA: flat_load_ushort
; Loads a <64 x i16>, zero-extends each lane to i32 and stores the <64 x i32>.
define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
  %narrow = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %wide = zext <64 x i16> %narrow to <64 x i32>
  store <64 x i32> %wide, <64 x i32> addrspace(1)* %out
  ret void
}
625
626; FUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i32:
; Loads a <64 x i16>, sign-extends each lane to i32 and stores the <64 x i32>.
; NOTE(review): only the function label is checked for this test; no
; instruction patterns are verified.
define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
  %narrow = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %wide = sext <64 x i16> %narrow to <64 x i32>
  store <64 x i32> %wide, <64 x i32> addrspace(1)* %out
  ret void
}
633
634; FUNC-LABEL: {{^}}global_zextload_i16_to_i64:
635; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
636; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
637; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
638
639; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
640; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; Loads an i16 from %in, zero-extends it to i64 and stores the result to %out.
define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %narrow = load i16, i16 addrspace(1)* %in
  %wide = zext i16 %narrow to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
647
648; FUNC-LABEL: {{^}}global_sextload_i16_to_i64:
649; GCN-NOHSA-DAG: buffer_load_sshort v[[LO:[0-9]+]],
650; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
651; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
652
653; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
654; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; Loads an i16 from %in, sign-extends it to i64 and stores the result to %out.
define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %narrow = load i16, i16 addrspace(1)* %in
  %wide = sext i16 %narrow to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
661
662; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i64:
; Loads a <1 x i16>, zero-extends its single lane to i64 and stores the
; resulting <1 x i64>. Only the function label is checked for this test.
define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %narrow = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %wide = zext <1 x i16> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(1)* %out
  ret void
}
669
670; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i64:
; Loads a <1 x i16>, sign-extends its single lane to i64 and stores the
; resulting <1 x i64>. Only the function label is checked for this test.
define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %narrow = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %wide = sext <1 x i16> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(1)* %out
  ret void
}
677
678; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i64:
; Loads a <2 x i16>, zero-extends each lane to i64 and stores the <2 x i64>.
; Only the function label is checked for this test.
define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %narrow = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = zext <2 x i16> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
  ret void
}
685
686; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64:
; Loads a <2 x i16>, sign-extends each lane to i64 and stores the <2 x i64>.
; Only the function label is checked for this test.
define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %narrow = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = sext <2 x i16> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
  ret void
}
693
694; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64:
; Loads a <4 x i16>, zero-extends each lane to i64 and stores the <4 x i64>.
; Only the function label is checked for this test.
define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %narrow = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = zext <4 x i16> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(1)* %out
  ret void
}
701
702; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64:
; Loads a <4 x i16>, sign-extends each lane to i64 and stores the <4 x i64>.
; Only the function label is checked for this test.
define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %narrow = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = sext <4 x i16> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(1)* %out
  ret void
}
709
710; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64:
; Loads an <8 x i16>, zero-extends each lane to i64 and stores the <8 x i64>.
; Only the function label is checked for this test.
define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %narrow = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %wide = zext <8 x i16> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(1)* %out
  ret void
}
717
718; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64:
; Loads an <8 x i16>, sign-extends each lane to i64 and stores the <8 x i64>.
; Only the function label is checked for this test.
define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %narrow = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %wide = sext <8 x i16> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(1)* %out
  ret void
}
725
726; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i64:
; Loads a <16 x i16>, zero-extends each lane to i64 and stores the <16 x i64>.
; Only the function label is checked for this test.
define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %narrow = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %wide = zext <16 x i16> %narrow to <16 x i64>
  store <16 x i64> %wide, <16 x i64> addrspace(1)* %out
  ret void
}
733
734; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i64:
; Loads a <16 x i16>, sign-extends each lane to i64 and stores the <16 x i64>.
; Only the function label is checked for this test.
define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %narrow = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %wide = sext <16 x i16> %narrow to <16 x i64>
  store <16 x i64> %wide, <16 x i64> addrspace(1)* %out
  ret void
}
741
742; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i64:
; Loads a <32 x i16>, zero-extends each lane to i64 and stores the <32 x i64>.
; Only the function label is checked for this test.
define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %narrow = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %wide = zext <32 x i16> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(1)* %out
  ret void
}
749
750; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i64:
; Loads a <32 x i16>, sign-extends each lane to i64 and stores the <32 x i64>.
; Only the function label is checked for this test.
define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %narrow = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %wide = sext <32 x i16> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(1)* %out
  ret void
}
757
758; ; XFUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i64:
759; define void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
760; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
761; %ext = zext <64 x i16> %load to <64 x i64>
762; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
763; ret void
764; }
765
766; ; XFUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i64:
767; define void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
768; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
769; %ext = sext <64 x i16> %load to <64 x i64>
770; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
771; ret void
772; }
773
; Attribute group referenced as #0 by the extending-load tests in this file.
attributes #0 = { nounwind }