; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s
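; Loads of i16 and i16 vectors from the constant address space
; (addrspace(2) here; addrspace(1) is global), plain and with zero- or
; sign-extension to i32 and i64. The same kernels are checked for SI and
; VI without the HSA ABI, for kaveri with it, and for the Evergreen/R600
; backend via the EG prefix.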

; FUNC-LABEL: {{^}}constant_load_i16:
; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %ld = load i16, i16 addrspace(2)* %in
  store i16 %ld, i16 addrspace(1)* %out
  ret void
}
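; A single i16 load cannot go through the scalar SMRD/SMEM path, which
; reads whole dwords, so it selects a 16-bit vector-memory load instead:
; buffer_load_ushort without the HSA ABI and flat_load_ushort with it.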

; FUNC-LABEL: {{^}}constant_load_v2i16:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(2)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
  ret void
}
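; From dword width upwards the uniform load can stay on the scalar unit:
; a <2 x i16> load is a single s_load_dword, and the wider vector cases
; below scale through s_load_dwordx2/x4/x8.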

; FUNC-LABEL: {{^}}constant_load_v3i16:
; GCN: s_load_dwordx2 s

; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v4i16:
; GCN: s_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(2)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v8i16:
; GCN: s_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(2)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v16i16:
; GCN: s_load_dwordx8

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(2)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_ushort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
  %a = load i16, i16 addrspace(2)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i16_to_i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
  %a = load i16, i16 addrspace(2)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}
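; On EG the sign extension is a BFE_INT bit-field extract: the source,
; a start bit of 0.0, and a literal width of 16 (the separately matched
; "16"), which pulls out the low 16 bits and sign-extends them to the
; full register.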

; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-HSA: flat_load_sshort

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i32:
; GCN: s_load_dword s
; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xffff{{$}}
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16

; v2i16 is naturally 4 byte aligned
; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal
; EG: 16
; EG: 16
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i32:
; GCN: s_load_dword s
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v2i16 is naturally 4 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].X, [[DST]], 0.0, literal
; TODO: We should use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}
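; For the packed sext cases the scalar ALU splits each dword in two:
; s_sext_i32_i16 sign-extends the low half in place, while an arithmetic
; shift right by 16 (s_ashr_i32) produces the sign-extended high half.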

; FUNC-LABEL: {{^}}constant_zextload_v3i16_to_v3i32:
; GCN: s_load_dwordx2

; v3i16 is naturally 8 byte aligned
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}},
; EG: CF_END
; EG-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG-DAG: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 16
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v3i16_to_v3i32:
; GCN: s_load_dwordx2

; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}},
; v3i16 is naturally 8 byte aligned
; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[PTR:T[0-9]\.[XYZW]]], 0, #1
; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1
; EG-DAG: ASHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v4i16 is naturally 8 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}
; EG: VTX_READ_64 [[LD:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1
; TODO: This should use LD, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}[[ST]].Y, {{.*\.[XYZW]}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST]].W, {{.*\.[XYZW]}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: AND_INT {{[* ]*}}[[ST]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v4i16 is naturally 8 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EG: VTX_READ_64 [[DST:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1
; TODO: This should use LD, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Z, {{.*}}, 0.0, literal
; TODO: We should use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].W, {{.*}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i32:
; GCN: s_load_dwordx4
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v8i16 is naturally 16 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1
; TODO: These should use LSHR instead of BFE_UINT
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].Y, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].W, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].Y, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].W, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 65535
; EG-DAG: 65535
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i32:
; GCN: s_load_dwordx4
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v8i16 is naturally 16 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1
; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].W, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].W, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i32:
; GCN: s_load_dwordx8
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 16, #1
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i32:
; GCN: s_load_dwordx8
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 16, #1
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32:
; GCN-DAG: s_load_dwordx16
; GCN-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
  %ext = zext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i32:
; GCN: s_load_dwordx16
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
  %ext = sext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
; GCN: s_load_dwordx16
; GCN: s_load_dwordx16

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
  %ext = zext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}
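; A <64 x i16> source is 128 bytes, double the widest scalar load
; (s_load_dwordx16), hence the two s_load_dwordx16 matched above.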

; FUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
  %ext = sext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_i16_to_i64:
; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
  %a = load i16, i16 addrspace(2)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}
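; Zero-extending to i64 costs no ALU work: the ushort load already
; zero-fills its 32-bit result, and the high dword is simply materialized
; as 0 (the v_mov_b32 above, the MOV of 0.0 on EG) before the 64-bit store.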

; FUNC-LABEL: {{^}}constant_sextload_i16_to_i64:
; FIXME: Need to optimize this sequence to avoid extra bfe:
; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
; t31: i64 = any_extend t28
; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16

; GCN-NOHSA-SI-DAG: buffer_load_sshort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
; GCN-NOHSA-VI-DAG: buffer_load_ushort v[[ULO:[0-9]+]],
; GCN-NOHSA-VI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
  %a = load i16, i16 addrspace(2)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}
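; The signed i64 case instead derives the high dword from the loaded
; value with an arithmetic shift right by 31, replicating the sign bit
; (v_ashrrev_i32 on GCN, ASHR by literal 31 on EG).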

; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
  %ext = sext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
  %ext = zext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
  %ext = sext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; These trigger undefined register machine verifier errors

; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
;   %ext = zext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
;   %ext = sext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

attributes #0 = { nounwind }