; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Scalar i16 load from the constant address space (4), stored to global (1).
; FUNC-LABEL: {{^}}constant_load_i16:
; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
entry:
  %ld = load i16, i16 addrspace(4)* %in
  store i16 %ld, i16 addrspace(1)* %out
  ret void
}

; <2 x i16> constant load: fits in one 32-bit scalar load on GCN.
; FUNC-LABEL: {{^}}constant_load_v2i16:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(4)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
  ret void
}

; <3 x i16> constant load: 6 bytes, read as dword + trailing short on EG.
; FUNC-LABEL: {{^}}constant_load_v3i16:
; GCN: s_load_dwordx2 s

; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
  ret void
}

; <4 x i16> constant load: one 64-bit scalar load.
; FUNC-LABEL: {{^}}constant_load_v4i16:
; GCN: s_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(4)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
  ret void
}

; <8 x i16> constant load: one 128-bit scalar load.
; FUNC-LABEL: {{^}}constant_load_v8i16:
; GCN: s_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(4)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
  ret void
}

; <16 x i16> constant load: one 256-bit scalar load (two 128-bit VTX reads on EG).
; FUNC-LABEL: {{^}}constant_load_v16i16:
; GCN: s_load_dwordx8

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(4)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
  ret void
}

; Under-aligned (align 2) <16 x i16> load: cannot use scalar loads, so HSA
; falls back to flat vector loads/stores.
; FUNC-LABEL: {{^}}constant_load_v16i16_align2:
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2
  store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32
  ret void
}

; Zero-extending i16 -> i32 load: folds into an unsigned short load.
; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_ushort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %a = load i16, i16 addrspace(4)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i16 -> i32 load: folds into a signed short load (BFE on EG).
; FUNC-LABEL: {{^}}constant_sextload_i16_to_i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %a = load i16, i16 addrspace(4)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; <1 x i16> zext load: behaves like the scalar i16 case.
; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; <1 x i16> sext load: behaves like the scalar i16 case.
; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-HSA: flat_load_sshort

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; <2 x i16> zext load: one scalar dword load, then mask/shift to split halves.
; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i32:
; GCN: s_load_dword s
; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xffff{{$}}
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16

; v2i16 is naturally 4 byte aligned
; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal
; EG: 16
; EG: 16
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; <2 x i16> sext load: one scalar dword load, then arithmetic-shift / sext ops.
; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i32:
; GCN: s_load_dword s
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v2i16 is naturally 4 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].X, [[DST]], 0.0, literal
; TODO: We should use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; <3 x i16> zext load: dword + short reads on EG, masked/shifted into 3 lanes.
; FUNC-LABEL: {{^}}constant_zextload_v3i16_to_v3i32:
; GCN: s_load_dwordx2

; v3i16 is naturally 8 byte aligned
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}},
; EG: CF_END
; EG-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1
; TODO: This should use DST, but for some there are redundant MOVs
; EG-DAG: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 16
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; <3 x i16> sext load: dword + short reads on EG, sign-extended via ASHR/BFE_INT.
; FUNC-LABEL: {{^}}constant_sextload_v3i16_to_v3i32:
; GCN: s_load_dwordx2

; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}},
; v3i16 is naturally 8 byte aligned
; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[PTR:T[0-9]\.[XYZW]]], 0, #1
; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1
; EG-DAG: ASHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; <4 x i16> zext load: 64-bit scalar load, then AND/LSHR per 16-bit lane.
; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v4i16 is naturally 8 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}
; EG: VTX_READ_64 [[LD:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1
; TODO: This should use LD, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}[[ST]].Y, {{.*\.[XYZW]}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST]].W, {{.*\.[XYZW]}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: AND_INT {{[* ]*}}[[ST]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; <4 x i16> sext load: 64-bit scalar load, then ASHR / sext ops per lane.
; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v4i16 is naturally 8 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EG: VTX_READ_64 [[DST:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1
; TODO: This should use LD, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Z, {{.*}}, 0.0, literal
; TODO: We should use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].W, {{.*}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; <8 x i16> zext load: 128-bit scalar load, AND/LSHR per lane; two result
; registers stored on EG.
; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i32:
; GCN: s_load_dwordx4
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v8i16 is naturally 16 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1
; TODO: These should use LSHR instead of BFE_UINT
; TODO: This should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].Y, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].W, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].Y, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].W, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 65535
; EG-DAG: 65535
; EG-DAG: 65535
; EG-DAG: 65535
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; <8 x i16> sext load: 128-bit scalar load, sign-extend per lane via BFE_INT
; on EG; two result registers stored.
; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i32:
; GCN: s_load_dwordx4
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v8i16 is naturally 16 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1
; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
; TODO: This should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].W, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].W, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; <16 x i16> zext load: 256-bit scalar load, then mask/shift per lane.
; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i32:
; GCN: s_load_dwordx8
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 16, #1
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; <16 x i16> sext load: 256-bit scalar load, then ASHR / sext per lane.
; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i32:
; GCN: s_load_dwordx8
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 16, #1
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; <32 x i16> zext load: 512-bit scalar load; 0xffff mask is materialized once
; and reused for every lane.
; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32:
; GCN-DAG: s_load_dwordx16
; GCN-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
  %ext = zext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; <32 x i16> sext load: 512-bit scalar load, then ASHR / sext per lane.
; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i32:
; GCN: s_load_dwordx16
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
  %ext = sext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; <64 x i16> zext load: split into two 512-bit scalar loads (eight 128-bit
; VTX reads on EG).
; FUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
; GCN: s_load_dwordx16
; GCN: s_load_dwordx16

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
  %ext = zext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; <64 x i16> sext load: only the EG load pattern is pinned here (eight
; 128-bit VTX reads covering the full 128-byte source).
; FUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
  %ext = sext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; i16 -> i64 zext load: ushort load for the low dword, zero for the high dword.
; FUNC-LABEL: {{^}}constant_zextload_i16_to_i64:
; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %a = load i16, i16 addrspace(4)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; i16 -> i64 sext load: sshort load (VI currently uses ushort + v_bfe_i32),
; high dword produced by an arithmetic shift of the low dword.
; FUNC-LABEL: {{^}}constant_sextload_i16_to_i64:
; FIXME: Need to optimize this sequence to avoid extra bfe:
;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
;    t31: i64 = any_extend t28
;  t33: i64 = sign_extend_inreg t31, ValueType:ch:i16

; GCN-NOHSA-SI-DAG: buffer_load_sshort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
; GCN-NOHSA-VI-DAG: buffer_load_ushort v[[ULO:[0-9]+]],
; GCN-NOHSA-VI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %a = load i16, i16 addrspace(4)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; <1 x i16> -> <1 x i64> zext load: same lowering as the scalar i16 case.
; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; <1 x i16> -> <1 x i64> sext load: same lowering as the scalar i16 case.
; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; <2 x i16> -> <2 x i64> zext load: source fits in a single 32-bit read.
; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; <2 x i16> -> <2 x i64> sext load: source fits in a single 32-bit read.
; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(4)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; <4 x i16> -> <4 x i64> zext load: source fits in a single 64-bit read.
; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; <4 x i16> -> <4 x i64> sext load: source fits in a single 64-bit read.
; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(4)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; <8 x i16> -> <8 x i64> zext load: source fits in a single 128-bit read.
; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

560; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:
Jan Vesely38814fa2016-08-27 19:09:43 +0000561
562; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
Yaxun Liu0124b542018-02-13 18:00:25 +0000563define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 {
564 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in
Matt Arsenaultd1097a32016-06-02 19:54:26 +0000565 %ext = sext <8 x i16> %load to <8 x i64>
566 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
567 ret void
568}

; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i64:
; Zero-extending load of <16 x i16> (32 bytes) from constant memory,
; widened to <16 x i64>; EG splits the fetch into two 128-bit VTX reads
; at offsets 0 and 16 (order not guaranteed, hence EG-DAG).

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i64:
; Sign-extending load of <16 x i16> (32 bytes) from constant memory,
; widened to <16 x i64>; EG splits the fetch into two 128-bit VTX reads
; at offsets 0 and 16 (order not guaranteed, hence EG-DAG).

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(4)* %in
  %ext = sext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i64:
; Zero-extending load of <32 x i16> (64 bytes) from constant memory,
; widened to <32 x i64>; EG splits the fetch into four 128-bit VTX reads
; at offsets 0/16/32/48 (order not guaranteed, hence EG-DAG).

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
  %ext = zext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i64:
; Sign-extending load of <32 x i16> (64 bytes) from constant memory,
; widened to <32 x i64>; EG splits the fetch into four 128-bit VTX reads
; at offsets 0/16/32/48 (order not guaranteed, hence EG-DAG).

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(4)* %in
  %ext = sext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; These are commented out because they trigger undefined-register machine
; verifier errors.

; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
;   %ext = zext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
;   %ext = sext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

attributes #0 = { nounwind }