blob: f398dd32e06de7b44914a090793e3373ecbc79bc [file] [log] [blame]
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FIXME: r600 is broken because the bigger testcases spill and it's not implemented

; Loads one i16 from the addrspace(1) pointer %in and stores it unchanged to %out.
; FUNC-LABEL: {{^}}global_load_i16:
; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %ld = load i16, i16 addrspace(1)* %in
  store i16 %ld, i16 addrspace(1)* %out
  ret void
}

; Loads a <2 x i16> vector from %in and stores it unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v2i16:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
  ret void
}

; Loads a <3 x i16> vector from %in and stores it unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v3i16:
; GCN-NOHSA: buffer_load_dwordx2 v
; GCN-HSA: flat_load_dwordx2 v

; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
  ret void
}

; Loads a <4 x i16> vector from %in and stores it unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v4i16:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
  ret void
}

; Loads an <8 x i16> vector from %in and stores it unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v8i16:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
  ret void
}

; Loads a <16 x i16> vector from %in and stores it unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v16i16:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
  ret void
}

; Loads an i16 from %in, zero-extends it to i32 and stores the result to %out.
; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_ushort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Loads an i16 from %in, sign-extends it to i32 and stores the result to %out.
; FUNC-LABEL: {{^}}global_sextload_i16_to_i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Loads a <1 x i16> from %in, zero-extends it to <1 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; Loads a <1 x i16> from %in, sign-extends it to <1 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-HSA: flat_load_sshort

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; Loads a <2 x i16> from %in, zero-extends it to <2 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i32:
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: BFE_UINT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{PV\.[XYZW]}}, literal
; EG: 16
define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Loads a <2 x i16> from %in, sign-extends it to <2 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i32:
; GCN-NOHSA: buffer_load_dword

; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; TODO: We should also use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{PV\.[XYZW]}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{PV\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Loads a <3 x i16> from %in, zero-extends it to <3 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_global_zextload_v3i16_to_v3i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[DST_HI]], 0, #1
; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[DST_LO]], 4, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: LSHR {{[* ]*}}{{T[0-9]\.[XYZW]}}, {{T[0-9]\.[XYZW]}}, literal
; EG: 16
define void @global_global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; Loads a <3 x i16> from %in, sign-extends it to <3 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_global_sextload_v3i16_to_v3i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[DST_HI]], 0, #1
; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[DST_LO]], 4, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{PV\.[XYZW]}}, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define void @global_global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; Loads a <4 x i16> from %in, zero-extends it to <4 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_global_zextload_v4i16_to_v4i32:
; GCN-NOHSA: buffer_load_dwordx2

; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9]\.[XYZW]}}, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 16
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: AND_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 16
define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Loads a <4 x i16> from %in, sign-extends it to <4 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i32:
; GCN-NOHSA: buffer_load_dwordx2

; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9]\.[XYZW]}}, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; TODO: We should use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Loads an <8 x i16> from %in, zero-extends it to <8 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128 [[DST:T[0-9]\.XYZW]], {{T[0-9]\.[XYZW]}}, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; Loads an <8 x i16> from %in, sign-extends it to <8 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128 [[DST:T[0-9]\.XYZW]], {{T[0-9]\.[XYZW]}}, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, {{.*}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; Loads a <16 x i16> from %in, zero-extends it to <16 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; Loads a <16 x i16> from %in, sign-extends it to <16 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; Loads a <32 x i16> from %in, zero-extends it to <32 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = zext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; Loads a <32 x i16> from %in, sign-extends it to <32 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = sext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; Loads a <64 x i16> from %in, zero-extends it to <64 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %ext = zext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; Loads a <64 x i16> from %in, sign-extends it to <64 x i32> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %ext = sext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; Loads an i16 from %in, zero-extends it to i64 and stores the result to %out.
; FUNC-LABEL: {{^}}global_zextload_i16_to_i64:
; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Loads an i16 from %in, sign-extends it to i64 and stores the result to %out.
; FUNC-LABEL: {{^}}global_sextload_i16_to_i64:
; FIXME: Need to optimize this sequence to avoid extra bfe:
; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
; t31: i64 = any_extend t28
; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16

; GCN-NOHSA-SI-DAG: buffer_load_sshort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
; GCN-NOHSA-VI-DAG: buffer_load_ushort v[[ULO:[0-9]+]],
; GCN-NOHSA-VI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 15?
; EG: 31
define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Loads a <1 x i16> from %in, zero-extends it to <1 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Loads a <1 x i16> from %in, sign-extends it to <1 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 15?
; EG: 31
define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; Loads a <2 x i16> from %in, zero-extends it to <2 x i64> and stores it to %out.
; Only the function label is checked for this case.
; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i64:
define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Loads a <2 x i16> from %in, sign-extends it to <2 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; Loads a <4 x i16> from %in, zero-extends it to <4 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Loads a <4 x i16> from %in, sign-extends it to <4 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; Loads an <8 x i16> from %in, zero-extends it to <8 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Loads an <8 x i16> from %in, sign-extends it to <8 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; Loads a <16 x i16> from %in, zero-extends it to <16 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Loads a <16 x i16> from %in, sign-extends it to <16 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = sext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; Loads a <32 x i16> from %in, zero-extends it to <32 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = zext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Loads a <32 x i16> from %in, sign-extends it to <32 x i64> and stores it to %out.
; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = sext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; ; XFUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i64:
; define void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
;   %ext = zext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; ; XFUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i64:
; define void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
;   %ext = sext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; Attribute group #0, referenced by the test functions declared with "#0".
attributes #0 = { nounwind }