; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FIXME: r600 is broken for the bigger testcases because they spill and
; spilling is not implemented for that target.

8
; Scalar case: one i16 is read through %in and written back through %out
; (both addrspace(1) pointers) with no extension.
; FUNC-LABEL: {{^}}global_load_i16:
; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %value = load i16, i16 addrspace(1)* %in
  store i16 %value, i16 addrspace(1)* %out
  ret void
}
20
; <2 x i16> copy: the vector is loaded from %in and stored unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v2i16:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %value = load <2 x i16>, <2 x i16> addrspace(1)* %in
  store <2 x i16> %value, <2 x i16> addrspace(1)* %out
  ret void
}
32
; <3 x i16> copy (odd element count): loaded from %in, stored unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v3i16:
; GCN-NOHSA: buffer_load_dwordx2 v
; GCN-HSA: flat_load_dwordx2 v

; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %value = load <3 x i16>, <3 x i16> addrspace(1)* %in
  store <3 x i16> %value, <3 x i16> addrspace(1)* %out
  ret void
}
45
; <4 x i16> copy: loaded from %in, stored unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v4i16:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %value = load <4 x i16>, <4 x i16> addrspace(1)* %in
  store <4 x i16> %value, <4 x i16> addrspace(1)* %out
  ret void
}
57
; <8 x i16> copy: loaded from %in, stored unchanged to %out.
; FUNC-LABEL: {{^}}global_load_v8i16:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
entry:
  %value = load <8 x i16>, <8 x i16> addrspace(1)* %in
  store <8 x i16> %value, <8 x i16> addrspace(1)* %out
  ret void
}
69
; <16 x i16> copy: the checks below expect the load to be split in two.
; FUNC-LABEL: {{^}}global_load_v16i16:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
entry:
  %value = load <16 x i16>, <16 x i16> addrspace(1)* %in
  store <16 x i16> %value, <16 x i16> addrspace(1)* %out
  ret void
}
85
; Loads an i16 from %in, zero-extends it to i32 and stores the result to %out.
; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_ushort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %narrow = load i16, i16 addrspace(1)* %in
  %wide = zext i16 %narrow to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
100
; Loads an i16 from %in, sign-extends it to i32 and stores the result to %out.
; FUNC-LABEL: {{^}}global_sextload_i16_to_i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %narrow = load i16, i16 addrspace(1)* %in
  %wide = sext i16 %narrow to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}
117
; Single-element vector variant: <1 x i16> is zero-extended to <1 x i32>.
; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %narrow = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %wide = zext <1 x i16> %narrow to <1 x i32>
  store <1 x i32> %wide, <1 x i32> addrspace(1)* %out
  ret void
}
129
; Single-element vector variant: <1 x i16> is sign-extended to <1 x i32>.
; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-HSA: flat_load_sshort

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %narrow = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %wide = sext <1 x i16> %narrow to <1 x i32>
  store <1 x i32> %wide, <1 x i32> addrspace(1)* %out
  ret void
}
143
; <2 x i16> loaded from %in is zero-extended to <2 x i32> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i32:
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; TODO: This should use DST, but for some there are redundant MOVs
; EG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
; EG: 16
define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %narrow = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = zext <2 x i16> %narrow to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
158
; <2 x i16> loaded from %in is sign-extended to <2 x i32> and stored to %out.
; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i32:
; GCN-NOHSA: buffer_load_dword

; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; TODO: We should also use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %narrow = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = sext <2 x i16> %narrow to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}
177
; <3 x i16> loaded from %in is zero-extended to <3 x i32> and stored to %out.
; NOTE(review): the doubled "global_global_" prefix looks unintentional; it is
; kept because the label check and the symbol name must agree.
; FUNC-LABEL: {{^}}global_global_zextload_v3i16_to_v3i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[DST_HI]], 0, #1
; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[DST_LO]], 4, #1
; TODO: This should use DST, but for some there are redundant MOVs
; EG: LSHR {{[* ]*}}{{T[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
; EG: 16
define void @global_global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %narrow = load <3 x i16>, <3 x i16> addrspace(1)* %in
  %wide = zext <3 x i16> %narrow to <3 x i32>
  store <3 x i32> %wide, <3 x i32> addrspace(1)* %out
  ret void
}
194
; <3 x i16> loaded from %in is sign-extended to <3 x i32> and stored to %out.
; NOTE(review): the doubled "global_global_" prefix looks unintentional; it is
; kept because the label check and the symbol name must agree.
; FUNC-LABEL: {{^}}global_global_sextload_v3i16_to_v3i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[DST_HI]], 0, #1
; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[DST_LO]], 4, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: ASHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define void @global_global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %narrow = load <3 x i16>, <3 x i16> addrspace(1)* %in
  %wide = sext <3 x i16> %narrow to <3 x i32>
  store <3 x i32> %wide, <3 x i32> addrspace(1)* %out
  ret void
}
214
; <4 x i16> loaded from %in is zero-extended to <4 x i32> and stored to %out.
; NOTE(review): the doubled "global_global_" prefix looks unintentional; it is
; kept because the label check and the symbol name must agree.
; FUNC-LABEL: {{^}}global_global_zextload_v4i16_to_v4i32:
; GCN-NOHSA: buffer_load_dwordx2

; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
; EG-DAG: 16
; EG-DAG: 16
define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %narrow = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = zext <4 x i16> %narrow to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
232
; <4 x i16> loaded from %in is sign-extended to <4 x i32> and stored to %out.
; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i32:
; GCN-NOHSA: buffer_load_dwordx2

; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; TODO: We should use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %narrow = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = sext <4 x i16> %narrow to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}
255
; <8 x i16> loaded from %in is zero-extended to <8 x i32> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128 [[DST:T[0-9]\.XYZW]], {{T[0-9].[XYZW]}}, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %narrow = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %wide = zext <8 x i16> %narrow to <8 x i32>
  store <8 x i32> %wide, <8 x i32> addrspace(1)* %out
  ret void
}
276
; <8 x i16> loaded from %in is sign-extended to <8 x i32> and stored to %out.
; NOTE(review): the Evergreen checks here only look for LSHR, while the
; <4 x i16> sign-extending test checks BFE_INT - confirm this is intended.
; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128 [[DST:T[0-9]\.XYZW]], {{T[0-9].[XYZW]}}, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %narrow = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %wide = sext <8 x i16> %narrow to <8 x i32>
  store <8 x i32> %wide, <8 x i32> addrspace(1)* %out
  ret void
}
305
; <16 x i16> loaded from %in is zero-extended to <16 x i32> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %narrow = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %wide = zext <16 x i16> %narrow to <16 x i32>
  store <16 x i32> %wide, <16 x i32> addrspace(1)* %out
  ret void
}
321
; <16 x i16> loaded from %in is sign-extended to <16 x i32> and stored to %out.
; Only the Evergreen vertex reads are checked for this function.
; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %narrow = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %wide = sext <16 x i16> %narrow to <16 x i32>
  store <16 x i32> %wide, <16 x i32> addrspace(1)* %out
  ret void
}
332
; <32 x i16> loaded from %in is zero-extended to <32 x i32> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1
define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %narrow = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %wide = zext <32 x i16> %narrow to <32 x i32>
  store <32 x i32> %wide, <32 x i32> addrspace(1)* %out
  ret void
}
354
; <32 x i16> loaded from %in is sign-extended to <32 x i32> and stored to %out.
; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1
define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %narrow = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %wide = sext <32 x i16> %narrow to <32 x i32>
  store <32 x i32> %wide, <32 x i32> addrspace(1)* %out
  ret void
}
376
; <64 x i16> loaded from %in is zero-extended to <64 x i32> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 112, #1
define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
  %narrow = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %wide = zext <64 x i16> %narrow to <64 x i32>
  store <64 x i32> %wide, <64 x i32> addrspace(1)* %out
  ret void
}
410
; <64 x i16> loaded from %in is sign-extended to <64 x i32> and stored to %out.
; Only the Evergreen vertex reads are checked for this function.
; FUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+.[XYZW]}}, 112, #1
define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
  %narrow = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %wide = sext <64 x i16> %narrow to <64 x i32>
  store <64 x i32> %wide, <64 x i32> addrspace(1)* %out
  ret void
}
427
; Loads an i16 from %in, zero-extends it to i64 and stores the result to %out.
; FUNC-LABEL: {{^}}global_zextload_i16_to_i64:
; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %narrow = load i16, i16 addrspace(1)* %in
  %wide = zext i16 %narrow to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
444
; Loads an i16 from %in, sign-extends it to i64 and stores the result to %out.
; FUNC-LABEL: {{^}}global_sextload_i16_to_i64:
; GCN-NOHSA-DAG: buffer_load_sshort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 15 ?
; EG: 31
define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %narrow = load i16, i16 addrspace(1)* %in
  %wide = sext i16 %narrow to i64
  store i64 %wide, i64 addrspace(1)* %out
  ret void
}
463
; Single-element vector variant: <1 x i16> is zero-extended to <1 x i64>.
; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %narrow = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %wide = zext <1 x i16> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(1)* %out
  ret void
}
474
; Single-element vector variant: <1 x i16> is sign-extended to <1 x i64>.
; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 15 ?
; EG: 31
define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %narrow = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %wide = sext <1 x i16> %narrow to <1 x i64>
  store <1 x i64> %wide, <1 x i64> addrspace(1)* %out
  ret void
}
487
; <2 x i16> loaded from %in is zero-extended to <2 x i64> and stored to %out.
; Only the function label is checked; no instruction patterns are verified.
; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i64:
define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %narrow = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = zext <2 x i16> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
  ret void
}
495
; <2 x i16> loaded from %in is sign-extended to <2 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %narrow = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = sext <2 x i16> %narrow to <2 x i64>
  store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
  ret void
}
505
; <4 x i16> loaded from %in is zero-extended to <4 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %narrow = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = zext <4 x i16> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(1)* %out
  ret void
}
515
; <4 x i16> loaded from %in is sign-extended to <4 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %narrow = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = sext <4 x i16> %narrow to <4 x i64>
  store <4 x i64> %wide, <4 x i64> addrspace(1)* %out
  ret void
}
525
; <8 x i16> loaded from %in is zero-extended to <8 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %narrow = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %wide = zext <8 x i16> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(1)* %out
  ret void
}
535
; <8 x i16> loaded from %in is sign-extended to <8 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %narrow = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %wide = sext <8 x i16> %narrow to <8 x i64>
  store <8 x i64> %wide, <8 x i64> addrspace(1)* %out
  ret void
}
545
; <16 x i16> loaded from %in is zero-extended to <16 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %narrow = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %wide = zext <16 x i16> %narrow to <16 x i64>
  store <16 x i64> %wide, <16 x i64> addrspace(1)* %out
  ret void
}
556
; <16 x i16> loaded from %in is sign-extended to <16 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %narrow = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %wide = sext <16 x i16> %narrow to <16 x i64>
  store <16 x i64> %wide, <16 x i64> addrspace(1)* %out
  ret void
}
567
; <32 x i16> loaded from %in is zero-extended to <32 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %narrow = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %wide = zext <32 x i16> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(1)* %out
  ret void
}
580
; <32 x i16> loaded from %in is sign-extended to <32 x i64> and stored to %out.
; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %narrow = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %wide = sext <32 x i16> %narrow to <32 x i64>
  store <32 x i64> %wide, <32 x i64> addrspace(1)* %out
  ret void
}
593
; ; XFUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i64:
; define void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
; %ext = zext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }

; ; XFUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i64:
; define void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
; %ext = sext <64 x i16> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
; }
609
; Attribute group #0 (nounwind), referenced by the extending-load functions in this file.
attributes #0 = { nounwind }