; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Exercises i32 scalar/vector loads and i32-to-i64 extending loads from
; addrspace(3). The gfx900 invocation enables the GFX9 prefix so that the
; "no m0 reference" checks in this file are actually evaluated.

; Scalar i32 load from addrspace(3), stored back to addrspace(3).
; The GCN checks expect a ds_read_b32 with no s_wqm_b64 ahead of it; the
; SI/CI/VI prefix expects m0 to be set to -1, the GFX9 prefix expects no
; reference to m0, and the EG prefix expects LDS_READ_RET.
; FUNC-LABEL: {{^}}local_load_i32:
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(3)* %in
  store i32 %ld, i32 addrspace(3)* %out
  ret void
}

; <2 x i32> load from addrspace(3), stored back to addrspace(3).
; The GCN checks expect the load to appear as one ds_read_b64.
; FUNC-LABEL: {{^}}local_load_v2i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN: ds_read_b64
define amdgpu_kernel void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(3)* %out
  ret void
}

; <3 x i32> load from addrspace(3), stored back to addrspace(3).
; The GCN checks expect one ds_read_b64 and one ds_read_b32, in either order.
; FUNC-LABEL: {{^}}local_load_v3i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read_b64
; GCN-DAG: ds_read_b32
define amdgpu_kernel void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(3)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(3)* %out
  ret void
}

; <4 x i32> load from addrspace(3), stored back to addrspace(3).
; The GCN checks expect a single ds_read2_b64 using offset1:1 as the last
; operand on the line.
; FUNC-LABEL: {{^}}local_load_v4i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

define amdgpu_kernel void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(3)* %out
  ret void
}

; <8 x i32> load from addrspace(3), stored back to addrspace(3).
; The GCN checks expect two ds_read2_b64 instructions (offsets 2/3 and 0/1),
; in either order.
; FUNC-LABEL: {{^}}local_load_v8i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
define amdgpu_kernel void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(3)* %out
  ret void
}

; <16 x i32> load from addrspace(3), stored back to addrspace(3).
; The GCN checks expect four ds_read2_b64 and four ds_write2_b64 instructions
; covering offset pairs 6/7, 4/5, 2/3 and 0/1, in any order.
; FUNC-LABEL: {{^}}local_load_v16i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
define amdgpu_kernel void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(3)* %out
  ret void
}

; Loads an i32 from addrspace(3), zero-extends it to i64 and stores the result
; to addrspace(3). Only the m0 handling is checked; the extension sequence
; itself is not matched.
; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %ld = load i32, i32 addrspace(3)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Loads an i32 from addrspace(3), sign-extends it to i64 and stores the result
; to addrspace(3). Only the m0 handling is checked; the extension sequence
; itself is not matched.
; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %ld = load i32, i32 addrspace(3)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; Loads a <1 x i32> from addrspace(3), zero-extends it to <1 x i64> and stores
; the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Loads a <1 x i32> from addrspace(3), sign-extends it to <1 x i64> and stores
; the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; Loads a <2 x i32> from addrspace(3), zero-extends it to <2 x i64> and stores
; the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Loads a <2 x i32> from addrspace(3), sign-extends it to <2 x i64> and stores
; the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; Loads a <4 x i32> from addrspace(3), zero-extends it to <4 x i64> and stores
; the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Loads a <4 x i32> from addrspace(3), sign-extends it to <4 x i64> and stores
; the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Loads an <8 x i32> from addrspace(3), zero-extends it to <8 x i64> and stores
; the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; Loads an <8 x i32> from addrspace(3), sign-extends it to <8 x i64> and stores
; the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; Loads a <16 x i32> from addrspace(3), sign-extends it to <16 x i64> and
; stores the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; Loads a <16 x i32> from addrspace(3), zero-extends it to <16 x i64> and
; stores the result to addrspace(3). Only the m0 handling is checked.
; The label pattern ends with ':' like every other label in this file, so it
; anchors on the full symbol name rather than on a prefix of it.
; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; Loads a <32 x i32> from addrspace(3), sign-extends it to <32 x i64> and
; stores the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; Loads a <32 x i32> from addrspace(3), zero-extends it to <32 x i64> and
; stores the result to addrspace(3). Only the m0 handling is checked.
; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; Attribute group #0, referenced by the kernel definitions in this file.
attributes #0 = { nounwind }