; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Testing for ds_read/write_128
; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s

; FUNC-LABEL: {{^}}local_load_i32:
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(3)* %in
  store i32 %ld, i32 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v2i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN: ds_read_b64
define amdgpu_kernel void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v3i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read_b64
; GCN-DAG: ds_read_b32
define amdgpu_kernel void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(3)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v4i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

define amdgpu_kernel void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v8i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
define amdgpu_kernel void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_load_v16i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
define amdgpu_kernel void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %ld = load i32, i32 addrspace(3)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %ld = load i32, i32 addrspace(3)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load.
; FUNC-LABEL: {{^}}local_v4i32_to_128:

; SI-NOT: ds_read_b128
; SI-NOT: ds_write_b128

; CIVI: ds_read_b128
; CIVI: ds_write_b128

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_v4i32_to_128(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 16
  store <4 x i32> %ld, <4 x i32> addrspace(3)* %out, align 16
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

attributes #0 = { nounwind }