; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Load an i32 from addrspace(3) at element index 7 and store it to
; addrspace(1). The 28-byte displacement (7 * 4) is expected to be folded into
; the ds_read_b32 offset field. SI/CI/VI runs expect an m0 setup; the GFX9 run
; expects m0 to be untouched.
; GCN-LABEL: {{^}}local_i32_load:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28
; GCN: buffer_store_dword [[REG]],
define amdgpu_kernel void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
  %gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
  %val = load i32, i32 addrspace(3)* %gep, align 4
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Load an i32 directly through the incoming addrspace(3) pointer (no GEP) and
; store it to addrspace(1); a plain ds_read_b32 is expected. SI/CI/VI runs
; expect an m0 setup; the GFX9 run expects m0 to be untouched.
; GCN-LABEL: {{^}}local_i32_load_0_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}}
; GCN: buffer_store_dword [[REG]],
define amdgpu_kernel void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
  %val = load i32, i32 addrspace(3)* %in, align 4
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Load an i8 from addrspace(3) at byte offset 65535. The whole displacement is
; expected to fit in the ds_read_u8 offset field, so no separate address
; arithmetic instruction may appear before the load.
; GCN-LABEL: {{^}}local_i8_load_i16_max_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535
; GCN: buffer_store_byte [[REG]],
define amdgpu_kernel void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
  %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65535
  %val = load i8, i8 addrspace(3)* %gep, align 4
  store i8 %val, i8 addrspace(1)* %out, align 4
  ret void
}

; Load an i8 from addrspace(3) at byte offset 65536, one past the offset used
; by the i16_max_offset test. Here the displacement is expected to be applied
; with a scalar instruction and the result copied to a VGPR, so the ds_read_u8
; carries no offset.
; GCN-LABEL: {{^}}local_i8_load_over_i16_max_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
; SI, which is why it is being OR'd with the base pointer.
; SI-DAG: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; CI-DAG: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; VI-DAG: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; GFX9-DAG: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000

; GCN-DAG: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
; GCN: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]]
; GCN: buffer_store_byte [[REG]],
define amdgpu_kernel void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
  %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65536
  %val = load i8, i8 addrspace(3)* %gep, align 4
  store i8 %val, i8 addrspace(1)* %out, align 4
  ret void
}

; Load an i64 from addrspace(3) at element index 7 and store it to
; addrspace(1). The 56-byte displacement (7 * 8) is expected to be folded into
; the ds_read_b64 offset field with no separate address arithmetic.
; The destination is a 64-bit VGPR pair, matched with escaped literal brackets.
; GCN-LABEL: {{^}}local_i64_load:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset:56
; GCN: buffer_store_dwordx2 [[REG]],
define amdgpu_kernel void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %in, i32 7
  %val = load i64, i64 addrspace(3)* %gep, align 8
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; Load an i64 directly through the incoming addrspace(3) pointer (no GEP) and
; store it to addrspace(1); a plain ds_read_b64 into a VGPR pair is expected.
; GCN-LABEL: {{^}}local_i64_load_0_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: buffer_store_dwordx2 [[REG]],
define amdgpu_kernel void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
  %val = load i64, i64 addrspace(3)* %in, align 8
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; Load a double from addrspace(3) at element index 7 and store it to
; addrspace(1). The 56-byte displacement (7 * 8) is expected to be folded into
; the ds_read_b64 offset field with no separate address arithmetic.
; The destination is a 64-bit VGPR pair, matched with escaped literal brackets.
; GCN-LABEL: {{^}}local_f64_load:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset:56
; GCN: buffer_store_dwordx2 [[REG]],
define amdgpu_kernel void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
  %gep = getelementptr double, double addrspace(3)* %in, i32 7
  %val = load double, double addrspace(3)* %gep, align 8
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

; Load a double directly through the incoming addrspace(3) pointer (no GEP)
; and store it to addrspace(1); a plain ds_read_b64 into a VGPR pair is
; expected.
; GCN-LABEL: {{^}}local_f64_load_0_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; GCN: buffer_store_dwordx2 [[REG]],
define amdgpu_kernel void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
  %val = load double, double addrspace(3)* %in, align 8
  store double %val, double addrspace(1)* %out, align 8
  ret void
}

; Store the i64 constant 5678 to addrspace(3) at element index 7. The 56-byte
; displacement (7 * 8) is expected to be folded into the ds_write_b64 offset
; field with no separate address arithmetic.
; GCN-LABEL: {{^}}local_i64_store:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
define amdgpu_kernel void @local_i64_store(i64 addrspace(3)* %out) nounwind {
  %gep = getelementptr i64, i64 addrspace(3)* %out, i32 7
  store i64 5678, i64 addrspace(3)* %gep, align 8
  ret void
}

; Store the i64 constant 1234 directly through the incoming addrspace(3)
; pointer (no GEP); a single ds_write_b64 with no address arithmetic is
; expected.
; GCN-LABEL: {{^}}local_i64_store_0_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
  store i64 1234, i64 addrspace(3)* %out, align 8
  ret void
}

; Store the double constant 16.0 to addrspace(3) at element index 7. The
; 56-byte displacement (7 * 8) is expected to be folded into the ds_write_b64
; offset field with no separate address arithmetic.
; GCN-LABEL: {{^}}local_f64_store:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
define amdgpu_kernel void @local_f64_store(double addrspace(3)* %out) nounwind {
  %gep = getelementptr double, double addrspace(3)* %out, i32 7
  store double 16.0, double addrspace(3)* %gep, align 8
  ret void
}

; Store the double constant 20.0 directly through the incoming addrspace(3)
; pointer (no GEP); a single ds_write_b64 is expected.
; GCN-LABEL: {{^}}local_f64_store_0_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
  store double 20.0, double addrspace(3)* %out, align 8
  ret void
}

; Store a <2 x i64> constant to addrspace(3) at element index 7 (byte offset
; 7 * 16 = 112). Both halves are expected to go out in one ds_write2_b64 whose
; offset0/offset1 values of 14 and 15 are consistent with 8-byte units
; (112 / 8 = 14), with no separate address arithmetic.
; GCN-LABEL: {{^}}local_v2i64_store:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
; GCN: s_endpgm
define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
  %gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
  store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
  ret void
}

; Store a <2 x i64> constant directly through the incoming addrspace(3)
; pointer (no GEP). One ds_write2_b64 is expected, with only offset1:1
; printed for the second half and no separate address arithmetic.
; GCN-LABEL: {{^}}local_v2i64_store_0_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
; GCN: s_endpgm
define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
  store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
  ret void
}

; Store a <4 x i64> constant to addrspace(3) at element index 7 (byte offset
; 7 * 32 = 224). Two ds_write2_b64 instructions are expected, in either order,
; covering offsets 28..31, which is consistent with 8-byte units
; (224 / 8 = 28). No separate address arithmetic may appear.
; GCN-LABEL: {{^}}local_v4i64_store:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
; GCN: s_endpgm
define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
  %gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7
  store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
  ret void
}

; Store a <4 x i64> constant directly through the incoming addrspace(3)
; pointer (no GEP). Two ds_write2_b64 instructions are expected, in either
; order: one with offset0:2 offset1:3 and one with only offset1:1 printed.
; No separate address arithmetic may appear.
; GCN-LABEL: {{^}}local_v4i64_store_0_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-NOT: add
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
; GCN: s_endpgm
define amdgpu_kernel void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
  store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
  ret void
}