; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s

; Test that doing a shift of a pointer with a constant add will be
; folded into the constant offset addressing mode even if the add has
; multiple uses. This is relevant to accessing 2 separate, adjacent
; LDS globals.
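;
; Roughly, for a 4-byte element type the fold being exercised here is:
;   &lds[tid + K]  ==>  base = tid << 2, immediate offset = K * 4
; so the DS instruction absorbs the constant and the original (tid + K)
; add stays available for its other, non-address uses.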


declare i32 @llvm.r600.read.tidig.x() #1

@lds0 = addrspace(3) global [512 x float] undef, align 4
@lds1 = addrspace(3) global [512 x float] undef, align 4


; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8
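; In bytes: (tid + 2) * 4 = tid * 4 + 8, so the shifted tid becomes the
; address register and the remaining 8 becomes the immediate offset:8 operand.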

; SI-LABEL: {{^}}load_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
; SI: s_endpgm
define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float addrspace(3)* %arrayidx0, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  store float %val0, float addrspace(1)* %out
  ret void
}

; Make sure once the first use is folded into the addressing mode, the
; remaining add use goes through the normal shl + add constant fold.
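; For that second use, shl (add tid, 2), 2 should become (shl tid, 2) + 8,
; which is why a separate v_add of the constant 8 is expected below.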

; SI-LABEL: {{^}}load_shl_base_lds_1:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
; SI-DAG: buffer_store_dword [[RESULT]]
; SI-DAG: buffer_store_dword [[ADDUSE]]
; SI: s_endpgm
define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float addrspace(3)* %arrayidx0, align 4
  %shl_add_use = shl i32 %idx.0, 2
  store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
  store float %val0, float addrspace(1)* %out
  ret void
}

@maxlds = addrspace(3) global [65536 x i8] undef, align 4

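; 65535 presumably fills the 16-bit DS offset field exactly; the i8 element
; type means the index needs no shift, so the constant can still be folded
; into the load while the add survives for its other use.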
; SI-LABEL: {{^}}load_shl_base_lds_max_offset:
; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
; SI: s_endpgm
define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 65535
  %arrayidx0 = getelementptr inbounds [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
  %val0 = load i8 addrspace(3)* %arrayidx0
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i8 %val0, i8 addrspace(1)* %out
  ret void
}

; The two globals are placed adjacent in memory, so the same base
; pointer can be used with an offset into the second one.
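; The ds_read2st64_b32 offsets below are presumably in units of 64 dwords
; (256 bytes): offset0:1 is element 64 of @lds0, and offset1:9 lands 8 * 256
; = 2048 bytes further on, i.e. element 64 of @lds1 (512 floats past @lds0).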

; SI-LABEL: {{^}}load_shl_base_lds_2:
; SI: s_mov_b32 m0, -1
; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
; SI: s_endpgm
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 64
  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float addrspace(3)* %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val1 = load float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  store float %sum, float addrspace(1)* %out, align 4
  ret void
}

; SI-LABEL: {{^}}store_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
; SI: s_endpgm
define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  store float 1.0, float addrspace(3)* %arrayidx0, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}


; --------------------------------------------------------------------------------
; Atomics.

@lds2 = addrspace(3) global [512 x i32] undef, align 4

; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
; %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
; %val = load atomic i32 addrspace(3)* %arrayidx0 seq_cst, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
; ret void
; }


; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
  %result = extractvalue { i32, i1 } %pair, 0
  store i32 %result, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
; %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
; store i32 %val, i32 addrspace(1)* %out, align 4
; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
; ret void
; }

; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }