; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s

; Test that doing a shift of a pointer with a constant add will be
; folded into the constant offset addressing mode even if the add has
; multiple uses. This is relevant to accessing 2 separate, adjacent
; LDS globals.


declare i32 @llvm.r600.read.tidig.x() #1

@lds0 = addrspace(3) global [512 x float] undef, align 4
@lds1 = addrspace(3) global [512 x float] undef, align 4


; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8

; SI-LABEL: {{^}}load_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
; SI: s_endpgm
define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  store float %val0, float addrspace(1)* %out
  ret void
}

; Make sure once the first use is folded into the addressing mode, the
; remaining add use goes through the normal shl + add constant fold.

; SI-LABEL: {{^}}load_shl_base_lds_1:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
; SI-DAG: buffer_store_dword [[RESULT]]
; SI-DAG: buffer_store_dword [[ADDUSE]]
; SI: s_endpgm
define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %shl_add_use = shl i32 %idx.0, 2
  store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
  store float %val0, float addrspace(1)* %out
  ret void
}

@maxlds = addrspace(3) global [65536 x i8] undef, align 4

; SI-LABEL: {{^}}load_shl_base_lds_max_offset
; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
; SI: s_endpgm
define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 65535
  %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
  %val0 = load i8, i8 addrspace(3)* %arrayidx0
  store i32 %idx.0, i32 addrspace(1)* %add_use
  store i8 %val0, i8 addrspace(1)* %out
  ret void
}

; The two globals are placed adjacent in memory, so the same base
; pointer can be used with an offset into the second one.

; SI-LABEL: {{^}}load_shl_base_lds_2:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: s_mov_b32 m0, -1
; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
; SI: s_endpgm
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 64
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  %sum = fadd float %val0, %val1
  store float %sum, float addrspace(1)* %out, align 4
  ret void
}
87
Tom Stellard79243d92014-10-01 17:15:17 +000088; SI-LABEL: {{^}}store_shl_base_lds_0:
Tom Stellard326d6ec2014-11-05 14:50:53 +000089; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
Tom Stellardeb05c612015-02-26 17:08:43 +000090; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
Tom Stellard326d6ec2014-11-05 14:50:53 +000091; SI: s_endpgm
Matt Arsenaultb2baffa2014-08-15 17:49:05 +000092define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
93 %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
94 %idx.0 = add nsw i32 %tid.x, 2
David Blaikie79e6c742015-02-27 19:29:02 +000095 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
Matt Arsenaultb2baffa2014-08-15 17:49:05 +000096 store float 1.0, float addrspace(3)* %arrayidx0, align 4
97 store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
98 ret void
99}


; --------------------------------------------------------------------------------
; Atomics.

@lds2 = addrspace(3) global [512 x i32] undef, align 4

; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
;   %idx.0 = add nsw i32 %tid.x, 2
;   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
;   %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
;   store i32 %val, i32 addrspace(1)* %out, align 4
;   store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
;   ret void
; }


; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
  %result = extractvalue { i32, i1 } %pair, 0
  store i32 %result, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
;   %idx.0 = add nsw i32 %tid.x, 2
;   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
;   %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
;   store i32 %val, i32 addrspace(1)* %out, align 4
;   store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
;   ret void
; }

; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
  %idx.0 = add nsw i32 %tid.x, 2
  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
  %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }