; Private (alloca) memory tests for the r600 target. The first RUN line checks
; llc code generation; the second checks the output of opt's
; -amdgpu-promote-alloca pass.
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: opt -S -mtriple=r600-unknown-unknown -mcpu=redwood -amdgpu-promote-alloca < %s | FileCheck -check-prefix=OPT %s

; Work-item id intrinsic, called by @work_item_info.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; Two dynamically indexed stores into a [5 x i32] private array followed by
; two constant-index loads from it.

; FUNC-LABEL: {{^}}mova_same_clause:

; R600: LDS_WRITE
; R600: LDS_WRITE
; R600: LDS_READ
; R600: LDS_READ

; OPT: call i32 @llvm.r600.read.local.size.y(), !range !0
; OPT: call i32 @llvm.r600.read.local.size.z(), !range !0
; OPT: call i32 @llvm.r600.read.tidig.x(), !range !0
; OPT: call i32 @llvm.r600.read.tidig.y(), !range !0
; OPT: call i32 @llvm.r600.read.tidig.z(), !range !0

define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
entry:
  %stack = alloca [5 x i32], align 4
  ; stack[in[0]] = 4
  %0 = load i32, i32 addrspace(1)* %in, align 4
  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
  store i32 4, i32* %arrayidx1, align 4
  ; stack[in[1]] = 5
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
  store i32 5, i32* %arrayidx3, align 4
  ; out[0] = stack[0]
  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
  %2 = load i32, i32* %arrayidx10, align 4
  store i32 %2, i32 addrspace(1)* %out, align 4
  ; out[1] = stack[1]
  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
  %3 = load i32, i32* %arrayidx12
  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
  store i32 %3, i32 addrspace(1)* %arrayidx13
  ret void
}

; This test checks that the stack offset is calculated correctly for structs.
; All register loads/stores should be optimized away, so there shouldn't be
; any MOVA instructions.
;
; XXX: This generated code has unnecessary MOVs, we should be able to optimize
; this.

; FUNC-LABEL: {{^}}multiple_structs:
; R600-NOT: MOVA_INT
%struct.point = type { i32, i32 }

define void @multiple_structs(i32 addrspace(1)* %out) #0 {
entry:
  %a = alloca %struct.point
  %b = alloca %struct.point
  %a.x.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 0
  %a.y.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 1
  %b.x.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 0
  %b.y.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 1
  ; a = { 0, 1 }, b = { 2, 3 }
  store i32 0, i32* %a.x.ptr
  store i32 1, i32* %a.y.ptr
  store i32 2, i32* %b.x.ptr
  store i32 3, i32* %b.y.ptr
  ; Re-derive pointers to the first field of each struct and sum the values.
  %a.indirect.ptr = getelementptr inbounds %struct.point, %struct.point* %a, i32 0, i32 0
  %b.indirect.ptr = getelementptr inbounds %struct.point, %struct.point* %b, i32 0, i32 0
  %a.indirect = load i32, i32* %a.indirect.ptr
  %b.indirect = load i32, i32* %b.indirect.ptr
  %0 = add i32 %a.indirect, %b.indirect
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Test direct access of a private array inside a loop. The private array
; loads and stores should be lowered to copies, so there shouldn't be any
; MOVA instructions.

; FUNC-LABEL: {{^}}direct_loop:
; R600-NOT: MOVA_INT

define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
entry:
  %prv_array_const = alloca [2 x i32]
  %prv_array = alloca [2 x i32]
  ; prv_array_const = { in[0], in[1] }
  %a = load i32, i32 addrspace(1)* %in
  %b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
  %b = load i32, i32 addrspace(1)* %b_src_ptr
  %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
  store i32 %a, i32* %a_dst_ptr
  %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
  store i32 %b, i32* %b_dst_ptr
  br label %for.body

for.body:
  ; Each iteration: prv_array[0] += prv_array_const[0].
  ; NOTE(review): prv_array is never stored to before the first load of %y,
  ; so the first iteration reads uninitialized private memory.
  %inc = phi i32 [0, %entry], [%count, %for.body]
  %x_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
  %x = load i32, i32* %x_ptr
  %y_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
  %y = load i32, i32* %y_ptr
  %xy = add i32 %x, %y
  store i32 %xy, i32* %y_ptr
  %count = add i32 %inc, 1
  %done = icmp eq i32 %count, 4095
  br i1 %done, label %for.end, label %for.body

for.end:
  ; out[0] = prv_array[0]
  %value_ptr = getelementptr inbounds [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
  %value = load i32, i32* %value_ptr
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; Dynamically indexed, sign-extended load from a [2 x i16] private array.

; FUNC-LABEL: {{^}}short_array:

; R600: MOVA_INT
define void @short_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
  %0 = alloca [2 x i16]
  %1 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 0
  %2 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 1
  store i16 0, i16* %1
  store i16 1, i16* %2
  %3 = getelementptr inbounds [2 x i16], [2 x i16]* %0, i32 0, i32 %index
  %4 = load i16, i16* %3
  %5 = sext i16 %4 to i32
  store i32 %5, i32 addrspace(1)* %out
  ret void
}

; Dynamically indexed, sign-extended load from a [2 x i8] private array.

; FUNC-LABEL: {{^}}char_array:

; R600: MOVA_INT
define void @char_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
  %0 = alloca [2 x i8]
  %1 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 0
  %2 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 1
  store i8 0, i8* %1
  store i8 1, i8* %2
  %3 = getelementptr inbounds [2 x i8], [2 x i8]* %0, i32 0, i32 %index
  %4 = load i8, i8* %3
  %5 = sext i8 %4 to i32
  store i32 %5, i32 addrspace(1)* %out
  ret void
}

; Make sure we don't overwrite workitem information with private memory

; FUNC-LABEL: {{^}}work_item_info:
; R600-NOT: MOV T0.X
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * T0.X
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
  %0 = alloca [2 x i32]
  %1 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 0
  %2 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 1
  store i32 0, i32* %1
  store i32 1, i32* %2
  %3 = getelementptr inbounds [2 x i32], [2 x i32]* %0, i32 0, i32 %in
  %4 = load i32, i32* %3
  ; Combine the private-array value with the work-item id so both stay live.
  %5 = call i32 @llvm.r600.read.tidig.x()
  %6 = add i32 %4, %5
  store i32 %6, i32 addrspace(1)* %out
  ret void
}

; Test that two stack objects are not stored in the same register
; The second stack object should be in T3.X
;
; FIXME: R600_CHECK is not a prefix enabled by either RUN line, so the two
; R600_CHECK lines below are ignored and CHAN is never captured for the
; following NOT check.
; FUNC-LABEL: {{^}}no_overlap:
; R600_CHECK: MOV
; R600_CHECK: [[CHAN:[XYZW]]]+
; R600-NOT: [[CHAN]]+
define void @no_overlap(i32 addrspace(1)* %out, i32 %in) #0 {
entry:
  %0 = alloca [3 x i8], align 1
  %1 = alloca [2 x i8], align 1
  %2 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 0
  %3 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 1
  %4 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 2
  %5 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 0
  %6 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 1
  ; First object = { 0, 1, 2 }, second object = { 1, 0 }
  store i8 0, i8* %2
  store i8 1, i8* %3
  store i8 2, i8* %4
  store i8 1, i8* %5
  store i8 0, i8* %6
  ; Read both objects at the same dynamic index and add the results.
  %7 = getelementptr inbounds [3 x i8], [3 x i8]* %0, i32 0, i32 %in
  %8 = getelementptr inbounds [2 x i8], [2 x i8]* %1, i32 0, i32 %in
  %9 = load i8, i8* %7
  %10 = load i8, i8* %8
  %11 = add i8 %9, %10
  %12 = sext i8 %11 to i32
  store i32 %12, i32 addrspace(1)* %out
  ret void
}

; Dynamically indexed load from the first row of a nested [2 x [2 x i8]]
; private array. No output checks are attached to this function.
define void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
  %alloca = alloca [2 x [2 x i8]]
  %gep0 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
  store i8 0, i8* %gep0
  store i8 1, i8* %gep1
  %gep2 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i8, i8* %gep2
  %sext = sext i8 %load to i32
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

; Dynamically indexed load from the first row of a nested [2 x [2 x i32]]
; private array. No output checks are attached to this function.
define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
  %alloca = alloca [2 x [2 x i32]]
  %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i32, i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

; Dynamically indexed load from the first row of a nested [2 x [2 x i64]]
; private array. No output checks are attached to this function.
define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
  %alloca = alloca [2 x [2 x i64]]
  %gep0 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
  %gep1 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
  store i64 0, i64* %gep0
  store i64 1, i64* %gep1
  %gep2 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
  %load = load i64, i64* %gep2
  store i64 %load, i64 addrspace(1)* %out
  ret void
}

; Pair of i32 fields used by the struct array tests in this file.
%struct.pair32 = type { i32, i32 }

; Stores to the second field of two structs in the first row of a nested
; struct array, then loads the first field at a dynamic index. No output
; checks are attached to this function.
define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
  %alloca = alloca [2 x [2 x %struct.pair32]]
  %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
  %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
  %load = load i32, i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

; Stores to element 0's second field and element 1's first field of a
; [2 x %struct.pair32] private array, then loads the first field at a dynamic
; index. No output checks are attached to this function.
define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 {
entry:
  %alloca = alloca [2 x %struct.pair32]
  %gep0 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
  %gep1 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
  store i32 0, i32* %gep0
  store i32 1, i32* %gep1
  %gep2 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
  %load = load i32, i32* %gep2
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

; Loads through a pointer chosen by a select between two elements of the same
; private array. No output checks are attached to this function.
define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
  %tmp = alloca [2 x i32]
  %tmp1 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
  %tmp2 = getelementptr inbounds [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
  store i32 0, i32* %tmp1
  store i32 1, i32* %tmp2
  ; Element 0 when %in is zero, element 1 otherwise.
  %cmp = icmp eq i32 %in, 0
  %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
  %load = load i32, i32* %sel
  store i32 %load, i32 addrspace(1)* %out
  ret void
}

; AMDGPUPromoteAlloca does not know how to handle ptrtoint. When it
; finds one, it should stop trying to promote.
;
; NOTE(review): neither RUN line in this file enables the SI prefix, so the
; SI checks below are currently not exercised.

; FUNC-LABEL: {{^}}ptrtoint:
; SI-NOT: ds_write
; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ;
define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32]
  %tmp0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  store i32 5, i32* %tmp0
  ; Round-trip the alloca address through an integer before indexing it.
  %tmp1 = ptrtoint [16 x i32]* %alloca to i32
  %tmp2 = add i32 %tmp1, 5
  %tmp3 = inttoptr i32 %tmp2 to i32*
  %tmp4 = getelementptr inbounds i32, i32* %tmp3, i32 %b
  %tmp5 = load i32, i32* %tmp4
  store i32 %tmp5, i32 addrspace(1)* %out
  ret void
}

; Range metadata expected on the intrinsic calls in the promoted output.
; OPT: !0 = !{i32 0, i32 2048}

attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="2" }