; Test 32-bit subtraction in which the second operand is variable.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; External function with no arguments and an i32 result.  It is the target
; of the conditional tail calls in f2 and f3 and the call that the loaded
; values must survive in f14.
declare i32 @foo()

; Check SLR: both operands of the unsigned subtraction are in registers.
; The difference is stored through %res and the overflow bit is returned as
; a zero-extended i1; the expected output builds that bit from the condition
; code with an IPM/AFI/RISBG sequence.
define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f1:
; CHECK: slr %r3, %r4
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check using the overflow result for a branch.  @foo is tail-called only
; when the subtraction overflows; the expected output folds the branch and
; the call into a single conditional jump to foo@PLT taken directly on the
; condition code left by SLR.
define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f2:
; CHECK: slr %r3, %r4
; CHECK: st %r3, 0(%r5)
; CHECK: jgle foo@PLT
; CHECK: br %r14
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  br i1 %obit, label %call, label %exit

call:
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; ... and the same with the inverted direction: @foo is tail-called only
; when the subtraction does NOT overflow, so the expected conditional jump
; uses the negated condition (jgnle instead of jgle).
define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) {
; CHECK-LABEL: f3:
; CHECK: slr %r3, %r4
; CHECK: st %r3, 0(%r5)
; CHECK: jgnle foo@PLT
; CHECK: br %r14
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  br i1 %obit, label %exit, label %call

call:
  tail call i32 @foo()
  br label %exit

exit:
  ret void
}

; Check the low end of the SL range.  The second operand is loaded from
; %src with no offset, and the expected output folds that load into SL with
; displacement 0.
define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f4:
; CHECK: sl %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %b = load i32, i32 *%src
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned SL range.  Element 1023 of an i32 array
; is byte offset 4092, the last word-aligned displacement for which SL
; (rather than SLY, see f6) is expected.
define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f5:
; CHECK: sl %r3, 4092(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 1023
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word up, which should use SLY instead of SL.  Element 1024
; is byte offset 4096, one word past the displacement tested in f5.
define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f6:
; CHECK: sly %r3, 4096(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 1024
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the aligned SLY range.  Element 131071 is byte
; offset 524284, the last word-aligned displacement for which a single SLY
; is expected (one word further needs extra address arithmetic, see f8).
define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f7:
; CHECK: sly %r3, 524284(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131071
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element 131072 is byte offset 524288; the expected output first adds that
; offset to the base register with AGFI and then uses SL with displacement 0.
define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f8:
; CHECK: agfi %r4, 524288
; CHECK: sl %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131072
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the high end of the negative aligned SLY range.  Element -1 is byte
; offset -4, the negative word-aligned displacement closest to zero; SLY is
; expected because the displacement is negative.
define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f9:
; CHECK: sly %r3, -4(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -1
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the low end of the SLY range.  Element -131072 is byte offset
; -524288, the most negative displacement for which a single SLY is expected
; (one word lower needs extra address arithmetic, see f11).
define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f10:
; CHECK: sly %r3, -524288(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131072
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element -131073 is byte offset -524292; the expected output first adds
; that offset to the base register with AGFI and then uses SL with
; displacement 0.
define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) {
; CHECK-LABEL: f11:
; CHECK: agfi %r4, -524292
; CHECK: sl %r3, 0(%r4)
; CHECK-DAG: st %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131073
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that SL allows an index.  The address is formed as
; %src + %index + 4092, and the expected output folds all three parts into
; one SL operand; either register may serve as base or index.
define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f12:
; CHECK: sl %r4, 4092({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4092
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that SLY allows an index.  Same shape as f12, but the constant
; offset is 4096, so the expected output uses SLY with base, index and
; displacement folded into one operand.
define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) {
; CHECK-LABEL: f13:
; CHECK: sly %r4, 4096({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: st %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], -536870912
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 4096
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue {i32, i1} %t, 0
  %obit = extractvalue {i32, i1} %t, 1
  store i32 %val, i32 *%res
  ret i1 %obit
}

; Check that subtractions of spilled values can use SL rather than SLR.
; Ten i32 values are loaded before the call to @foo and consumed only after
; it, so all of them are live across the call; this is intended to force at
; least some of them into stack slots.  The result of @foo is then reduced
; by each value in turn and the ten overflow bits are ORed together.  The
; expected output requires at least one subtraction to read its operand
; directly from the stack (offset 160 or 164 from %r15).
define zeroext i1 @f14(i32 *%ptr0) {
; CHECK-LABEL: f14:
; CHECK: brasl %r14, foo@PLT
; CHECK: sl %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
  %ptr9 = getelementptr i32, i32 *%ptr0, i64 18

  %val0 = load i32, i32 *%ptr0
  %val1 = load i32, i32 *%ptr1
  %val2 = load i32, i32 *%ptr2
  %val3 = load i32, i32 *%ptr3
  %val4 = load i32, i32 *%ptr4
  %val5 = load i32, i32 *%ptr5
  %val6 = load i32, i32 *%ptr6
  %val7 = load i32, i32 *%ptr7
  %val8 = load i32, i32 *%ptr8
  %val9 = load i32, i32 *%ptr9

  %ret = call i32 @foo()

  %t0 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %ret, i32 %val0)
  %sub0 = extractvalue {i32, i1} %t0, 0
  %obit0 = extractvalue {i32, i1} %t0, 1
  %t1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub0, i32 %val1)
  %sub1 = extractvalue {i32, i1} %t1, 0
  %obit1 = extractvalue {i32, i1} %t1, 1
  %res1 = or i1 %obit0, %obit1
  %t2 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub1, i32 %val2)
  %sub2 = extractvalue {i32, i1} %t2, 0
  %obit2 = extractvalue {i32, i1} %t2, 1
  %res2 = or i1 %res1, %obit2
  %t3 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub2, i32 %val3)
  %sub3 = extractvalue {i32, i1} %t3, 0
  %obit3 = extractvalue {i32, i1} %t3, 1
  %res3 = or i1 %res2, %obit3
  %t4 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub3, i32 %val4)
  %sub4 = extractvalue {i32, i1} %t4, 0
  %obit4 = extractvalue {i32, i1} %t4, 1
  %res4 = or i1 %res3, %obit4
  %t5 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub4, i32 %val5)
  %sub5 = extractvalue {i32, i1} %t5, 0
  %obit5 = extractvalue {i32, i1} %t5, 1
  %res5 = or i1 %res4, %obit5
  %t6 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub5, i32 %val6)
  %sub6 = extractvalue {i32, i1} %t6, 0
  %obit6 = extractvalue {i32, i1} %t6, 1
  %res6 = or i1 %res5, %obit6
  %t7 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub6, i32 %val7)
  %sub7 = extractvalue {i32, i1} %t7, 0
  %obit7 = extractvalue {i32, i1} %t7, 1
  %res7 = or i1 %res6, %obit7
  %t8 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub7, i32 %val8)
  %sub8 = extractvalue {i32, i1} %t8, 0
  %obit8 = extractvalue {i32, i1} %t8, 1
  %res8 = or i1 %res7, %obit8
  %t9 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %sub8, i32 %val9)
  %sub9 = extractvalue {i32, i1} %t9, 0
  %obit9 = extractvalue {i32, i1} %t9, 1
  %res9 = or i1 %res8, %obit9

  ret i1 %res9
}

; Unsigned subtract-with-overflow intrinsic used by every function in this
; file: element 0 of the result is the i32 difference, element 1 is the i1
; overflow bit.
declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone