; Test additions between an i64 and a sign-extended i32.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; External function: called by some tests as a branch target and by others
; to keep values live across a call.
declare i64 @foo()

; Check AGFR: the i32 operand arrives in a register, is sign-extended to i64
; and added to %a.  The sum is stored through %res and the overflow flag is
; returned as a zero-extended i1.
define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; CHECK-LABEL: f1:
; CHECK: agfr %r3, %r4
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check using the overflow result for a branch: @foo is tail-called only when
; the addition overflows.
define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; CHECK-LABEL: f2:
; CHECK: agfr %r3, %r4
; CHECK: stg %r3, 0(%r5)
; CHECK: jgo foo@PLT
; CHECK: br %r14
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  br i1 %obit, label %call, label %exit

call:
  tail call i64 @foo()
  br label %exit

exit:
  ret void
}

; ... and the same with the inverted direction: @foo is tail-called only when
; the addition does not overflow.
define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) {
; CHECK-LABEL: f3:
; CHECK: agfr %r3, %r4
; CHECK: stg %r3, 0(%r5)
; CHECK: jgno foo@PLT
; CHECK: br %r14
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  br i1 %obit, label %exit, label %call

call:
  tail call i64 @foo()
  br label %exit

exit:
  ret void
}

; Check AGF with no displacement: the i32 operand is loaded directly from
; %src, so the load and sign extension are expected to fold into the add.
define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f4:
; CHECK: agf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %b = load i32, i32 *%src
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the high end of the aligned AGF range.
; Element index 131071 of an i32 array is byte offset 524284, which is still
; expected to be encoded as the AGF displacement.
define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f5:
; CHECK: agf %r3, 524284(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131071
  %b = load i32, i32 *%ptr
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element index 131072 is byte offset 524288; the expected code adds that to
; the base register first and then uses AGF with a zero displacement.
define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f6:
; CHECK: agfi %r4, 524288
; CHECK: agf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 131072
  %b = load i32, i32 *%ptr
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the high end of the negative aligned AGF range.
; Element index -1 is byte offset -4.
define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f7:
; CHECK: agf %r3, -4(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -1
  %b = load i32, i32 *%ptr
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the low end of the AGF range.
; Element index -131072 is byte offset -524288, which is still expected to be
; encoded as the AGF displacement.
define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f8:
; CHECK: agf %r3, -524288(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131072
  %b = load i32, i32 *%ptr
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
; Element index -131073 is byte offset -524292; the expected code adds that to
; the base register first and then uses AGF with a zero displacement.
define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) {
; CHECK-LABEL: f9:
; CHECK: agfi %r4, -524292
; CHECK: agf %r3, 0(%r4)
; CHECK-DAG: stg %r3, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%src, i64 -131073
  %b = load i32, i32 *%ptr
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check that AGF allows an index.
; The address is %src + %index + 524284; either register may be chosen as the
; base or the index, so the pattern accepts both operand orders.
define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) {
; CHECK-LABEL: f10:
; CHECK: agf %r4, 524284({{%r3,%r2|%r2,%r3}})
; CHECK-DAG: stg %r4, 0(%r5)
; CHECK-DAG: ipm [[REG:%r[0-5]]]
; CHECK-DAG: afi [[REG]], 1342177280
; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
; CHECK: br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 524284
  %ptr = inttoptr i64 %add2 to i32 *
  %b = load i32, i32 *%ptr
  %bext = sext i32 %b to i64
  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, i64 *%res
  ret i1 %obit
}

; Check that additions of spilled values can use AGF rather than AGFR.
; Ten i32 values are loaded, adjusted and stored back, and all of them stay
; live across the call to @foo.  They are then sign-extended and accumulated
; onto the call result with overflow checking.  At least one of the adds is
; expected to read its operand straight from a stack slot at 160(%r15) or
; 164(%r15).
define zeroext i1 @f11(i32 *%ptr0) {
; CHECK-LABEL: f11:
; CHECK: brasl %r14, foo@PLT
; CHECK: agf %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
  %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
  %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
  %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
  %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
  %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
  %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
  %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
  %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
  %ptr9 = getelementptr i32, i32 *%ptr0, i64 18

  %val0 = load i32, i32 *%ptr0
  %val1 = load i32, i32 *%ptr1
  %val2 = load i32, i32 *%ptr2
  %val3 = load i32, i32 *%ptr3
  %val4 = load i32, i32 *%ptr4
  %val5 = load i32, i32 *%ptr5
  %val6 = load i32, i32 *%ptr6
  %val7 = load i32, i32 *%ptr7
  %val8 = load i32, i32 *%ptr8
  %val9 = load i32, i32 *%ptr9

  %frob0 = add i32 %val0, 100
  %frob1 = add i32 %val1, 100
  %frob2 = add i32 %val2, 100
  %frob3 = add i32 %val3, 100
  %frob4 = add i32 %val4, 100
  %frob5 = add i32 %val5, 100
  %frob6 = add i32 %val6, 100
  %frob7 = add i32 %val7, 100
  %frob8 = add i32 %val8, 100
  %frob9 = add i32 %val9, 100

  store i32 %frob0, i32 *%ptr0
  store i32 %frob1, i32 *%ptr1
  store i32 %frob2, i32 *%ptr2
  store i32 %frob3, i32 *%ptr3
  store i32 %frob4, i32 *%ptr4
  store i32 %frob5, i32 *%ptr5
  store i32 %frob6, i32 *%ptr6
  store i32 %frob7, i32 *%ptr7
  store i32 %frob8, i32 *%ptr8
  store i32 %frob9, i32 *%ptr9

  %ret = call i64 @foo()

  %ext0 = sext i32 %frob0 to i64
  %ext1 = sext i32 %frob1 to i64
  %ext2 = sext i32 %frob2 to i64
  %ext3 = sext i32 %frob3 to i64
  %ext4 = sext i32 %frob4 to i64
  %ext5 = sext i32 %frob5 to i64
  %ext6 = sext i32 %frob6 to i64
  %ext7 = sext i32 %frob7 to i64
  %ext8 = sext i32 %frob8 to i64
  %ext9 = sext i32 %frob9 to i64

  %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %ret, i64 %ext0)
  %add0 = extractvalue {i64, i1} %t0, 0
  %obit0 = extractvalue {i64, i1} %t0, 1
  %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add0, i64 %ext1)
  %add1 = extractvalue {i64, i1} %t1, 0
  %obit1 = extractvalue {i64, i1} %t1, 1
  %res1 = or i1 %obit0, %obit1
  %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add1, i64 %ext2)
  %add2 = extractvalue {i64, i1} %t2, 0
  %obit2 = extractvalue {i64, i1} %t2, 1
  %res2 = or i1 %res1, %obit2
  %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add2, i64 %ext3)
  %add3 = extractvalue {i64, i1} %t3, 0
  %obit3 = extractvalue {i64, i1} %t3, 1
  %res3 = or i1 %res2, %obit3
  %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add3, i64 %ext4)
  %add4 = extractvalue {i64, i1} %t4, 0
  %obit4 = extractvalue {i64, i1} %t4, 1
  %res4 = or i1 %res3, %obit4
  %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add4, i64 %ext5)
  %add5 = extractvalue {i64, i1} %t5, 0
  %obit5 = extractvalue {i64, i1} %t5, 1
  %res5 = or i1 %res4, %obit5
  %t6 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add5, i64 %ext6)
  %add6 = extractvalue {i64, i1} %t6, 0
  %obit6 = extractvalue {i64, i1} %t6, 1
  %res6 = or i1 %res5, %obit6
  %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add6, i64 %ext7)
  %add7 = extractvalue {i64, i1} %t7, 0
  %obit7 = extractvalue {i64, i1} %t7, 1
  %res7 = or i1 %res6, %obit7
  %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add7, i64 %ext8)
  %add8 = extractvalue {i64, i1} %t8, 0
  %obit8 = extractvalue {i64, i1} %t8, 1
  %res8 = or i1 %res7, %obit8
  %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add8, i64 %ext9)
  %add9 = extractvalue {i64, i1} %t9, 0
  %obit9 = extractvalue {i64, i1} %t9, 1
  %res9 = or i1 %res8, %obit9

  ret i1 %res9
}

; Signed-add-with-overflow intrinsic used by every test above; returns the
; i64 sum together with an i1 overflow flag.
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
