blob: fa6b845ea6f252edd8d22103303985cbd5c010ee [file] [log] [blame]
1; Test the handling of base + 12-bit displacement addresses for large frames,
2; in cases where no 20-bit form exists.
3;
4; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
5; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
6
7; This file tests what happens when a displacement is converted from
8; being relative to the start of a frame object to being relative to
9; the frame itself. In some cases the test is only possible if two
10; objects are allocated.
11;
12; Rather than rely on a particular order for those objects, the tests
13; instead allocate two objects of the same size and apply the test to
14; both of them. For consistency, all tests follow this model, even if
15; one object would actually be enough.
16
17; First check the highest in-range offset after conversion, which is 4092
18; for word-addressing instructions like MVHI.
19;
20; The last in-range doubleword offset is 4088. Since the frame has an
21; 8-byte emergency spill slot at 160(%r15), the first object starts at 168
22; and the amount that we need to allocate in order to put another object
23; at offset 4088 is (4088 - 168) / 4 = 980 words.
24define void @f1() {
25; CHECK-NOFP: f1:
26; CHECK-NOFP: mvhi 4092(%r15), 42
27; CHECK-NOFP: br %r14
28;
29; CHECK-FP: f1:
30; CHECK-FP: mvhi 4092(%r11), 42
31; CHECK-FP: br %r14
; Highest in-range case. Two 980-word (3920-byte) objects are allocated so
; that one of them starts at frame offset 4088 whichever way round they are
; laid out. Element 1 of that object is at 4088 + 4 = 4092, so the store is
; expected to be a single MVHI with displacement 4092 and no LAY. Only the
; store into the higher object is matched by the checks.
32 %region1 = alloca [980 x i32], align 8
33 %region2 = alloca [980 x i32], align 8
34 %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 1
35 %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 1
36 store volatile i32 42, i32 *%ptr1
37 store volatile i32 42, i32 *%ptr2
38 ret void
39}
40
41; Test the first out-of-range offset. We cannot use an index register here.
42define void @f2() {
43; CHECK-NOFP: f2:
44; CHECK-NOFP: lay %r1, 4096(%r15)
45; CHECK-NOFP: mvhi 0(%r1), 42
46; CHECK-NOFP: br %r14
47;
48; CHECK-FP: f2:
49; CHECK-FP: lay %r1, 4096(%r11)
50; CHECK-FP: mvhi 0(%r1), 42
51; CHECK-FP: br %r14
; First out-of-range case. Same 980-word objects as f1, but element 2 of the
; object at frame offset 4088 is at 4088 + 8 = 4096. The checks expect the
; address to be split into an LAY of 4096 from the frame base into %r1
; followed by an MVHI with displacement 0.
52 %region1 = alloca [980 x i32], align 8
53 %region2 = alloca [980 x i32], align 8
54 %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
55 %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
56 store volatile i32 42, i32 *%ptr1
57 store volatile i32 42, i32 *%ptr2
58 ret void
59}
60
61; Test the next offset after that.
62define void @f3() {
63; CHECK-NOFP: f3:
64; CHECK-NOFP: lay %r1, 4096(%r15)
65; CHECK-NOFP: mvhi 4(%r1), 42
66; CHECK-NOFP: br %r14
67;
68; CHECK-FP: f3:
69; CHECK-FP: lay %r1, 4096(%r11)
70; CHECK-FP: mvhi 4(%r1), 42
71; CHECK-FP: br %r14
; Same 980-word objects as f1, but element 3 of the object at frame offset
; 4088 is at 4088 + 12 = 4100. The checks expect an LAY of 4096 from the
; frame base with the remaining 4 bytes left as the MVHI displacement.
72 %region1 = alloca [980 x i32], align 8
73 %region2 = alloca [980 x i32], align 8
74 %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 3
75 %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 3
76 store volatile i32 42, i32 *%ptr1
77 store volatile i32 42, i32 *%ptr2
78 ret void
79}
80
81; Add 4096 bytes (1024 words) to the size of each object and repeat.
82define void @f4() {
83; CHECK-NOFP: f4:
84; CHECK-NOFP: lay %r1, 4096(%r15)
85; CHECK-NOFP: mvhi 4092(%r1), 42
86; CHECK-NOFP: br %r14
87;
88; CHECK-FP: f4:
89; CHECK-FP: lay %r1, 4096(%r11)
90; CHECK-FP: mvhi 4092(%r1), 42
91; CHECK-FP: br %r14
; The objects are now 2004 words (980 + 1024), i.e. 8016 bytes, so the
; higher object starts at 168 + 8016 = 8184. Element 1 is at 8188, which
; the checks expect to be formed as an LAY of 4096 plus the largest MVHI
; displacement, 4092.
92 %region1 = alloca [2004 x i32], align 8
93 %region2 = alloca [2004 x i32], align 8
94 %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 1
95 %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 1
96 store volatile i32 42, i32 *%ptr1
97 store volatile i32 42, i32 *%ptr2
98 ret void
99}
100
101; ...as above.
102define void @f5() {
103; CHECK-NOFP: f5:
104; CHECK-NOFP: lay %r1, 8192(%r15)
105; CHECK-NOFP: mvhi 0(%r1), 42
106; CHECK-NOFP: br %r14
107;
108; CHECK-FP: f5:
109; CHECK-FP: lay %r1, 8192(%r11)
110; CHECK-FP: mvhi 0(%r1), 42
111; CHECK-FP: br %r14
; Same 2004-word objects as f4 (higher object at frame offset 8184), but
; element 2 is at 8184 + 8 = 8192. The checks expect the LAY amount to move
; up to 8192 with an MVHI displacement of 0.
112 %region1 = alloca [2004 x i32], align 8
113 %region2 = alloca [2004 x i32], align 8
114 %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 2
115 %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 2
116 store volatile i32 42, i32 *%ptr1
117 store volatile i32 42, i32 *%ptr2
118 ret void
119}
120
121; ...as above.
122define void @f6() {
123; CHECK-NOFP: f6:
124; CHECK-NOFP: lay %r1, 8192(%r15)
125; CHECK-NOFP: mvhi 4(%r1), 42
126; CHECK-NOFP: br %r14
127;
128; CHECK-FP: f6:
129; CHECK-FP: lay %r1, 8192(%r11)
130; CHECK-FP: mvhi 4(%r1), 42
131; CHECK-FP: br %r14
; Same 2004-word objects as f4 (higher object at frame offset 8184), but
; element 3 is at 8184 + 12 = 8196. The checks expect an LAY of 8192 with
; the remaining 4 bytes as the MVHI displacement.
132 %region1 = alloca [2004 x i32], align 8
133 %region2 = alloca [2004 x i32], align 8
134 %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 3
135 %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 3
136 store volatile i32 42, i32 *%ptr1
137 store volatile i32 42, i32 *%ptr2
138 ret void
139}
140
141; Now try an offset of 4092 from the start of the object, with the object
142; being at offset 8192. This time we need objects of (8192 - 168) / 4 = 2006
143; words.
144define void @f7() {
145; CHECK-NOFP: f7:
146; CHECK-NOFP: lay %r1, 8192(%r15)
147; CHECK-NOFP: mvhi 4092(%r1), 42
148; CHECK-NOFP: br %r14
149;
150; CHECK-FP: f7:
151; CHECK-FP: lay %r1, 8192(%r11)
152; CHECK-FP: mvhi 4092(%r1), 42
153; CHECK-FP: br %r14
; With 2006-word (8024-byte) objects the higher object starts at
; 168 + 8024 = 8192. Element 1023 is 4092 bytes into the object, so the
; object-relative offset is itself the largest in-range displacement. The
; checks expect an LAY of 8192 (the object's frame offset) and an MVHI
; displacement of 4092.
154 %region1 = alloca [2006 x i32], align 8
155 %region2 = alloca [2006 x i32], align 8
156 %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1023
157 %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1023
158 store volatile i32 42, i32 *%ptr1
159 store volatile i32 42, i32 *%ptr2
160 ret void
161}
162
163; Keep the object-relative offset the same but bump the size of the
164; objects by one doubleword.
165define void @f8() {
166; CHECK-NOFP: f8:
167; CHECK-NOFP: lay %r1, 12288(%r15)
168; CHECK-NOFP: mvhi 4(%r1), 42
169; CHECK-NOFP: br %r14
170;
171; CHECK-FP: f8:
172; CHECK-FP: lay %r1, 12288(%r11)
173; CHECK-FP: mvhi 4(%r1), 42
174; CHECK-FP: br %r14
; With 2008-word (8032-byte) objects the higher object starts at
; 168 + 8032 = 8200. Element 1023 (4092 bytes in) is then at frame offset
; 12292, so the combined offset crosses the next 4096 boundary. The checks
; expect an LAY of 12288 with an MVHI displacement of 4.
175 %region1 = alloca [2008 x i32], align 8
176 %region2 = alloca [2008 x i32], align 8
177 %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1023
178 %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1023
179 store volatile i32 42, i32 *%ptr1
180 store volatile i32 42, i32 *%ptr2
181 ret void
182}
183
184; Check a case where the original displacement is out of range. The backend
185; should force an LAY from the outset. We don't yet do any kind of anchor
186; optimization, so there should be no offset on the MVHI itself.
187define void @f9() {
188; CHECK-NOFP: f9:
189; CHECK-NOFP: lay %r1, 12296(%r15)
190; CHECK-NOFP: mvhi 0(%r1), 42
191; CHECK-NOFP: br %r14
192;
193; CHECK-FP: f9:
194; CHECK-FP: lay %r1, 12296(%r11)
195; CHECK-FP: mvhi 0(%r1), 42
196; CHECK-FP: br %r14
; Same 2008-word objects as f8 (higher object at frame offset 8200), but
; element 1024 is 4096 bytes into the object, so the object-relative
; displacement is out of range before the frame offset is even added. The
; checks expect the whole offset, 8200 + 4096 = 12296, to be carried by the
; LAY, leaving an MVHI displacement of 0.
197 %region1 = alloca [2008 x i32], align 8
198 %region2 = alloca [2008 x i32], align 8
199 %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1024
200 %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1024
201 store volatile i32 42, i32 *%ptr1
202 store volatile i32 42, i32 *%ptr2
203 ret void
204}
205
206; Repeat f2 in a case that needs the emergency spill slot (because all
207; call-clobbered registers are live and no call-saved ones have been
208; allocated).
209define void @f10(i32 *%vptr) {
210; CHECK-NOFP: f10:
211; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
212; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
213; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
214; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
215; CHECK-NOFP: br %r14
216;
217; CHECK-FP: f10:
218; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
219; CHECK-FP: lay [[REGISTER]], 4096(%r11)
220; CHECK-FP: mvhi 0([[REGISTER]]), 42
221; CHECK-FP: lg [[REGISTER]], 160(%r11)
222; CHECK-FP: br %r14
; Same frame layout and store offset as f2 (980-word objects, element 2),
; but five values (%i0, %i1, %i3, %i4, %i5) are loaded from %vptr before the
; MVHI stores and only written back afterwards, so they are live across
; them. The checks expect some register to be saved to the slot at
; displacement 160 from the frame base, used as the LAY/MVHI base register,
; and then reloaded from the same slot.
223 %i0 = load volatile i32 *%vptr
224 %i1 = load volatile i32 *%vptr
225 %i3 = load volatile i32 *%vptr
226 %i4 = load volatile i32 *%vptr
227 %i5 = load volatile i32 *%vptr
228 %region1 = alloca [980 x i32], align 8
229 %region2 = alloca [980 x i32], align 8
230 %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
231 %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
232 store volatile i32 42, i32 *%ptr1
233 store volatile i32 42, i32 *%ptr2
234 store volatile i32 %i0, i32 *%vptr
235 store volatile i32 %i1, i32 *%vptr
236 store volatile i32 %i3, i32 *%vptr
237 store volatile i32 %i4, i32 *%vptr
238 store volatile i32 %i5, i32 *%vptr
239 ret void
240}
241
242; And again with maximum register pressure. The only spill slot that the
243; NOFP case needs is the emergency one, so the offsets are the same as for f2.
244; However, the FP case uses %r11 as the frame pointer and must therefore
245; spill a second register. This leads to an extra displacement of 8.
246define void @f11(i32 *%vptr) {
247; CHECK-NOFP: f11:
248; CHECK-NOFP: stmg %r6, %r15,
249; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
250; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
251; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
252; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
253; CHECK-NOFP: lmg %r6, %r15,
254; CHECK-NOFP: br %r14
255;
256; CHECK-FP: f11:
257; CHECK-FP: stmg %r6, %r15,
258; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
259; CHECK-FP: lay [[REGISTER]], 4096(%r11)
260; CHECK-FP: mvhi 8([[REGISTER]]), 42
261; CHECK-FP: lg [[REGISTER]], 160(%r11)
262; CHECK-FP: lmg %r6, %r15,
263; CHECK-FP: br %r14
; Same frame objects and element index as f2 and f10, but fourteen values
; (%i0, %i1, %i3 through %i14) are loaded from %vptr before the MVHI stores
; and written back afterwards, so all of them are live across the stores.
; The checks expect %r6-%r15 to be saved with STMG and restored with LMG,
; and a register to be saved to and reloaded from the slot at displacement
; 160 around the LAY/MVHI pair. When the frame pointer is kept, the expected
; MVHI displacement is 8 rather than 0.
264 %i0 = load volatile i32 *%vptr
265 %i1 = load volatile i32 *%vptr
266 %i3 = load volatile i32 *%vptr
267 %i4 = load volatile i32 *%vptr
268 %i5 = load volatile i32 *%vptr
269 %i6 = load volatile i32 *%vptr
270 %i7 = load volatile i32 *%vptr
271 %i8 = load volatile i32 *%vptr
272 %i9 = load volatile i32 *%vptr
273 %i10 = load volatile i32 *%vptr
274 %i11 = load volatile i32 *%vptr
275 %i12 = load volatile i32 *%vptr
276 %i13 = load volatile i32 *%vptr
277 %i14 = load volatile i32 *%vptr
278 %region1 = alloca [980 x i32], align 8
279 %region2 = alloca [980 x i32], align 8
280 %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
281 %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
282 store volatile i32 42, i32 *%ptr1
283 store volatile i32 42, i32 *%ptr2
284 store volatile i32 %i0, i32 *%vptr
285 store volatile i32 %i1, i32 *%vptr
286 store volatile i32 %i3, i32 *%vptr
287 store volatile i32 %i4, i32 *%vptr
288 store volatile i32 %i5, i32 *%vptr
289 store volatile i32 %i6, i32 *%vptr
290 store volatile i32 %i7, i32 *%vptr
291 store volatile i32 %i8, i32 *%vptr
292 store volatile i32 %i9, i32 *%vptr
293 store volatile i32 %i10, i32 *%vptr
294 store volatile i32 %i11, i32 *%vptr
295 store volatile i32 %i12, i32 *%vptr
296 store volatile i32 %i13, i32 *%vptr
297 store volatile i32 %i14, i32 *%vptr
298 ret void
299}