; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s

%struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }
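; The offsets checked below assume watchOS v7k alignment rules: i64, double and
; <2 x float> are 8-byte aligned and <4 x float> is 16-byte aligned, which
; places the fields above at offsets 8, 24, 40 and 64 respectively.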

define i32 @test_i64_align() {
; CHECK-LABEL: test_i64_align:
; CHECK: movs r0, #8
  ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
}

define i32 @test_f64_align() {
; CHECK-LABEL: test_f64_align:
; CHECK: movs r0, #24
  ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
}

define i32 @test_v2f32_align() {
; CHECK-LABEL: test_v2f32_align:
; CHECK: movs r0, #40
  ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
}

define i32 @test_v4f32_align() {
; CHECK-LABEL: test_v4f32_align:
; CHECK: movs r0, #64
  ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
}

; Key point here is that an extra register has to be saved so that the DPRs end
; up in an aligned location (as the prologue/epilogue inserter had calculated).
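; (r5 is not clobbered by the asm below; saving it pads the GPR area to 16
; bytes so SP is still 8-byte aligned when d8/d9 are vpushed, which is why the
; CHECK-NOTs expect no separate sub/add of sp.)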
define void @test_dpr_unwind_align() {
; CHECK-LABEL: test_dpr_unwind_align:
; CHECK: push {r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: vpush {d8, d9}
; [...]
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK-NOT: add sp,
; CHECK: pop {r5, r6, r7, pc}

  call void asm sideeffect "", "~{r6},~{d8},~{d9}"()

  ; Whatever
  call i32 @test_i64_align()
  ret void
}

; This time, there's no viable way to tack CS-registers onto the list: a real SP
; adjustment needs to be performed to put d8 and d9 where they should be.
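; (The five CS1 registers plus r8/r11 come to 28 bytes of pushes, which is not
; a multiple of 8, hence the explicit "sub sp, #4" before the vpush and the
; matching "add sp, #4" after the vpop.)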
define void @test_dpr_unwind_align_manually() {
; CHECK-LABEL: test_dpr_unwind_align_manually:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: push.w {r8, r11}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; [...]
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop.w {r8, r11}
; CHECK: pop {r4, r5, r6, r7, pc}

  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()

  ; Whatever
  call i32 @test_i64_align()
  ret void
}

; If there's only a CS1 area, the sub should be in the right place:
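; (r4-r7/lr are 20 bytes, so the 4-byte alignment adjustment has to come
; immediately after that push for d8/d9 to land 8-byte aligned; the later
; "sub sp, #8" then brings the frame to 48 bytes, keeping SP 16-byte aligned
; for the call.)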
define void @test_dpr_unwind_align_just_cs1() {
; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; CHECK: sub sp, #8
; [...]
; CHECK: bl _test_i64_align
; CHECK: add sp, #8
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop {r4, r5, r6, r7, pc}

  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()

  ; Whatever
  call i32 @test_i64_align()
  ret void
}

; If there are no DPRs, we shouldn't try to align the stack in stages anyway.
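; (With nothing to vpush, the whole adjustment collapses into the single
; "sub sp, #12": 20 bytes of pushes plus 12 gives a 32-byte frame, so SP stays
; 16-byte aligned for the call.)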
define void @test_dpr_unwind_align_no_dprs() {
; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #12
; [...]
; CHECK: bl _test_i64_align
; CHECK: add sp, #12
; CHECK: pop {r4, r5, r6, r7, pc}

  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()

  ; Whatever
  call i32 @test_i64_align()
  ret void
}

; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
; the stack.
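; The [8 x double] is expected to fill d0-d7 and the float the first stack
; slot, so the <4 x float> should be placed at the next 16-byte boundary,
; [sp, #16], and be loadable with a :128 alignment hint.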
define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass:
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]

  ret <4 x float> %in
}

declare void @varargs(i32, ...)

; When varargs are enabled, we go down a different route. Still want 128-bit
; alignment though.
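; Here the i32 and [3 x i32] should fill r0-r3 and the float the first stack
; slot, so the vector is again expected at the 16-byte boundary [sp, #16].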
define void @test_v128_stack_pass_varargs(<4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass_varargs:
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]

  call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
  ret void
}

; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give
; a single pointer), 64-bit quantities must be passed in an aligned even/odd
; register pair (r2/r3 here) rather than split between r1 and the stack.
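; Expected lowering: %r2_r3 arrives in r2/r3, %sp is reloaded from [sp], and
; the 64-bit add becomes adds for the low half plus "adc r1, r3, #0" for the
; high half (the zext'd operand's high half is zero, so only the carry matters).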
define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
; CHECK-LABEL: test_64bit_gpr_align:
; CHECK: ldr [[RHS:r[0-9]+]], [sp]
; CHECK: adds r0, [[RHS]], r2
; CHECK: adc r1, r3, #0

  %ext = zext i32 %sp to i64
  %sum = add i64 %ext, %r2_r3
  ret i64 %sum
}