[AArch64] Reduce number of callee-save save/restores.
Summary:
Before this change, callee-save registers would be rounded up to even
pairs of GPRs and FPRs. This change eliminates these extra padding
load/stores, though the stack allocation stays the same size unless
both the GPR and FPR sets have an odd size, in which case the stack
allocation shrinks by one full pair stack slot (16 bytes).
This optimization cannot currently be done for MachO targets since they
rely on a fast-path .debug_frame equivalent that can only encode
callee-save registers as pairs.
Reviewers: t.p.northover, rengolin, mcrosier, jmolloy
Subscribers: aemerson, rengolin, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D17000
llvm-svn: 260689
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index 1820b81..6520b16 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -1,4 +1,5 @@
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO
; This test aims to check basic correctness of frame layout &
; frame access code. There are 8 functions in this test file,
@@ -97,7 +98,7 @@
; CHECK-LABEL: novla_nodynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -105,8 +106,7 @@
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
@@ -114,10 +114,34 @@
; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
; Check epilogue:
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _novla_nodynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
declare i32 @g() #0
@@ -159,7 +183,7 @@
; CHECK-LABEL: novla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -170,8 +194,7 @@
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
@@ -181,10 +204,39 @@
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16 // =16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _novla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK-MACHO: sub x9, sp, #96
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
; Function Attrs: nounwind
define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -336,7 +388,7 @@
; CHECK-LABEL: vla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x22, x21, [sp, #-48]!
+; CHECK: str x21, [sp, #-48]!
; CHECK: stp x20, x19, [sp, #16]
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #32]
@@ -354,8 +406,7 @@
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
-; CHECK: .cfi_offset w21, -40
-; CHECK: .cfi_offset w22, -48
+; CHECK: .cfi_offset w21, -48
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
@@ -376,10 +427,57 @@
; CHECK: sub sp, x29, #32
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x20, x19, [sp, #16]
-; CHECK: ldp x22, x21, [sp], #48
+; CHECK: ldr x21, [sp], #48
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x22, x21, [sp, #-48]!
+; CHECK-MACHO: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #32]
+; CHECK-MACHO: add x29, sp, #32
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #80
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov x19, sp
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; CHECK-MACHO: .cfi_offset w21, -40
+; CHECK-MACHO: .cfi_offset w22, -48
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK-MACHO: sub sp, x29, #32
+; CHECK-MACHO: ldp x29, x30, [sp, #32]
+; CHECK-MACHO: ldp x20, x19, [sp, #16]
+; CHECK-MACHO: ldp x22, x21, [sp], #48
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -398,7 +496,7 @@
; CHECK-LABEL: vla_dynamicrealign_nocall
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -428,9 +526,44 @@
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall:
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #96
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -449,7 +582,7 @@
; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: stp x28, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -479,9 +612,44 @@
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldp x28, x19, [sp], #32
; CHECK: ret
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align:
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 32768
+; bytes & the base pointer (x19) gets initialized to
+; this 32768-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #7, lsl #12
+; CHECK-MACHO: and sp, x9, #0xffffffffffff8000
+; CHECK-MACHO: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
define void @realign_conditional(i1 %b) {
entry: