[AArch64] Reduce number of callee-save save/restores.
Summary:
Before this change, the sets of callee-saved GPRs and FPRs were each
rounded up to an even number of registers so that they could always be
saved and restored as pairs. This change eliminates the extra padding
loads/stores. The stack allocation stays the same size unless both the
GPR and FPR sets have an odd size, in which case the allocation shrinks
by one full pair stack slot (16 bytes).
This optimization cannot currently be done for MachO targets since they
rely on a fast-path .debug_frame equivalent that can only encode
callee-save registers as pairs.
Reviewers: t.p.northover, rengolin, mcrosier, jmolloy
Subscribers: aemerson, rengolin, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D17000
llvm-svn: 260689
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index 1820b81..6520b16 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -1,4 +1,5 @@
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO
; This test aims to check basic correctness of frame layout &
; frame access code. There are 8 functions in this test file,
@@ -97,7 +98,7 @@
; CHECK-LABEL: novla_nodynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -105,8 +106,7 @@
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
@@ -114,10 +114,34 @@
; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
; Check epilogue:
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _novla_nodynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
declare i32 @g() #0
@@ -159,7 +183,7 @@
; CHECK-LABEL: novla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -170,8 +194,7 @@
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
@@ -181,10 +204,39 @@
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16 // =16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _novla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK-MACHO: sub x9, sp, #96
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
; Function Attrs: nounwind
define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -336,7 +388,7 @@
; CHECK-LABEL: vla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x22, x21, [sp, #-48]!
+; CHECK: str x21, [sp, #-48]!
; CHECK: stp x20, x19, [sp, #16]
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #32]
@@ -354,8 +406,7 @@
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
-; CHECK: .cfi_offset w21, -40
-; CHECK: .cfi_offset w22, -48
+; CHECK: .cfi_offset w21, -48
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
@@ -376,10 +427,57 @@
; CHECK: sub sp, x29, #32
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x20, x19, [sp, #16]
-; CHECK: ldp x22, x21, [sp], #48
+; CHECK: ldr x21, [sp], #48
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x22, x21, [sp, #-48]!
+; CHECK-MACHO: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #32]
+; CHECK-MACHO: add x29, sp, #32
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #80
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov x19, sp
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; CHECK-MACHO: .cfi_offset w21, -40
+; CHECK-MACHO: .cfi_offset w22, -48
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #32
+; CHECK-MACHO: ldp x29, x30, [sp, #32]
+; CHECK-MACHO: ldp x20, x19, [sp, #16]
+; CHECK-MACHO: ldp x22, x21, [sp], #48
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -398,7 +496,7 @@
; CHECK-LABEL: vla_dynamicrealign_nocall
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -428,9 +526,44 @@
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall:
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #96
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -449,7 +582,7 @@
; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: stp x28, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
@@ -479,9 +612,44 @@
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldp x28, x19, [sp], #32
; CHECK: ret
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align:
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 32768
+; bytes & the base pointer (x19) gets initialized to
+; this 32768-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #7, lsl #12
+; CHECK-MACHO: and sp, x9, #0xffffffffffff8000
+; CHECK-MACHO: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
define void @realign_conditional(i1 %b) {
entry:
diff --git a/llvm/test/CodeGen/AArch64/alloca.ll b/llvm/test/CodeGen/AArch64/alloca.ll
index 4575437..2a1e287 100644
--- a/llvm/test/CodeGen/AArch64/alloca.ll
+++ b/llvm/test/CodeGen/AArch64/alloca.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=arm64-apple-ios -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK-MACHO
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
declare void @use_addr(i8*)
@@ -113,14 +114,21 @@
define void @test_alloca_large_frame(i64 %n) {
; CHECK-LABEL: test_alloca_large_frame:
+; CHECK-MACHO-LABEL: test_alloca_large_frame:
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: stp x28, x19, [sp, #-32]!
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; CHECK: sub sp, sp, #1953, lsl #12
; CHECK: sub sp, sp, #512
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; CHECK-MACHO: sub sp, sp, #1953, lsl #12
+; CHECK-MACHO: sub sp, sp, #512
+
%addr1 = alloca i8, i64 %n
%addr2 = alloca i64, i64 1000000
@@ -130,7 +138,11 @@
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldp x28, x19, [sp], #32
+
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
}
declare i8* @llvm.stacksave()
diff --git a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
index ac6e8a7..cd87f55 100644
--- a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
; rdar://9167275
diff --git a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
index 99defb1..b0bad20 100644
--- a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck -check-prefix CHECK-NOTMACHO %s
;
; rdar://14075006
@@ -23,6 +24,19 @@
; CHECK: ldp d11, d10, [sp, #32]
; CHECK: ldp d13, d12, [sp, #16]
; CHECK: ldp d15, d14, [sp], #144
+
+; CHECK-NOTMACHO-LABEL: odd:
+; CHECK-NOTMACHO: stp d14, d12, [sp, #-80]!
+; CHECK-NOTMACHO: stp d10, d8, [sp, #16]
+; CHECK-NOTMACHO: str x27, [sp, #32]
+; CHECK-NOTMACHO: stp x25, x23, [sp, #48]
+; CHECK-NOTMACHO: stp x21, x19, [sp, #64]
+; CHECK-NOTMACHO: movz x0, #0x2a
+; CHECK-NOTMACHO: ldp x21, x19, [sp, #64]
+; CHECK-NOTMACHO: ldp x25, x23, [sp, #48]
+; CHECK-NOTMACHO: ldr x27, [sp, #32]
+; CHECK-NOTMACHO: ldp d10, d8, [sp, #16]
+; CHECK-NOTMACHO: ldp d14, d12, [sp], #80
call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind
ret void
}
@@ -48,6 +62,19 @@
; CHECK: ldp d11, d10, [sp, #32]
; CHECK: ldp d13, d12, [sp, #16]
; CHECK: ldp d15, d14, [sp], #144
+
+; CHECK-NOTMACHO-LABEL: even:
+; CHECK-NOTMACHO: stp d15, d13, [sp, #-80]!
+; CHECK-NOTMACHO: stp d11, d9, [sp, #16]
+; CHECK-NOTMACHO: str x28, [sp, #32]
+; CHECK-NOTMACHO: stp x26, x24, [sp, #48]
+; CHECK-NOTMACHO: stp x22, x20, [sp, #64]
+; CHECK-NOTMACHO: movz x0, #0x2a
+; CHECK-NOTMACHO: ldp x22, x20, [sp, #64]
+; CHECK-NOTMACHO: ldp x26, x24, [sp, #48]
+; CHECK-NOTMACHO: ldr x28, [sp, #32]
+; CHECK-NOTMACHO: ldp d11, d9, [sp, #16]
+; CHECK-NOTMACHO: ldp d15, d13, [sp], #80
call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind
ret void
}