[AArch64] Reduce number of callee-save save/restores.

Summary:
Before this change, the sets of callee-saved GPRs and FPRs were each
rounded up to an even number of registers so that they could be saved
and restored as pairs.  This change eliminates the extra padding
loads/stores.  The stack allocation stays the same size unless both the
GPR and FPR sets have an odd size, in which case the frame shrinks by
one full pair stack slot (16 bytes).

This optimization cannot currently be done for MachO targets since they
rely on a fast-path .debug_frame equivalent that can only encode
callee-save registers as pairs.

Reviewers: t.p.northover, rengolin, mcrosier, jmolloy

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D17000

llvm-svn: 260689
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index 1820b81..6520b16 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO
 
 ; This test aims to check basic correctness of frame layout &
 ; frame access code. There are 8 functions in this test file,
@@ -97,7 +98,7 @@
 ; CHECK-LABEL: novla_nodynamicrealign_call
 ; CHECK: .cfi_startproc
 ;   Check that used callee-saved registers are saved
-; CHECK: stp	x20, x19, [sp, #-32]!
+; CHECK: str	x19, [sp, #-32]!
 ;   Check that the frame pointer is created:
 ; CHECK: stp	x29, x30, [sp, #16]
 ; CHECK: add	x29, sp, #16
@@ -105,8 +106,7 @@
 ; CHECK: .cfi_def_cfa w29, 16
 ; CHECK: .cfi_offset w30, -8
 ; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
 ;   Check correct access to arguments passed on the stack, through frame pointer
 ; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
 ; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
@@ -114,10 +114,34 @@
 ; CHECK: ldr	w[[ILOC:[0-9]+]], [sp, #12]
 ;   Check epilogue:
 ; CHECK: ldp	x29, x30, [sp, #16]
-; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ldr	x19, [sp], #32
 ; CHECK: ret
 ; CHECK: .cfi_endproc
 
+; CHECK-MACHO-LABEL: _novla_nodynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+;   Check that used callee-saved registers are saved
+; CHECK-MACHO: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK-MACHO: stp	x29, x30, [sp, #16]
+; CHECK-MACHO: add	x29, sp, #16
+;   Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr	d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr	w[[IARG:[0-9]+]], [x29, #20]
+;   Check correct access to local variable on the stack, through stack pointer
+; CHECK-MACHO: ldr	w[[ILOC:[0-9]+]], [sp, #12]
+;   Check epilogue:
+; CHECK-MACHO: ldp	x29, x30, [sp, #16]
+; CHECK-MACHO: ldp	x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
 
 declare i32 @g() #0
 
@@ -159,7 +183,7 @@
 ; CHECK-LABEL: novla_dynamicrealign_call
 ; CHECK: .cfi_startproc
 ;   Check that used callee-saved registers are saved
-; CHECK: stp	x20, x19, [sp, #-32]!
+; CHECK: str	x19, [sp, #-32]!
 ;   Check that the frame pointer is created:
 ; CHECK: stp	x29, x30, [sp, #16]
 ; CHECK: add	x29, sp, #16
@@ -170,8 +194,7 @@
 ; CHECK: .cfi_def_cfa w29, 16
 ; CHECK: .cfi_offset w30, -8
 ; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
 ;   Check correct access to arguments passed on the stack, through frame pointer
 ; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
 ; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
@@ -181,10 +204,39 @@
 ;     Check that stack pointer get restored from frame pointer.
 ; CHECK: sub	sp, x29, #16            // =16
 ; CHECK: ldp	x29, x30, [sp, #16]
-; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ldr	x19, [sp], #32
 ; CHECK: ret
 ; CHECK: .cfi_endproc
 
+; CHECK-MACHO-LABEL: _novla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+;   Check that used callee-saved registers are saved
+; CHECK-MACHO: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK-MACHO: stp	x29, x30, [sp, #16]
+; CHECK-MACHO: add	x29, sp, #16
+;   Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK-MACHO: sub	x9, sp, #96
+; CHECK-MACHO: and	sp, x9, #0xffffffffffffff80
+;   Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr	d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr	w[[IARG:[0-9]+]], [x29, #20]
+;   Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK-MACHO: ldr	w[[ILOC:[0-9]+]], [sp]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub	sp, x29, #16
+; CHECK-MACHO: ldp	x29, x30, [sp, #16]
+; CHECK-MACHO: ldp	x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
 
 ; Function Attrs: nounwind
 define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -336,7 +388,7 @@
 ; CHECK-LABEL: vla_dynamicrealign_call
 ; CHECK: .cfi_startproc
 ;   Check that used callee-saved registers are saved
-; CHECK: stp	x22, x21, [sp, #-48]!
+; CHECK: str	x21, [sp, #-48]!
 ; CHECK: stp	x20, x19, [sp, #16]
 ;   Check that the frame pointer is created:
 ; CHECK: stp	x29, x30, [sp, #32]
@@ -354,8 +406,7 @@
 ; CHECK: .cfi_offset w29, -16
 ; CHECK: .cfi_offset w19, -24
 ; CHECK: .cfi_offset w20, -32
-; CHECK: .cfi_offset w21, -40
-; CHECK: .cfi_offset w22, -48
+; CHECK: .cfi_offset w21, -48
 ;   Check correct access to arguments passed on the stack, through frame pointer
 ; CHECK: ldr	w[[IARG:[0-9]+]], [x29, #24]
 ; CHECK: ldr	d[[DARG:[0-9]+]], [x29, #40]
@@ -376,10 +427,57 @@
 ; CHECK: sub	sp, x29, #32
 ; CHECK: ldp	x29, x30, [sp, #32]
 ; CHECK: ldp	x20, x19, [sp, #16]
-; CHECK: ldp	x22, x21, [sp], #48
+; CHECK: ldr	x21, [sp], #48
 ; CHECK: ret
 ; CHECK: .cfi_endproc
 
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+;   Check that used callee-saved registers are saved
+; CHECK-MACHO: stp	x22, x21, [sp, #-48]!
+; CHECK-MACHO: stp	x20, x19, [sp, #16]
+;   Check that the frame pointer is created:
+; CHECK-MACHO: stp	x29, x30, [sp, #32]
+; CHECK-MACHO: add	x29, sp, #32
+;   Check that the stack pointer gets re-aligned to 128
+;   bytes & the base pointer (x19) gets initialized to
+;   this 128-byte aligned area for local variables &
+;   spill slots
+; CHECK-MACHO: sub	x9, sp, #80
+; CHECK-MACHO: and	sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov    x19, sp
+;   Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; CHECK-MACHO: .cfi_offset w21, -40
+; CHECK-MACHO: .cfi_offset w22, -48
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr	w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr	d[[DARG:[0-9]+]], [x29, #32]
+;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+;   and set-up of base pointer (x19).
+; CHECK-MACHO: mov	w9, w0
+; CHECK-MACHO: mov	 x10, sp
+; CHECK-MACHO: lsl	x9, x9, #2
+; CHECK-MACHO: add	x9, x9, #15
+; CHECK-MACHO: and	x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov	 sp, x[[VLASPTMP]]
+;   Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr	w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr	 w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub	sp, x29, #32
+; CHECK-MACHO: ldp	x29, x30, [sp, #32]
+; CHECK-MACHO: ldp	x20, x19, [sp, #16]
+; CHECK-MACHO: ldp	x22, x21, [sp], #48
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
 
 ; Function Attrs: nounwind
 define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -398,7 +496,7 @@
 
 ; CHECK-LABEL: vla_dynamicrealign_nocall
 ;   Check that used callee-saved registers are saved
-; CHECK: stp	x20, x19, [sp, #-32]!
+; CHECK: str	x19, [sp, #-32]!
 ;   Check that the frame pointer is created:
 ; CHECK: stp	x29, x30, [sp, #16]
 ; CHECK: add	x29, sp, #16
@@ -428,9 +526,44 @@
 ;     Check that stack pointer get restored from frame pointer.
 ; CHECK: sub	sp, x29, #16
 ; CHECK: ldp	x29, x30, [sp, #16]
-; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ldr	x19, [sp], #32
 ; CHECK: ret
 
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall:
+;   Check that used callee-saved registers are saved
+; CHECK-MACHO: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK-MACHO: stp	x29, x30, [sp, #16]
+; CHECK-MACHO: add	x29, sp, #16
+;   Check that the stack pointer gets re-aligned to 128
+;   bytes & the base pointer (x19) gets initialized to
+;   this 128-byte aligned area for local variables &
+;   spill slots
+; CHECK-MACHO: sub	x9, sp, #96
+; CHECK-MACHO: and	sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov    x19, sp
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr	w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr	d[[DARG:[0-9]+]], [x29, #32]
+;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+;   and set-up of base pointer (x19).
+; CHECK-MACHO: mov	w9, w0
+; CHECK-MACHO: mov	 x10, sp
+; CHECK-MACHO: lsl	x9, x9, #2
+; CHECK-MACHO: add	x9, x9, #15
+; CHECK-MACHO: and	x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov	 sp, x[[VLASPTMP]]
+;   Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr	w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr	 w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub	sp, x29, #16
+; CHECK-MACHO: ldp	x29, x30, [sp, #16]
+; CHECK-MACHO: ldp	x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
 
 ; Function Attrs: nounwind
 define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
@@ -449,7 +582,7 @@
 
 ; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
 ;   Check that used callee-saved registers are saved
-; CHECK: stp	x20, x19, [sp, #-32]!
+; CHECK: stp	x28, x19, [sp, #-32]!
 ;   Check that the frame pointer is created:
 ; CHECK: stp	x29, x30, [sp, #16]
 ; CHECK: add	x29, sp, #16
@@ -479,9 +612,44 @@
 ;     Check that stack pointer get restored from frame pointer.
 ; CHECK: sub	sp, x29, #16
 ; CHECK: ldp	x29, x30, [sp, #16]
-; CHECK: ldp	x20, x19, [sp], #32
+; CHECK: ldp	x28, x19, [sp], #32
 ; CHECK: ret
 
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align:
+;   Check that used callee-saved registers are saved
+; CHECK-MACHO: stp	x20, x19, [sp, #-32]!
+;   Check that the frame pointer is created:
+; CHECK-MACHO: stp	x29, x30, [sp, #16]
+; CHECK-MACHO: add	x29, sp, #16
+;   Check that the stack pointer gets re-aligned to 32768
+;   bytes & the base pointer (x19) gets initialized to
+;   this 32768-byte aligned area for local variables &
+;   spill slots
+; CHECK-MACHO: sub	x9, sp, #7, lsl #12
+; CHECK-MACHO: and	sp, x9, #0xffffffffffff8000
+; CHECK-MACHO: mov    x19, sp
+;   Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr	w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr	d[[DARG:[0-9]+]], [x29, #32]
+;   Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+;   and set-up of base pointer (x19).
+; CHECK-MACHO: mov	w9, w0
+; CHECK-MACHO: mov	 x10, sp
+; CHECK-MACHO: lsl	x9, x9, #2
+; CHECK-MACHO: add	x9, x9, #15
+; CHECK-MACHO: and	x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub	 x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov	 sp, x[[VLASPTMP]]
+;   Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr	w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr	 w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+;   Check epilogue:
+;     Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub	sp, x29, #16
+; CHECK-MACHO: ldp	x29, x30, [sp, #16]
+; CHECK-MACHO: ldp	x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
 
 define void @realign_conditional(i1 %b) {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/alloca.ll b/llvm/test/CodeGen/AArch64/alloca.ll
index 4575437..2a1e287 100644
--- a/llvm/test/CodeGen/AArch64/alloca.ll
+++ b/llvm/test/CodeGen/AArch64/alloca.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=arm64-apple-ios -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK-MACHO
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
 
 declare void @use_addr(i8*)
@@ -113,14 +114,21 @@
 
 define void @test_alloca_large_frame(i64 %n) {
 ; CHECK-LABEL: test_alloca_large_frame:
+; CHECK-MACHO-LABEL: test_alloca_large_frame:
 
 
-; CHECK: stp     x20, x19, [sp, #-32]!
+; CHECK: stp     x28, x19, [sp, #-32]!
 ; CHECK: stp     x29, x30, [sp, #16]
 ; CHECK: add     x29, sp, #16
 ; CHECK: sub     sp, sp, #1953, lsl #12
 ; CHECK: sub     sp, sp, #512
 
+; CHECK-MACHO: stp     x20, x19, [sp, #-32]!
+; CHECK-MACHO: stp     x29, x30, [sp, #16]
+; CHECK-MACHO: add     x29, sp, #16
+; CHECK-MACHO: sub     sp, sp, #1953, lsl #12
+; CHECK-MACHO: sub     sp, sp, #512
+
   %addr1 = alloca i8, i64 %n
   %addr2 = alloca i64, i64 1000000
 
@@ -130,7 +138,11 @@
 
 ; CHECK: sub     sp, x29, #16
 ; CHECK: ldp     x29, x30, [sp, #16]
-; CHECK: ldp     x20, x19, [sp], #32
+; CHECK: ldp     x28, x19, [sp], #32
+
+; CHECK-MACHO: sub     sp, x29, #16
+; CHECK-MACHO: ldp     x29, x30, [sp, #16]
+; CHECK-MACHO: ldp     x20, x19, [sp], #32
 }
 
 declare i8* @llvm.stacksave()
diff --git a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
index ac6e8a7..cd87f55 100644
--- a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
 
 ; rdar://9167275
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
index 99defb1..b0bad20 100644
--- a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck -check-prefix CHECK-NOTMACHO %s
 ;
 ; rdar://14075006
 
@@ -23,6 +24,19 @@
 ; CHECK: ldp d11, d10, [sp, #32]
 ; CHECK: ldp d13, d12, [sp, #16]
 ; CHECK: ldp d15, d14, [sp], #144
+
+; CHECK-NOTMACHO-LABEL: odd:
+; CHECK-NOTMACHO: stp d14, d12, [sp, #-80]!
+; CHECK-NOTMACHO: stp d10, d8, [sp, #16]
+; CHECK-NOTMACHO: str x27, [sp, #32]
+; CHECK-NOTMACHO: stp x25, x23, [sp, #48]
+; CHECK-NOTMACHO: stp x21, x19, [sp, #64]
+; CHECK-NOTMACHO: movz x0, #0x2a
+; CHECK-NOTMACHO: ldp x21, x19, [sp, #64]
+; CHECK-NOTMACHO: ldp x25, x23, [sp, #48]
+; CHECK-NOTMACHO: ldr x27, [sp, #32]
+; CHECK-NOTMACHO: ldp d10, d8, [sp, #16]
+; CHECK-NOTMACHO: ldp d14, d12, [sp], #80
   call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind
   ret void
 }
@@ -48,6 +62,19 @@
 ; CHECK: ldp d11, d10, [sp, #32]
 ; CHECK: ldp d13, d12, [sp, #16]
 ; CHECK: ldp d15, d14, [sp], #144
+
+; CHECK-NOTMACHO-LABEL: even:
+; CHECK-NOTMACHO: stp d15, d13, [sp, #-80]!
+; CHECK-NOTMACHO: stp d11, d9, [sp, #16]
+; CHECK-NOTMACHO: str x28, [sp, #32]
+; CHECK-NOTMACHO: stp x26, x24, [sp, #48]
+; CHECK-NOTMACHO: stp x22, x20, [sp, #64]
+; CHECK-NOTMACHO: movz x0, #0x2a
+; CHECK-NOTMACHO: ldp x22, x20, [sp, #64]
+; CHECK-NOTMACHO: ldp x26, x24, [sp, #48]
+; CHECK-NOTMACHO: ldr x28, [sp, #32]
+; CHECK-NOTMACHO: ldp d11, d9, [sp, #16]
+; CHECK-NOTMACHO: ldp d15, d13, [sp], #80
   call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind
   ret void
 }