blob: 95600482434f1a09bea45c4da8bc0aee71aa4f14 [file] [log] [blame]
Tim Northover3b0846e2014-05-24 12:50:23 +00001//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the AArch64 implementation of TargetFrameLowering class.
11//
Kristof Beyls17cb8982015-04-09 08:49:47 +000012// On AArch64, stack frames are structured as follows:
13//
14// The stack grows downward.
15//
16// All of the individual frame areas on the frame below are optional, i.e. it's
17// possible to create a function so that the particular area isn't present
18// in the frame.
19//
20// At function entry, the "frame" looks as follows:
21//
22// | | Higher address
23// |-----------------------------------|
24// | |
25// | arguments passed on the stack |
26// | |
27// |-----------------------------------| <- sp
28// | | Lower address
29//
30//
31// After the prologue has run, the frame has the following general structure.
32// Note that this doesn't depict the case where a red-zone is used. Also,
33// technically the last frame area (VLAs) doesn't get created until in the
34// main function body, after the prologue is run. However, it's depicted here
35// for completeness.
36//
37// | | Higher address
38// |-----------------------------------|
39// | |
40// | arguments passed on the stack |
41// | |
42// |-----------------------------------|
43// | |
44// | prev_fp, prev_lr |
45// | (a.k.a. "frame record") |
46// |-----------------------------------| <- fp(=x29)
47// | |
48// | other callee-saved registers |
49// | |
50// |-----------------------------------|
51// |.empty.space.to.make.part.below....|
52// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
53// |.the.standard.16-byte.alignment....| compile time; if present)
54// |-----------------------------------|
55// | |
56// | local variables of fixed size |
57// | including spill slots |
58// |-----------------------------------| <- bp(not defined by ABI,
59// |.variable-sized.local.variables....| LLVM chooses X19)
60// |.(VLAs)............................| (size of this area is unknown at
61// |...................................| compile time)
62// |-----------------------------------| <- sp
63// | | Lower address
64//
65//
// To access data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) must be computable at compile time. The size of the areas
// with a dotted background cannot be computed at compile time if they are
// present, so all three of fp, bp and sp must be set up in order to access
// all contents of the frame areas, assuming all of the frame areas are
// non-empty.
72//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
79//
// In some cases where a base pointer is not strictly needed, it is generated
// anyway because the offsets from the frame pointer to local variables become
// so large that they cannot be encoded in the immediate fields of loads or
// stores.
//
// FIXME: also explain the redzone concept.
// FIXME: also explain the concept of reserved call frames.
87//
Tim Northover3b0846e2014-05-24 12:50:23 +000088//===----------------------------------------------------------------------===//
89
90#include "AArch64FrameLowering.h"
91#include "AArch64InstrInfo.h"
92#include "AArch64MachineFunctionInfo.h"
93#include "AArch64Subtarget.h"
94#include "AArch64TargetMachine.h"
95#include "llvm/ADT/Statistic.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000096#include "llvm/CodeGen/MachineFrameInfo.h"
97#include "llvm/CodeGen/MachineFunction.h"
98#include "llvm/CodeGen/MachineInstrBuilder.h"
99#include "llvm/CodeGen/MachineModuleInfo.h"
100#include "llvm/CodeGen/MachineRegisterInfo.h"
101#include "llvm/CodeGen/RegisterScavenging.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +0000102#include "llvm/IR/DataLayout.h"
103#include "llvm/IR/Function.h"
Tim Northover3b0846e2014-05-24 12:50:23 +0000104#include "llvm/Support/CommandLine.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +0000105#include "llvm/Support/Debug.h"
Tim Northover3b0846e2014-05-24 12:50:23 +0000106#include "llvm/Support/raw_ostream.h"
107
108using namespace llvm;
109
110#define DEBUG_TYPE "frame-info"
111
112static cl::opt<bool> EnableRedZone("aarch64-redzone",
113 cl::desc("enable use of redzone on AArch64"),
114 cl::init(false), cl::Hidden);
115
116STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
117
Tim Northover3b0846e2014-05-24 12:50:23 +0000118bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
119 if (!EnableRedZone)
120 return false;
121 // Don't use the red zone if the function explicitly asks us not to.
122 // This is typically used for kernel code.
Duncan P. N. Exon Smith003bb7d2015-02-14 02:09:06 +0000123 if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
Tim Northover3b0846e2014-05-24 12:50:23 +0000124 return false;
125
126 const MachineFrameInfo *MFI = MF.getFrameInfo();
127 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
128 unsigned NumBytes = AFI->getLocalStackSize();
129
Eric Christopher114fa1c2016-02-29 22:50:49 +0000130 return !(MFI->hasCalls() || hasFP(MF) || NumBytes > 128);
Tim Northover3b0846e2014-05-24 12:50:23 +0000131}
132
133/// hasFP - Return true if the specified function should have a dedicated frame
134/// pointer register.
135bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
136 const MachineFrameInfo *MFI = MF.getFrameInfo();
Eric Christopherfc6de422014-08-05 02:39:49 +0000137 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
Geoff Berry62c1a1e2016-03-02 17:58:31 +0000138 // Retain behavior of always omitting the FP for leaf functions when possible.
139 return (MFI->hasCalls() &&
140 MF.getTarget().Options.DisableFramePointerElim(MF)) ||
141 MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
142 MFI->hasStackMap() || MFI->hasPatchPoint() ||
143 RegInfo->needsStackRealignment(MF);
Tim Northover3b0846e2014-05-24 12:50:23 +0000144}
145
146/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
147/// not required, we reserve argument space for call sites in the function
148/// immediately on entry to the current function. This eliminates the need for
149/// add/sub sp brackets around call sites. Returns true if the call frame is
150/// included as part of the stack frame.
151bool
152AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
153 return !MF.getFrameInfo()->hasVarSizedObjects();
154}
155
156void AArch64FrameLowering::eliminateCallFramePseudoInstr(
157 MachineFunction &MF, MachineBasicBlock &MBB,
158 MachineBasicBlock::iterator I) const {
Eric Christopherfc6de422014-08-05 02:39:49 +0000159 const AArch64InstrInfo *TII =
160 static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +0000161 DebugLoc DL = I->getDebugLoc();
Matthias Braunfa3872e2015-05-18 20:27:55 +0000162 unsigned Opc = I->getOpcode();
Tim Northover3b0846e2014-05-24 12:50:23 +0000163 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
164 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
165
Eric Christopherfc6de422014-08-05 02:39:49 +0000166 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
Tim Northover3b0846e2014-05-24 12:50:23 +0000167 if (!TFI->hasReservedCallFrame(MF)) {
168 unsigned Align = getStackAlignment();
169
170 int64_t Amount = I->getOperand(0).getImm();
Rui Ueyamada00f2f2016-01-14 21:06:47 +0000171 Amount = alignTo(Amount, Align);
Tim Northover3b0846e2014-05-24 12:50:23 +0000172 if (!IsDestroy)
173 Amount = -Amount;
174
175 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
176 // doesn't have to pop anything), then the first operand will be zero too so
177 // this adjustment is a no-op.
178 if (CalleePopAmount == 0) {
179 // FIXME: in-function stack adjustment for calls is limited to 24-bits
180 // because there's no guaranteed temporary register available.
181 //
Sylvestre Ledru469de192014-08-11 18:04:46 +0000182 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
Tim Northover3b0846e2014-05-24 12:50:23 +0000183 // 1) For offset <= 12-bit, we use LSL #0
184 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
185 // LSL #0, and the other uses LSL #12.
186 //
Chad Rosier401a4ab2016-01-19 16:50:45 +0000187 // Most call frames will be allocated at the start of a function so
Tim Northover3b0846e2014-05-24 12:50:23 +0000188 // this is OK, but it is a limitation that needs dealing with.
189 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
190 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
191 }
192 } else if (CalleePopAmount != 0) {
193 // If the calling convention demands that the callee pops arguments from the
194 // stack, we want to add it back if we have a reserved call frame.
195 assert(CalleePopAmount < 0xffffff && "call frame too large");
196 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
197 TII);
198 }
199 MBB.erase(I);
200}
201
202void AArch64FrameLowering::emitCalleeSavedFrameMoves(
Geoff Berry62d47252016-02-25 16:36:08 +0000203 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
Tim Northover3b0846e2014-05-24 12:50:23 +0000204 MachineFunction &MF = *MBB.getParent();
205 MachineFrameInfo *MFI = MF.getFrameInfo();
206 MachineModuleInfo &MMI = MF.getMMI();
207 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
Eric Christopherfc6de422014-08-05 02:39:49 +0000208 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +0000209 DebugLoc DL = MBB.findDebugLoc(MBBI);
210
211 // Add callee saved registers to move list.
212 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
213 if (CSI.empty())
214 return;
215
Tim Northover3b0846e2014-05-24 12:50:23 +0000216 for (const auto &Info : CSI) {
217 unsigned Reg = Info.getReg();
Geoff Berry62d47252016-02-25 16:36:08 +0000218 int64_t Offset =
219 MFI->getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
Tim Northover3b0846e2014-05-24 12:50:23 +0000220 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
Geoff Berry62d47252016-02-25 16:36:08 +0000221 unsigned CFIIndex = MMI.addFrameInst(
222 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
Tim Northover3b0846e2014-05-24 12:50:23 +0000223 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
Adrian Prantlb9fa9452014-12-16 00:20:49 +0000224 .addCFIIndex(CFIIndex)
225 .setMIFlags(MachineInstr::FrameSetup);
Tim Northover3b0846e2014-05-24 12:50:23 +0000226 }
227}
228
Geoff Berry7e4ba3d2016-02-19 18:27:32 +0000229// Find a scratch register that we can use at the start of the prologue to
230// re-align the stack pointer. We avoid using callee-save registers since they
231// may appear to be free when this is called from canUseAsPrologue (during
232// shrink wrapping), but then no longer be free when this is called from
233// emitPrologue.
234//
235// FIXME: This is a bit conservative, since in the above case we could use one
236// of the callee-save registers as a scratch temp to re-align the stack pointer,
237// but we would then have to make sure that we were in fact saving at least one
238// callee-save register in the prologue, which is additional complexity that
239// doesn't seem worth the benefit.
240static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
241 MachineFunction *MF = MBB->getParent();
242
243 // If MBB is an entry block, use X9 as the scratch register
244 if (&MF->front() == MBB)
245 return AArch64::X9;
246
247 RegScavenger RS;
248 RS.enterBasicBlock(MBB);
249
250 // Prefer X9 since it was historically used for the prologue scratch reg.
251 if (!RS.isRegUsed(AArch64::X9))
252 return AArch64::X9;
253
254 // Find a free non callee-save reg.
255 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
256 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
257 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
258 BitVector CalleeSaveRegs(RegInfo->getNumRegs());
259 for (unsigned i = 0; CSRegs[i]; ++i)
260 CalleeSaveRegs.set(CSRegs[i]);
261
262 BitVector Available = RS.getRegsAvailable(&AArch64::GPR64RegClass);
263 for (int AvailReg = Available.find_first(); AvailReg != -1;
264 AvailReg = Available.find_next(AvailReg))
265 if (!CalleeSaveRegs.test(AvailReg))
266 return AvailReg;
267
268 return AArch64::NoRegister;
269}
270
271bool AArch64FrameLowering::canUseAsPrologue(
272 const MachineBasicBlock &MBB) const {
273 const MachineFunction *MF = MBB.getParent();
274 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
275 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
276 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
277
278 // Don't need a scratch register if we're not going to re-align the stack.
279 if (!RegInfo->needsStackRealignment(*MF))
280 return true;
281 // Otherwise, we can use any block as long as it has a scratch register
282 // available.
283 return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
284}
285
Quentin Colombet61b305e2015-05-05 17:38:16 +0000286void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
287 MachineBasicBlock &MBB) const {
Tim Northover3b0846e2014-05-24 12:50:23 +0000288 MachineBasicBlock::iterator MBBI = MBB.begin();
289 const MachineFrameInfo *MFI = MF.getFrameInfo();
290 const Function *Fn = MF.getFunction();
Ahmed Bougacha66834ec2015-12-16 22:54:06 +0000291 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
292 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
293 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +0000294 MachineModuleInfo &MMI = MF.getMMI();
Tim Northover775aaeb2015-11-05 21:54:58 +0000295 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
296 bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
297 bool HasFP = hasFP(MF);
298
299 // Debug location must be unknown since the first debug location is used
300 // to determine the end of the prologue.
301 DebugLoc DL;
302
303 // All calls are tail calls in GHC calling conv, and functions have no
304 // prologue/epilogue.
Greg Fitzgeraldfa78d082015-01-19 17:40:05 +0000305 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
306 return;
307
Tim Northover3b0846e2014-05-24 12:50:23 +0000308 int NumBytes = (int)MFI->getStackSize();
309 if (!AFI->hasStackFrame()) {
310 assert(!HasFP && "unexpected function without stack frame but with FP");
311
312 // All of the stack allocation is for locals.
313 AFI->setLocalStackSize(NumBytes);
314
315 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
Jim Grosbach6f482002015-05-18 18:43:14 +0000316 MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
Tim Northover3b0846e2014-05-24 12:50:23 +0000317
318 // REDZONE: If the stack size is less than 128 bytes, we don't need
319 // to actually allocate.
320 if (NumBytes && !canUseRedZone(MF)) {
321 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
322 MachineInstr::FrameSetup);
323
324 // Encode the stack size of the leaf function.
325 unsigned CFIIndex = MMI.addFrameInst(
326 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
327 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
Adrian Prantlb9fa9452014-12-16 00:20:49 +0000328 .addCFIIndex(CFIIndex)
329 .setMIFlags(MachineInstr::FrameSetup);
Tim Northover3b0846e2014-05-24 12:50:23 +0000330 } else if (NumBytes) {
331 ++NumRedZoneFunctions;
332 }
333
334 return;
335 }
336
337 // Only set up FP if we actually need to.
338 int FPOffset = 0;
Manman Ren0e208222015-04-29 20:03:38 +0000339 if (HasFP)
Geoff Berry04bf91a2016-02-01 16:29:19 +0000340 // Frame pointer is fp = sp - 16.
341 FPOffset = AFI->getCalleeSavedStackSize() - 16;
Tim Northover3b0846e2014-05-24 12:50:23 +0000342
343 // Move past the saves of the callee-saved registers.
Geoff Berry04bf91a2016-02-01 16:29:19 +0000344 MachineBasicBlock::iterator End = MBB.end();
345 while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup))
Tim Northover3b0846e2014-05-24 12:50:23 +0000346 ++MBBI;
Geoff Berry04bf91a2016-02-01 16:29:19 +0000347 NumBytes -= AFI->getCalleeSavedStackSize();
Tim Northover3b0846e2014-05-24 12:50:23 +0000348 assert(NumBytes >= 0 && "Negative stack allocation size!?");
349 if (HasFP) {
350 // Issue sub fp, sp, FPOffset or
351 // mov fp,sp when FPOffset is zero.
352 // Note: All stores of callee-saved registers are marked as "FrameSetup".
353 // This code marks the instruction(s) that set the FP also.
354 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
355 MachineInstr::FrameSetup);
356 }
357
358 // All of the remaining stack allocations are for locals.
359 AFI->setLocalStackSize(NumBytes);
360
361 // Allocate space for the rest of the frame.
Kristof Beyls17cb8982015-04-09 08:49:47 +0000362
363 const unsigned Alignment = MFI->getMaxAlignment();
Evgeniy Stepanov00b30202015-07-10 21:24:07 +0000364 const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
Kristof Beyls17cb8982015-04-09 08:49:47 +0000365 unsigned scratchSPReg = AArch64::SP;
Evgeniy Stepanov00b30202015-07-10 21:24:07 +0000366 if (NumBytes && NeedsRealignment) {
Geoff Berry7e4ba3d2016-02-19 18:27:32 +0000367 scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
368 assert(scratchSPReg != AArch64::NoRegister);
Kristof Beyls17cb8982015-04-09 08:49:47 +0000369 }
370
371 // If we're a leaf function, try using the red zone.
372 if (NumBytes && !canUseRedZone(MF))
373 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
374 // the correct value here, as NumBytes also includes padding bytes,
375 // which shouldn't be counted here.
376 emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
377 MachineInstr::FrameSetup);
378
Kristof Beyls17cb8982015-04-09 08:49:47 +0000379 if (NumBytes && NeedsRealignment) {
380 const unsigned NrBitsToZero = countTrailingZeros(Alignment);
381 assert(NrBitsToZero > 1);
382 assert(scratchSPReg != AArch64::SP);
383
384 // SUB X9, SP, NumBytes
385 // -- X9 is temporary register, so shouldn't contain any live data here,
386 // -- free to use. This is already produced by emitFrameOffset above.
387 // AND SP, X9, 0b11111...0000
388 // The logical immediates have a non-trivial encoding. The following
389 // formula computes the encoded immediate with all ones but
390 // NrBitsToZero zero bits as least significant bits.
391 uint32_t andMaskEncoded =
392 (1 <<12) // = N
393 | ((64-NrBitsToZero) << 6) // immr
394 | ((64-NrBitsToZero-1) << 0) // imms
395 ;
396 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
397 .addReg(scratchSPReg, RegState::Kill)
398 .addImm(andMaskEncoded);
Tim Northover3b0846e2014-05-24 12:50:23 +0000399 }
400
401 // If we need a base pointer, set it up here. It's whatever the value of the
402 // stack pointer is at this point. Any variable size objects will be allocated
403 // after this, so we can still use the base pointer to reference locals.
404 //
405 // FIXME: Clarify FrameSetup flags here.
406 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
407 // needed.
Kristof Beyls17cb8982015-04-09 08:49:47 +0000408 if (RegInfo->hasBasePointer(MF)) {
409 TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
410 false);
411 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000412
413 if (needsFrameMoves) {
Mehdi Aminibd7287e2015-07-16 06:11:10 +0000414 const DataLayout &TD = MF.getDataLayout();
415 const int StackGrowth = -TD.getPointerSize(0);
Tim Northover3b0846e2014-05-24 12:50:23 +0000416 unsigned FramePtr = RegInfo->getFrameRegister(MF);
Tim Northover3b0846e2014-05-24 12:50:23 +0000417 // An example of the prologue:
418 //
419 // .globl __foo
420 // .align 2
421 // __foo:
422 // Ltmp0:
423 // .cfi_startproc
424 // .cfi_personality 155, ___gxx_personality_v0
425 // Leh_func_begin:
426 // .cfi_lsda 16, Lexception33
427 //
428 // stp xa,bx, [sp, -#offset]!
429 // ...
430 // stp x28, x27, [sp, #offset-32]
431 // stp fp, lr, [sp, #offset-16]
432 // add fp, sp, #offset - 16
433 // sub sp, sp, #1360
434 //
435 // The Stack:
436 // +-------------------------------------------+
437 // 10000 | ........ | ........ | ........ | ........ |
438 // 10004 | ........ | ........ | ........ | ........ |
439 // +-------------------------------------------+
440 // 10008 | ........ | ........ | ........ | ........ |
441 // 1000c | ........ | ........ | ........ | ........ |
442 // +===========================================+
443 // 10010 | X28 Register |
444 // 10014 | X28 Register |
445 // +-------------------------------------------+
446 // 10018 | X27 Register |
447 // 1001c | X27 Register |
448 // +===========================================+
449 // 10020 | Frame Pointer |
450 // 10024 | Frame Pointer |
451 // +-------------------------------------------+
452 // 10028 | Link Register |
453 // 1002c | Link Register |
454 // +===========================================+
455 // 10030 | ........ | ........ | ........ | ........ |
456 // 10034 | ........ | ........ | ........ | ........ |
457 // +-------------------------------------------+
458 // 10038 | ........ | ........ | ........ | ........ |
459 // 1003c | ........ | ........ | ........ | ........ |
460 // +-------------------------------------------+
461 //
462 // [sp] = 10030 :: >>initial value<<
463 // sp = 10020 :: stp fp, lr, [sp, #-16]!
464 // fp = sp == 10020 :: mov fp, sp
465 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
466 // sp == 10010 :: >>final value<<
467 //
468 // The frame pointer (w29) points to address 10020. If we use an offset of
469 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
470 // for w27, and -32 for w28:
471 //
472 // Ltmp1:
473 // .cfi_def_cfa w29, 16
474 // Ltmp2:
475 // .cfi_offset w30, -8
476 // Ltmp3:
477 // .cfi_offset w29, -16
478 // Ltmp4:
479 // .cfi_offset w27, -24
480 // Ltmp5:
481 // .cfi_offset w28, -32
482
483 if (HasFP) {
484 // Define the current CFA rule to use the provided FP.
485 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
486 unsigned CFIIndex = MMI.addFrameInst(
487 MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
488 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
Adrian Prantlb9fa9452014-12-16 00:20:49 +0000489 .addCFIIndex(CFIIndex)
490 .setMIFlags(MachineInstr::FrameSetup);
Tim Northover3b0846e2014-05-24 12:50:23 +0000491 } else {
492 // Encode the stack size of the leaf function.
493 unsigned CFIIndex = MMI.addFrameInst(
494 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
495 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
Adrian Prantlb9fa9452014-12-16 00:20:49 +0000496 .addCFIIndex(CFIIndex)
497 .setMIFlags(MachineInstr::FrameSetup);
Tim Northover3b0846e2014-05-24 12:50:23 +0000498 }
499
Geoff Berry62d47252016-02-25 16:36:08 +0000500 // Now emit the moves for whatever callee saved regs we have (including FP,
501 // LR if those are saved).
502 emitCalleeSavedFrameMoves(MBB, MBBI);
Tim Northover3b0846e2014-05-24 12:50:23 +0000503 }
504}
505
Tim Northover3b0846e2014-05-24 12:50:23 +0000506void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
507 MachineBasicBlock &MBB) const {
508 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
Tim Northover3b0846e2014-05-24 12:50:23 +0000509 MachineFrameInfo *MFI = MF.getFrameInfo();
Ahmed Bougacha66834ec2015-12-16 22:54:06 +0000510 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
Ahmed Bougacha66834ec2015-12-16 22:54:06 +0000511 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
Quentin Colombet61b305e2015-05-05 17:38:16 +0000512 DebugLoc DL;
513 bool IsTailCallReturn = false;
514 if (MBB.end() != MBBI) {
515 DL = MBBI->getDebugLoc();
516 unsigned RetOpcode = MBBI->getOpcode();
517 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
518 RetOpcode == AArch64::TCRETURNri;
519 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000520 int NumBytes = MFI->getStackSize();
521 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
522
Greg Fitzgeraldfa78d082015-01-19 17:40:05 +0000523 // All calls are tail calls in GHC calling conv, and functions have no
524 // prologue/epilogue.
525 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
526 return;
527
Kristof Beyls17cb8982015-04-09 08:49:47 +0000528 // Initial and residual are named for consistency with the prologue. Note that
Tim Northover3b0846e2014-05-24 12:50:23 +0000529 // in the epilogue, the residual adjustment is executed first.
530 uint64_t ArgumentPopSize = 0;
Quentin Colombet61b305e2015-05-05 17:38:16 +0000531 if (IsTailCallReturn) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000532 MachineOperand &StackAdjust = MBBI->getOperand(1);
533
534 // For a tail-call in a callee-pops-arguments environment, some or all of
535 // the stack may actually be in use for the call's arguments, this is
536 // calculated during LowerCall and consumed here...
537 ArgumentPopSize = StackAdjust.getImm();
538 } else {
539 // ... otherwise the amount to pop is *all* of the argument space,
540 // conveniently stored in the MachineFunctionInfo by
541 // LowerFormalArguments. This will, of course, be zero for the C calling
542 // convention.
543 ArgumentPopSize = AFI->getArgumentStackToRestore();
544 }
545
546 // The stack frame should be like below,
547 //
548 // ---------------------- ---
549 // | | |
550 // | BytesInStackArgArea| CalleeArgStackSize
551 // | (NumReusableBytes) | (of tail call)
552 // | | ---
553 // | | |
554 // ---------------------| --- |
555 // | | | |
556 // | CalleeSavedReg | | |
Geoff Berry04bf91a2016-02-01 16:29:19 +0000557 // | (CalleeSavedStackSize)| | |
Tim Northover3b0846e2014-05-24 12:50:23 +0000558 // | | | |
559 // ---------------------| | NumBytes
560 // | | StackSize (StackAdjustUp)
561 // | LocalStackSize | | |
562 // | (covering callee | | |
563 // | args) | | |
564 // | | | |
565 // ---------------------- --- ---
566 //
567 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
568 // = StackSize + ArgumentPopSize
569 //
570 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
571 // it as the 2nd argument of AArch64ISD::TC_RETURN.
Tim Northover3b0846e2014-05-24 12:50:23 +0000572
Tim Northover3b0846e2014-05-24 12:50:23 +0000573 // Move past the restores of the callee-saved registers.
Quentin Colombet61b305e2015-05-05 17:38:16 +0000574 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
Matthias Braun45419292015-12-17 03:18:47 +0000575 MachineBasicBlock::iterator Begin = MBB.begin();
576 while (LastPopI != Begin) {
577 --LastPopI;
Geoff Berry04bf91a2016-02-01 16:29:19 +0000578 if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000579 ++LastPopI;
Matthias Braun45419292015-12-17 03:18:47 +0000580 break;
Tim Northover3b0846e2014-05-24 12:50:23 +0000581 }
582 }
Geoff Berry04bf91a2016-02-01 16:29:19 +0000583 NumBytes -= AFI->getCalleeSavedStackSize();
Tim Northover3b0846e2014-05-24 12:50:23 +0000584 assert(NumBytes >= 0 && "Negative stack allocation size!?");
585
586 if (!hasFP(MF)) {
Geoff Berrya1c62692016-02-23 16:54:36 +0000587 bool RedZone = canUseRedZone(MF);
Tim Northover3b0846e2014-05-24 12:50:23 +0000588 // If this was a redzone leaf function, we don't need to restore the
Geoff Berrya1c62692016-02-23 16:54:36 +0000589 // stack pointer (but we may need to pop stack args for fastcc).
590 if (RedZone && ArgumentPopSize == 0)
591 return;
592
593 bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0;
594 int StackRestoreBytes = RedZone ? 0 : NumBytes;
595 if (NoCalleeSaveRestore)
596 StackRestoreBytes += ArgumentPopSize;
597 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
598 StackRestoreBytes, TII, MachineInstr::FrameDestroy);
599 // If we were able to combine the local stack pop with the argument pop,
600 // then we're done.
601 if (NoCalleeSaveRestore || ArgumentPopSize == 0)
602 return;
603 NumBytes = 0;
Tim Northover3b0846e2014-05-24 12:50:23 +0000604 }
605
606 // Restore the original stack pointer.
607 // FIXME: Rather than doing the math here, we should instead just use
608 // non-post-indexed loads for the restores if we aren't actually going to
609 // be able to save any instructions.
610 if (NumBytes || MFI->hasVarSizedObjects())
611 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
Geoff Berry04bf91a2016-02-01 16:29:19 +0000612 -AFI->getCalleeSavedStackSize() + 16, TII,
613 MachineInstr::FrameDestroy);
Geoff Berrya1c62692016-02-23 16:54:36 +0000614
615 // This must be placed after the callee-save restore code because that code
616 // assumes the SP is at the same location as it was after the callee-save save
617 // code in the prologue.
618 if (ArgumentPopSize)
619 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
620 ArgumentPopSize, TII, MachineInstr::FrameDestroy);
Tim Northover3b0846e2014-05-24 12:50:23 +0000621}
622
Tim Northover3b0846e2014-05-24 12:50:23 +0000623/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
624/// debug info. It's the same as what we use for resolving the code-gen
625/// references for now. FIXME: This can go wrong when references are
626/// SP-relative and simple call frames aren't used.
627int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
628 int FI,
629 unsigned &FrameReg) const {
630 return resolveFrameIndexReference(MF, FI, FrameReg);
631}
632
633int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
634 int FI, unsigned &FrameReg,
635 bool PreferFP) const {
636 const MachineFrameInfo *MFI = MF.getFrameInfo();
637 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
Eric Christopherfc6de422014-08-05 02:39:49 +0000638 MF.getSubtarget().getRegisterInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +0000639 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
640 int FPOffset = MFI->getObjectOffset(FI) + 16;
641 int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
642 bool isFixed = MFI->isFixedObjectIndex(FI);
643
644 // Use frame pointer to reference fixed objects. Use it for locals if
Kristof Beyls17cb8982015-04-09 08:49:47 +0000645 // there are VLAs or a dynamically realigned SP (and thus the SP isn't
646 // reliable as a base). Make sure useFPForScavengingIndex() does the
647 // right thing for the emergency spill slot.
Tim Northover3b0846e2014-05-24 12:50:23 +0000648 bool UseFP = false;
649 if (AFI->hasStackFrame()) {
650 // Note: Keeping the following as multiple 'if' statements rather than
651 // merging to a single expression for readability.
652 //
653 // Argument access should always use the FP.
654 if (isFixed) {
655 UseFP = hasFP(MF);
Kristof Beyls17cb8982015-04-09 08:49:47 +0000656 } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
657 !RegInfo->needsStackRealignment(MF)) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000658 // Use SP or FP, whichever gives us the best chance of the offset
659 // being in range for direct access. If the FPOffset is positive,
660 // that'll always be best, as the SP will be even further away.
661 // If the FPOffset is negative, we have to keep in mind that the
662 // available offset range for negative offsets is smaller than for
663 // positive ones. If we have variable sized objects, we're stuck with
664 // using the FP regardless, though, as the SP offset is unknown
665 // and we don't have a base pointer available. If an offset is
666 // available via the FP and the SP, use whichever is closest.
667 if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
668 (FPOffset >= -256 && Offset > -FPOffset))
669 UseFP = true;
670 }
671 }
672
Kristof Beyls17cb8982015-04-09 08:49:47 +0000673 assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
674 "In the presence of dynamic stack pointer realignment, "
675 "non-argument objects cannot be accessed through the frame pointer");
676
Tim Northover3b0846e2014-05-24 12:50:23 +0000677 if (UseFP) {
678 FrameReg = RegInfo->getFrameRegister(MF);
679 return FPOffset;
680 }
681
682 // Use the base pointer if we have one.
683 if (RegInfo->hasBasePointer(MF))
684 FrameReg = RegInfo->getBaseRegister();
685 else {
686 FrameReg = AArch64::SP;
687 // If we're using the red zone for this function, the SP won't actually
688 // be adjusted, so the offsets will be negative. They're also all
689 // within range of the signed 9-bit immediate instructions.
690 if (canUseRedZone(MF))
691 Offset -= AFI->getLocalStackSize();
692 }
693
694 return Offset;
695}
696
697static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
698 if (Reg != AArch64::LR)
699 return getKillRegState(true);
700
701 // LR maybe referred to later by an @llvm.returnaddress intrinsic.
702 bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
703 bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
704 return getKillRegState(LRKill);
705}
706
Geoff Berry29d4a692016-02-01 19:07:06 +0000707struct RegPairInfo {
708 RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
709 unsigned Reg1;
710 unsigned Reg2;
711 int FrameIdx;
712 int Offset;
713 bool IsGPR;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000714 bool isPaired() const { return Reg2 != AArch64::NoRegister; }
Geoff Berry29d4a692016-02-01 19:07:06 +0000715};
716
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000717static void computeCalleeSaveRegisterPairs(
718 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
719 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
Geoff Berry29d4a692016-02-01 19:07:06 +0000720
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000721 if (CSI.empty())
722 return;
723
724 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
725 MachineFrameInfo *MFI = MF.getFrameInfo();
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000726 CallingConv::ID CC = MF.getFunction()->getCallingConv();
Tim Northover3b0846e2014-05-24 12:50:23 +0000727 unsigned Count = CSI.size();
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000728 (void)CC;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000729 // MachO's compact unwind format relies on all registers being stored in
730 // pairs.
731 assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000732 CC == CallingConv::PreserveMost ||
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000733 (Count & 1) == 0) &&
734 "Odd number of callee-saved regs to spill!");
735 unsigned Offset = AFI->getCalleeSavedStackSize();
Tim Northover775aaeb2015-11-05 21:54:58 +0000736
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000737 for (unsigned i = 0; i < Count; ++i) {
Geoff Berry29d4a692016-02-01 19:07:06 +0000738 RegPairInfo RPI;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000739 RPI.Reg1 = CSI[i].getReg();
740
741 assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
742 AArch64::FPR64RegClass.contains(RPI.Reg1));
743 RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
744
745 // Add the next reg to the pair if it is in the same register class.
746 if (i + 1 < Count) {
747 unsigned NextReg = CSI[i + 1].getReg();
748 if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
749 (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
750 RPI.Reg2 = NextReg;
751 }
Geoff Berry29d4a692016-02-01 19:07:06 +0000752
Tim Northover3b0846e2014-05-24 12:50:23 +0000753 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
754 // list to come in sorted by frame index so that we can issue the store
755 // pair instructions directly. Assert if we see anything otherwise.
756 //
757 // The order of the registers in the list is controlled by
758 // getCalleeSavedRegs(), so they will always be in-order, as well.
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000759 assert((!RPI.isPaired() ||
760 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
Tim Northover3b0846e2014-05-24 12:50:23 +0000761 "Out of order callee saved regs!");
Geoff Berry29d4a692016-02-01 19:07:06 +0000762
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000763 // MachO's compact unwind format relies on all registers being stored in
764 // adjacent register pairs.
765 assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000766 CC == CallingConv::PreserveMost ||
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000767 (RPI.isPaired() &&
768 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
769 RPI.Reg1 + 1 == RPI.Reg2))) &&
770 "Callee-save registers not saved as adjacent register pair!");
771
772 RPI.FrameIdx = CSI[i].getFrameIdx();
773
774 if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
775 // Round up size of non-pair to pair size if we need to pad the
776 // callee-save area to ensure 16-byte alignment.
777 Offset -= 16;
778 assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
779 MFI->setObjectSize(RPI.FrameIdx, 16);
780 } else
781 Offset -= RPI.isPaired() ? 16 : 8;
782 assert(Offset % 8 == 0);
783 RPI.Offset = Offset / 8;
Geoff Berry29d4a692016-02-01 19:07:06 +0000784 assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
785 "Offset out of bounds for LDP/STP immediate");
786
787 RegPairs.push_back(RPI);
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000788 if (RPI.isPaired())
789 ++i;
Geoff Berry29d4a692016-02-01 19:07:06 +0000790 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000791
792 // Align first offset to even 16-byte boundary to avoid additional SP
793 // adjustment instructions.
794 // Last pair offset is size of whole callee-save region for SP
795 // pre-dec/post-inc.
796 RegPairInfo &LastPair = RegPairs.back();
797 assert(AFI->getCalleeSavedStackSize() % 8 == 0);
798 LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
Geoff Berry29d4a692016-02-01 19:07:06 +0000799}
800
801bool AArch64FrameLowering::spillCalleeSavedRegisters(
802 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
803 const std::vector<CalleeSavedInfo> &CSI,
804 const TargetRegisterInfo *TRI) const {
805 MachineFunction &MF = *MBB.getParent();
806 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
807 DebugLoc DL;
808 SmallVector<RegPairInfo, 8> RegPairs;
809
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000810 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
Geoff Berry29d4a692016-02-01 19:07:06 +0000811
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000812 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
Geoff Berry29d4a692016-02-01 19:07:06 +0000813 ++RPII) {
814 RegPairInfo RPI = *RPII;
815 unsigned Reg1 = RPI.Reg1;
816 unsigned Reg2 = RPI.Reg2;
817 unsigned StrOpc;
818
Tim Northover3b0846e2014-05-24 12:50:23 +0000819 // Issue sequence of non-sp increment and pi sp spills for cs regs. The
820 // first spill is a pre-increment that allocates the stack.
821 // For example:
822 // stp x22, x21, [sp, #-48]! // addImm(-6)
823 // stp x20, x19, [sp, #16] // addImm(+2)
824 // stp fp, lr, [sp, #32] // addImm(+4)
825 // Rationale: This sequence saves uop updates compared to a sequence of
826 // pre-increment spills like stp xi,xj,[sp,#-16]!
Geoff Berry29d4a692016-02-01 19:07:06 +0000827 // Note: Similar rationale and sequence for restores in epilog.
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000828 bool BumpSP = RPII == RegPairs.rbegin();
Geoff Berry29d4a692016-02-01 19:07:06 +0000829 if (RPI.IsGPR) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000830 // For first spill use pre-increment store.
Geoff Berry29d4a692016-02-01 19:07:06 +0000831 if (BumpSP)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000832 StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre;
Tim Northover3b0846e2014-05-24 12:50:23 +0000833 else
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000834 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
Geoff Berry29d4a692016-02-01 19:07:06 +0000835 } else {
Tim Northover3b0846e2014-05-24 12:50:23 +0000836 // For first spill use pre-increment store.
Geoff Berry29d4a692016-02-01 19:07:06 +0000837 if (BumpSP)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000838 StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre;
Tim Northover3b0846e2014-05-24 12:50:23 +0000839 else
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000840 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
Geoff Berry29d4a692016-02-01 19:07:06 +0000841 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000842 DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
843 if (RPI.isPaired())
844 dbgs() << ", " << TRI->getName(Reg2);
845 dbgs() << ") -> fi#(" << RPI.FrameIdx;
846 if (RPI.isPaired())
847 dbgs() << ", " << RPI.FrameIdx+1;
848 dbgs() << ")\n");
Geoff Berry29d4a692016-02-01 19:07:06 +0000849
850 const int Offset = BumpSP ? -RPI.Offset : RPI.Offset;
Tim Northover3b0846e2014-05-24 12:50:23 +0000851 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
Geoff Berry29d4a692016-02-01 19:07:06 +0000852 if (BumpSP)
Tim Northover3b0846e2014-05-24 12:50:23 +0000853 MIB.addReg(AArch64::SP, RegState::Define);
854
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000855 if (RPI.isPaired()) {
856 MBB.addLiveIn(Reg1);
857 MBB.addLiveIn(Reg2);
858 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
Tim Northover3b0846e2014-05-24 12:50:23 +0000859 .addReg(Reg1, getPrologueDeath(MF, Reg1))
860 .addReg(AArch64::SP)
861 .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
862 .setMIFlag(MachineInstr::FrameSetup);
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000863 } else {
864 MBB.addLiveIn(Reg1);
865 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
866 .addReg(AArch64::SP)
867 .addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled
868 .setMIFlag(MachineInstr::FrameSetup);
869 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000870 }
871 return true;
872}
873
874bool AArch64FrameLowering::restoreCalleeSavedRegisters(
875 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
876 const std::vector<CalleeSavedInfo> &CSI,
877 const TargetRegisterInfo *TRI) const {
878 MachineFunction &MF = *MBB.getParent();
Eric Christopherfc6de422014-08-05 02:39:49 +0000879 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +0000880 DebugLoc DL;
Geoff Berry29d4a692016-02-01 19:07:06 +0000881 SmallVector<RegPairInfo, 8> RegPairs;
Tim Northover3b0846e2014-05-24 12:50:23 +0000882
883 if (MI != MBB.end())
884 DL = MI->getDebugLoc();
885
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000886 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
Geoff Berry29d4a692016-02-01 19:07:06 +0000887
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000888 for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
Geoff Berry29d4a692016-02-01 19:07:06 +0000889 ++RPII) {
890 RegPairInfo RPI = *RPII;
891 unsigned Reg1 = RPI.Reg1;
892 unsigned Reg2 = RPI.Reg2;
893
Tim Northover3b0846e2014-05-24 12:50:23 +0000894 // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
895 // the last load is sp-pi post-increment and de-allocates the stack:
896 // For example:
897 // ldp fp, lr, [sp, #32] // addImm(+4)
898 // ldp x20, x19, [sp, #16] // addImm(+2)
899 // ldp x22, x21, [sp], #48 // addImm(+6)
900 // Note: see comment in spillCalleeSavedRegisters()
901 unsigned LdrOpc;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000902 bool BumpSP = RPII == std::prev(RegPairs.end());
Geoff Berry29d4a692016-02-01 19:07:06 +0000903 if (RPI.IsGPR) {
904 if (BumpSP)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000905 LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost;
Tim Northover3b0846e2014-05-24 12:50:23 +0000906 else
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000907 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
Geoff Berry29d4a692016-02-01 19:07:06 +0000908 } else {
909 if (BumpSP)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000910 LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost;
Tim Northover3b0846e2014-05-24 12:50:23 +0000911 else
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000912 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
Geoff Berry29d4a692016-02-01 19:07:06 +0000913 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000914 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
915 if (RPI.isPaired())
916 dbgs() << ", " << TRI->getName(Reg2);
917 dbgs() << ") -> fi#(" << RPI.FrameIdx;
918 if (RPI.isPaired())
919 dbgs() << ", " << RPI.FrameIdx+1;
920 dbgs() << ")\n");
Tim Northover3b0846e2014-05-24 12:50:23 +0000921
Geoff Berry29d4a692016-02-01 19:07:06 +0000922 const int Offset = RPI.Offset;
Tim Northover3b0846e2014-05-24 12:50:23 +0000923 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
Geoff Berry29d4a692016-02-01 19:07:06 +0000924 if (BumpSP)
Tim Northover3b0846e2014-05-24 12:50:23 +0000925 MIB.addReg(AArch64::SP, RegState::Define);
926
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000927 if (RPI.isPaired())
928 MIB.addReg(Reg2, getDefRegState(true))
Tim Northover3b0846e2014-05-24 12:50:23 +0000929 .addReg(Reg1, getDefRegState(true))
930 .addReg(AArch64::SP)
Geoff Berry04bf91a2016-02-01 16:29:19 +0000931 .addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8]
932 // where the factor * 8 is implicit
933 .setMIFlag(MachineInstr::FrameDestroy);
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000934 else
935 MIB.addReg(Reg1, getDefRegState(true))
936 .addReg(AArch64::SP)
937 .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled
938 .setMIFlag(MachineInstr::FrameDestroy);
Tim Northover3b0846e2014-05-24 12:50:23 +0000939 }
940 return true;
941}
942
Matthias Braun02564862015-07-14 17:17:13 +0000943void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
944 BitVector &SavedRegs,
945 RegScavenger *RS) const {
946 // All calls are tail calls in GHC calling conv, and functions have no
947 // prologue/epilogue.
948 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
949 return;
950
951 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
Tim Northover3b0846e2014-05-24 12:50:23 +0000952 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
Eric Christopherfc6de422014-08-05 02:39:49 +0000953 MF.getSubtarget().getRegisterInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +0000954 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000955 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
956 unsigned UnspilledCSGPR = AArch64::NoRegister;
957 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
Tim Northover3b0846e2014-05-24 12:50:23 +0000958
959 // The frame record needs to be created by saving the appropriate registers
960 if (hasFP(MF)) {
Matthias Braun02564862015-07-14 17:17:13 +0000961 SavedRegs.set(AArch64::FP);
962 SavedRegs.set(AArch64::LR);
Tim Northover3b0846e2014-05-24 12:50:23 +0000963 }
964
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000965 unsigned BasePointerReg = AArch64::NoRegister;
Tim Northover3b0846e2014-05-24 12:50:23 +0000966 if (RegInfo->hasBasePointer(MF))
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000967 BasePointerReg = RegInfo->getBaseRegister();
Tim Northover3b0846e2014-05-24 12:50:23 +0000968
Tim Northover3b0846e2014-05-24 12:50:23 +0000969 bool ExtraCSSpill = false;
Tim Northover3b0846e2014-05-24 12:50:23 +0000970 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000971 // Figure out which callee-saved registers to save/restore.
972 for (unsigned i = 0; CSRegs[i]; ++i) {
973 const unsigned Reg = CSRegs[i];
Tim Northover3b0846e2014-05-24 12:50:23 +0000974
Geoff Berry7e4ba3d2016-02-19 18:27:32 +0000975 // Add the base pointer register to SavedRegs if it is callee-save.
976 if (Reg == BasePointerReg)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000977 SavedRegs.set(Reg);
Tim Northover3b0846e2014-05-24 12:50:23 +0000978
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000979 bool RegUsed = SavedRegs.test(Reg);
980 unsigned PairedReg = CSRegs[i ^ 1];
981 if (!RegUsed) {
982 if (AArch64::GPR64RegClass.contains(Reg) &&
983 !RegInfo->isReservedReg(MF, Reg)) {
984 UnspilledCSGPR = Reg;
985 UnspilledCSGPRPaired = PairedReg;
Tim Northover3b0846e2014-05-24 12:50:23 +0000986 }
987 continue;
988 }
989
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000990 // MachO's compact unwind format relies on all registers being stored in
991 // pairs.
992 // FIXME: the usual format is actually better if unwinding isn't needed.
993 if (Subtarget.isTargetMachO() && !SavedRegs.test(PairedReg)) {
994 SavedRegs.set(PairedReg);
995 ExtraCSSpill = true;
Tim Northover3b0846e2014-05-24 12:50:23 +0000996 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000997 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000998
999 DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
1000 for (int Reg = SavedRegs.find_first(); Reg != -1;
1001 Reg = SavedRegs.find_next(Reg))
1002 dbgs() << ' ' << PrintReg(Reg, RegInfo);
1003 dbgs() << "\n";);
1004
1005 // If any callee-saved registers are used, the frame cannot be eliminated.
1006 unsigned NumRegsSpilled = SavedRegs.count();
1007 bool CanEliminateFrame = NumRegsSpilled == 0;
Tim Northover3b0846e2014-05-24 12:50:23 +00001008
1009 // FIXME: Set BigStack if any stack slot references may be out of range.
1010 // For now, just conservatively guestimate based on unscaled indexing
1011 // range. We'll end up allocating an unnecessary spill slot a lot, but
1012 // realistically that's not a big deal at this stage of the game.
1013 // The CSR spill slots have not been allocated yet, so estimateStackSize
1014 // won't include them.
1015 MachineFrameInfo *MFI = MF.getFrameInfo();
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001016 unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
Tim Northover3b0846e2014-05-24 12:50:23 +00001017 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
1018 bool BigStack = (CFSize >= 256);
1019 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
1020 AFI->setHasStackFrame(true);
1021
1022 // Estimate if we might need to scavenge a register at some point in order
1023 // to materialize a stack offset. If so, either spill one additional
1024 // callee-saved register or reserve a special spill slot to facilitate
1025 // register scavenging. If we already spilled an extra callee-saved register
1026 // above to keep the number of spills even, we don't need to do anything else
1027 // here.
1028 if (BigStack && !ExtraCSSpill) {
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001029 if (UnspilledCSGPR != AArch64::NoRegister) {
1030 DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
1031 << " to get a scratch register.\n");
1032 SavedRegs.set(UnspilledCSGPR);
1033 // MachO's compact unwind format relies on all registers being stored in
1034 // pairs, so if we need to spill one extra for BigStack, then we need to
1035 // store the pair.
1036 if (Subtarget.isTargetMachO())
1037 SavedRegs.set(UnspilledCSGPRPaired);
Tim Northover3b0846e2014-05-24 12:50:23 +00001038 ExtraCSSpill = true;
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001039 NumRegsSpilled = SavedRegs.count();
Tim Northover3b0846e2014-05-24 12:50:23 +00001040 }
1041
1042 // If we didn't find an extra callee-saved register to spill, create
1043 // an emergency spill slot.
1044 if (!ExtraCSSpill) {
1045 const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
1046 int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
1047 RS->addScavengingFrameIndex(FI);
1048 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
1049 << " as the emergency spill slot.\n");
1050 }
1051 }
Geoff Berry04bf91a2016-02-01 16:29:19 +00001052
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001053 // Round up to register pair alignment to avoid additional SP adjustment
1054 // instructions.
1055 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
Tim Northover3b0846e2014-05-24 12:50:23 +00001056}