blob: f567b624bebfe9539f4852566c1f1d483e3ae37a [file] [log] [blame]
Tim Northover3b0846e2014-05-24 12:50:23 +00001//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains the AArch64 implementation of TargetFrameLowering class.
11//
Kristof Beyls17cb8982015-04-09 08:49:47 +000012// On AArch64, stack frames are structured as follows:
13//
14// The stack grows downward.
15//
16// All of the individual frame areas on the frame below are optional, i.e. it's
17// possible to create a function so that the particular area isn't present
18// in the frame.
19//
20// At function entry, the "frame" looks as follows:
21//
22// | | Higher address
23// |-----------------------------------|
24// | |
25// | arguments passed on the stack |
26// | |
27// |-----------------------------------| <- sp
28// | | Lower address
29//
30//
31// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red zone is used. Also,
// technically the last frame area (VLAs) isn't created until the main
// function body executes, after the prologue has run. However, it's depicted
// here for completeness.
36//
37// | | Higher address
38// |-----------------------------------|
39// | |
40// | arguments passed on the stack |
41// | |
42// |-----------------------------------|
43// | |
44// | prev_fp, prev_lr |
45// | (a.k.a. "frame record") |
46// |-----------------------------------| <- fp(=x29)
47// | |
48// | other callee-saved registers |
49// | |
50// |-----------------------------------|
51// |.empty.space.to.make.part.below....|
52// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
53// |.the.standard.16-byte.alignment....| compile time; if present)
54// |-----------------------------------|
55// | |
56// | local variables of fixed size |
57// | including spill slots |
58// |-----------------------------------| <- bp(not defined by ABI,
59// |.variable-sized.local.variables....| LLVM chooses X19)
60// |.(VLAs)............................| (size of this area is unknown at
61// |...................................| compile time)
62// |-----------------------------------| <- sp
63// | | Lower address
64//
65//
// To access the data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) must be computable at compile time. The size of the areas with
// a dotted background cannot be computed at compile time if they are present,
// so all three of fp, bp and sp must be set up to be able to access all
// contents in the frame areas, assuming all of the frame areas are non-empty.
72//
73// For most functions, some of the frame areas are empty. For those functions,
74// it may not be necessary to set up fp or bp:
Benjamin Kramerdf005cb2015-08-08 18:27:36 +000075// * A base pointer is definitely needed when there are both VLAs and local
Kristof Beyls17cb8982015-04-09 08:49:47 +000076// variables with more-than-default alignment requirements.
Benjamin Kramerdf005cb2015-08-08 18:27:36 +000077// * A frame pointer is definitely needed when there are local variables with
Kristof Beyls17cb8982015-04-09 08:49:47 +000078// more-than-default alignment requirements.
79//
80// In some cases when a base pointer is not strictly needed, it is generated
81// anyway when offsets from the frame pointer to access local variables become
82// so large that the offset can't be encoded in the immediate fields of loads
83// or stores.
84//
85// FIXME: also explain the redzone concept.
86// FIXME: also explain the concept of reserved call frames.
87//
Tim Northover3b0846e2014-05-24 12:50:23 +000088//===----------------------------------------------------------------------===//
89
90#include "AArch64FrameLowering.h"
91#include "AArch64InstrInfo.h"
92#include "AArch64MachineFunctionInfo.h"
93#include "AArch64Subtarget.h"
94#include "AArch64TargetMachine.h"
95#include "llvm/ADT/Statistic.h"
Tim Northover3b0846e2014-05-24 12:50:23 +000096#include "llvm/CodeGen/MachineFrameInfo.h"
97#include "llvm/CodeGen/MachineFunction.h"
98#include "llvm/CodeGen/MachineInstrBuilder.h"
99#include "llvm/CodeGen/MachineModuleInfo.h"
100#include "llvm/CodeGen/MachineRegisterInfo.h"
101#include "llvm/CodeGen/RegisterScavenging.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +0000102#include "llvm/IR/DataLayout.h"
103#include "llvm/IR/Function.h"
Tim Northover3b0846e2014-05-24 12:50:23 +0000104#include "llvm/Support/CommandLine.h"
Benjamin Kramer1f8930e2014-07-25 11:42:14 +0000105#include "llvm/Support/Debug.h"
Tim Northover3b0846e2014-05-24 12:50:23 +0000106#include "llvm/Support/raw_ostream.h"
107
108using namespace llvm;
109
110#define DEBUG_TYPE "frame-info"
111
112static cl::opt<bool> EnableRedZone("aarch64-redzone",
113 cl::desc("enable use of redzone on AArch64"),
114 cl::init(false), cl::Hidden);
115
116STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
117
Tim Northover3b0846e2014-05-24 12:50:23 +0000118bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
119 if (!EnableRedZone)
120 return false;
121 // Don't use the red zone if the function explicitly asks us not to.
122 // This is typically used for kernel code.
Duncan P. N. Exon Smith003bb7d2015-02-14 02:09:06 +0000123 if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
Tim Northover3b0846e2014-05-24 12:50:23 +0000124 return false;
125
126 const MachineFrameInfo *MFI = MF.getFrameInfo();
127 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
128 unsigned NumBytes = AFI->getLocalStackSize();
129
Eric Christopher114fa1c2016-02-29 22:50:49 +0000130 return !(MFI->hasCalls() || hasFP(MF) || NumBytes > 128);
Tim Northover3b0846e2014-05-24 12:50:23 +0000131}
132
133/// hasFP - Return true if the specified function should have a dedicated frame
134/// pointer register.
135bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
136 const MachineFrameInfo *MFI = MF.getFrameInfo();
Eric Christopherfc6de422014-08-05 02:39:49 +0000137 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
Geoff Berry62c1a1e2016-03-02 17:58:31 +0000138 // Retain behavior of always omitting the FP for leaf functions when possible.
139 return (MFI->hasCalls() &&
140 MF.getTarget().Options.DisableFramePointerElim(MF)) ||
141 MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
142 MFI->hasStackMap() || MFI->hasPatchPoint() ||
143 RegInfo->needsStackRealignment(MF);
Tim Northover3b0846e2014-05-24 12:50:23 +0000144}
145
146/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
147/// not required, we reserve argument space for call sites in the function
148/// immediately on entry to the current function. This eliminates the need for
149/// add/sub sp brackets around call sites. Returns true if the call frame is
150/// included as part of the stack frame.
151bool
152AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
153 return !MF.getFrameInfo()->hasVarSizedObjects();
154}
155
Hans Wennborge1a2e902016-03-31 18:33:38 +0000156MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
Tim Northover3b0846e2014-05-24 12:50:23 +0000157 MachineFunction &MF, MachineBasicBlock &MBB,
158 MachineBasicBlock::iterator I) const {
Eric Christopherfc6de422014-08-05 02:39:49 +0000159 const AArch64InstrInfo *TII =
160 static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +0000161 DebugLoc DL = I->getDebugLoc();
Matthias Braunfa3872e2015-05-18 20:27:55 +0000162 unsigned Opc = I->getOpcode();
Tim Northover3b0846e2014-05-24 12:50:23 +0000163 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
164 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
165
Eric Christopherfc6de422014-08-05 02:39:49 +0000166 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
Tim Northover3b0846e2014-05-24 12:50:23 +0000167 if (!TFI->hasReservedCallFrame(MF)) {
168 unsigned Align = getStackAlignment();
169
170 int64_t Amount = I->getOperand(0).getImm();
Rui Ueyamada00f2f2016-01-14 21:06:47 +0000171 Amount = alignTo(Amount, Align);
Tim Northover3b0846e2014-05-24 12:50:23 +0000172 if (!IsDestroy)
173 Amount = -Amount;
174
175 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
176 // doesn't have to pop anything), then the first operand will be zero too so
177 // this adjustment is a no-op.
178 if (CalleePopAmount == 0) {
179 // FIXME: in-function stack adjustment for calls is limited to 24-bits
180 // because there's no guaranteed temporary register available.
181 //
Sylvestre Ledru469de192014-08-11 18:04:46 +0000182 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
Tim Northover3b0846e2014-05-24 12:50:23 +0000183 // 1) For offset <= 12-bit, we use LSL #0
184 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
185 // LSL #0, and the other uses LSL #12.
186 //
Chad Rosier401a4ab2016-01-19 16:50:45 +0000187 // Most call frames will be allocated at the start of a function so
Tim Northover3b0846e2014-05-24 12:50:23 +0000188 // this is OK, but it is a limitation that needs dealing with.
189 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
190 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
191 }
192 } else if (CalleePopAmount != 0) {
193 // If the calling convention demands that the callee pops arguments from the
194 // stack, we want to add it back if we have a reserved call frame.
195 assert(CalleePopAmount < 0xffffff && "call frame too large");
196 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
197 TII);
198 }
Hans Wennborge1a2e902016-03-31 18:33:38 +0000199 return MBB.erase(I);
Tim Northover3b0846e2014-05-24 12:50:23 +0000200}
201
202void AArch64FrameLowering::emitCalleeSavedFrameMoves(
Geoff Berry62d47252016-02-25 16:36:08 +0000203 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
Tim Northover3b0846e2014-05-24 12:50:23 +0000204 MachineFunction &MF = *MBB.getParent();
205 MachineFrameInfo *MFI = MF.getFrameInfo();
206 MachineModuleInfo &MMI = MF.getMMI();
207 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
Eric Christopherfc6de422014-08-05 02:39:49 +0000208 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +0000209 DebugLoc DL = MBB.findDebugLoc(MBBI);
210
211 // Add callee saved registers to move list.
212 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
213 if (CSI.empty())
214 return;
215
Tim Northover3b0846e2014-05-24 12:50:23 +0000216 for (const auto &Info : CSI) {
217 unsigned Reg = Info.getReg();
Geoff Berry62d47252016-02-25 16:36:08 +0000218 int64_t Offset =
219 MFI->getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
Tim Northover3b0846e2014-05-24 12:50:23 +0000220 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
Geoff Berry62d47252016-02-25 16:36:08 +0000221 unsigned CFIIndex = MMI.addFrameInst(
222 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
Tim Northover3b0846e2014-05-24 12:50:23 +0000223 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
Adrian Prantlb9fa9452014-12-16 00:20:49 +0000224 .addCFIIndex(CFIIndex)
225 .setMIFlags(MachineInstr::FrameSetup);
Tim Northover3b0846e2014-05-24 12:50:23 +0000226 }
227}
228
Geoff Berry7e4ba3d2016-02-19 18:27:32 +0000229// Find a scratch register that we can use at the start of the prologue to
230// re-align the stack pointer. We avoid using callee-save registers since they
231// may appear to be free when this is called from canUseAsPrologue (during
232// shrink wrapping), but then no longer be free when this is called from
233// emitPrologue.
234//
235// FIXME: This is a bit conservative, since in the above case we could use one
236// of the callee-save registers as a scratch temp to re-align the stack pointer,
237// but we would then have to make sure that we were in fact saving at least one
238// callee-save register in the prologue, which is additional complexity that
239// doesn't seem worth the benefit.
240static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
241 MachineFunction *MF = MBB->getParent();
242
243 // If MBB is an entry block, use X9 as the scratch register
244 if (&MF->front() == MBB)
245 return AArch64::X9;
246
247 RegScavenger RS;
Matthias Braun8e594fd2016-04-06 02:59:44 +0000248 RS.enterBasicBlock(*MBB);
Geoff Berry7e4ba3d2016-02-19 18:27:32 +0000249
250 // Prefer X9 since it was historically used for the prologue scratch reg.
251 if (!RS.isRegUsed(AArch64::X9))
252 return AArch64::X9;
253
254 // Find a free non callee-save reg.
255 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
256 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
257 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
258 BitVector CalleeSaveRegs(RegInfo->getNumRegs());
259 for (unsigned i = 0; CSRegs[i]; ++i)
260 CalleeSaveRegs.set(CSRegs[i]);
261
262 BitVector Available = RS.getRegsAvailable(&AArch64::GPR64RegClass);
263 for (int AvailReg = Available.find_first(); AvailReg != -1;
264 AvailReg = Available.find_next(AvailReg))
265 if (!CalleeSaveRegs.test(AvailReg))
266 return AvailReg;
267
268 return AArch64::NoRegister;
269}
270
271bool AArch64FrameLowering::canUseAsPrologue(
272 const MachineBasicBlock &MBB) const {
273 const MachineFunction *MF = MBB.getParent();
274 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
275 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
276 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
277
278 // Don't need a scratch register if we're not going to re-align the stack.
279 if (!RegInfo->needsStackRealignment(*MF))
280 return true;
281 // Otherwise, we can use any block as long as it has a scratch register
282 // available.
283 return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
284}
285
Quentin Colombet61b305e2015-05-05 17:38:16 +0000286void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
287 MachineBasicBlock &MBB) const {
Tim Northover3b0846e2014-05-24 12:50:23 +0000288 MachineBasicBlock::iterator MBBI = MBB.begin();
289 const MachineFrameInfo *MFI = MF.getFrameInfo();
290 const Function *Fn = MF.getFunction();
Ahmed Bougacha66834ec2015-12-16 22:54:06 +0000291 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
292 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
293 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +0000294 MachineModuleInfo &MMI = MF.getMMI();
Tim Northover775aaeb2015-11-05 21:54:58 +0000295 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
296 bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
297 bool HasFP = hasFP(MF);
298
299 // Debug location must be unknown since the first debug location is used
300 // to determine the end of the prologue.
301 DebugLoc DL;
302
303 // All calls are tail calls in GHC calling conv, and functions have no
304 // prologue/epilogue.
Greg Fitzgeraldfa78d082015-01-19 17:40:05 +0000305 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
306 return;
307
Tim Northover3b0846e2014-05-24 12:50:23 +0000308 int NumBytes = (int)MFI->getStackSize();
309 if (!AFI->hasStackFrame()) {
310 assert(!HasFP && "unexpected function without stack frame but with FP");
311
312 // All of the stack allocation is for locals.
313 AFI->setLocalStackSize(NumBytes);
314
Chad Rosier27c352d2016-03-14 18:24:34 +0000315 if (!NumBytes)
316 return;
Tim Northover3b0846e2014-05-24 12:50:23 +0000317 // REDZONE: If the stack size is less than 128 bytes, we don't need
318 // to actually allocate.
Chad Rosier27c352d2016-03-14 18:24:34 +0000319 if (canUseRedZone(MF))
320 ++NumRedZoneFunctions;
321 else {
Tim Northover3b0846e2014-05-24 12:50:23 +0000322 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
323 MachineInstr::FrameSetup);
324
Chad Rosier27c352d2016-03-14 18:24:34 +0000325 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
326 MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
Tim Northover3b0846e2014-05-24 12:50:23 +0000327 // Encode the stack size of the leaf function.
328 unsigned CFIIndex = MMI.addFrameInst(
329 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
330 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
Adrian Prantlb9fa9452014-12-16 00:20:49 +0000331 .addCFIIndex(CFIIndex)
332 .setMIFlags(MachineInstr::FrameSetup);
Tim Northover3b0846e2014-05-24 12:50:23 +0000333 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000334 return;
335 }
336
Chad Rosier27c352d2016-03-14 18:24:34 +0000337 NumBytes -= AFI->getCalleeSavedStackSize();
338 assert(NumBytes >= 0 && "Negative stack allocation size!?");
339 // All of the remaining stack allocations are for locals.
340 AFI->setLocalStackSize(NumBytes);
Tim Northover3b0846e2014-05-24 12:50:23 +0000341
342 // Move past the saves of the callee-saved registers.
Geoff Berry04bf91a2016-02-01 16:29:19 +0000343 MachineBasicBlock::iterator End = MBB.end();
344 while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup))
Tim Northover3b0846e2014-05-24 12:50:23 +0000345 ++MBBI;
Tim Northover3b0846e2014-05-24 12:50:23 +0000346 if (HasFP) {
Chad Rosier27c352d2016-03-14 18:24:34 +0000347 // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
348 int FPOffset = AFI->getCalleeSavedStackSize() - 16;
349
Tim Northover3b0846e2014-05-24 12:50:23 +0000350 // Issue sub fp, sp, FPOffset or
351 // mov fp,sp when FPOffset is zero.
352 // Note: All stores of callee-saved registers are marked as "FrameSetup".
353 // This code marks the instruction(s) that set the FP also.
354 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
355 MachineInstr::FrameSetup);
356 }
357
Tim Northover3b0846e2014-05-24 12:50:23 +0000358 // Allocate space for the rest of the frame.
Chad Rosier27c352d2016-03-14 18:24:34 +0000359 if (NumBytes) {
360 const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
361 unsigned scratchSPReg = AArch64::SP;
Kristof Beyls17cb8982015-04-09 08:49:47 +0000362
Chad Rosier27c352d2016-03-14 18:24:34 +0000363 if (NeedsRealignment) {
364 scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
365 assert(scratchSPReg != AArch64::NoRegister);
366 }
Kristof Beyls17cb8982015-04-09 08:49:47 +0000367
Chad Rosier27c352d2016-03-14 18:24:34 +0000368 // If we're a leaf function, try using the red zone.
369 if (!canUseRedZone(MF))
370 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
371 // the correct value here, as NumBytes also includes padding bytes,
372 // which shouldn't be counted here.
373 emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
374 MachineInstr::FrameSetup);
Kristof Beyls17cb8982015-04-09 08:49:47 +0000375
Chad Rosier27c352d2016-03-14 18:24:34 +0000376 if (NeedsRealignment) {
377 const unsigned Alignment = MFI->getMaxAlignment();
378 const unsigned NrBitsToZero = countTrailingZeros(Alignment);
379 assert(NrBitsToZero > 1);
380 assert(scratchSPReg != AArch64::SP);
Kristof Beyls17cb8982015-04-09 08:49:47 +0000381
Chad Rosier27c352d2016-03-14 18:24:34 +0000382 // SUB X9, SP, NumBytes
383 // -- X9 is temporary register, so shouldn't contain any live data here,
384 // -- free to use. This is already produced by emitFrameOffset above.
385 // AND SP, X9, 0b11111...0000
386 // The logical immediates have a non-trivial encoding. The following
387 // formula computes the encoded immediate with all ones but
388 // NrBitsToZero zero bits as least significant bits.
389 uint32_t andMaskEncoded = (1 << 12) // = N
390 | ((64 - NrBitsToZero) << 6) // immr
391 | ((64 - NrBitsToZero - 1) << 0); // imms
392
393 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
394 .addReg(scratchSPReg, RegState::Kill)
395 .addImm(andMaskEncoded);
396 AFI->setStackRealigned(true);
397 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000398 }
399
400 // If we need a base pointer, set it up here. It's whatever the value of the
401 // stack pointer is at this point. Any variable size objects will be allocated
402 // after this, so we can still use the base pointer to reference locals.
403 //
404 // FIXME: Clarify FrameSetup flags here.
405 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
406 // needed.
Kristof Beyls17cb8982015-04-09 08:49:47 +0000407 if (RegInfo->hasBasePointer(MF)) {
408 TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
409 false);
410 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000411
412 if (needsFrameMoves) {
Mehdi Aminibd7287e2015-07-16 06:11:10 +0000413 const DataLayout &TD = MF.getDataLayout();
414 const int StackGrowth = -TD.getPointerSize(0);
Tim Northover3b0846e2014-05-24 12:50:23 +0000415 unsigned FramePtr = RegInfo->getFrameRegister(MF);
Tim Northover3b0846e2014-05-24 12:50:23 +0000416 // An example of the prologue:
417 //
418 // .globl __foo
419 // .align 2
420 // __foo:
421 // Ltmp0:
422 // .cfi_startproc
423 // .cfi_personality 155, ___gxx_personality_v0
424 // Leh_func_begin:
425 // .cfi_lsda 16, Lexception33
426 //
427 // stp xa,bx, [sp, -#offset]!
428 // ...
429 // stp x28, x27, [sp, #offset-32]
430 // stp fp, lr, [sp, #offset-16]
431 // add fp, sp, #offset - 16
432 // sub sp, sp, #1360
433 //
434 // The Stack:
435 // +-------------------------------------------+
436 // 10000 | ........ | ........ | ........ | ........ |
437 // 10004 | ........ | ........ | ........ | ........ |
438 // +-------------------------------------------+
439 // 10008 | ........ | ........ | ........ | ........ |
440 // 1000c | ........ | ........ | ........ | ........ |
441 // +===========================================+
442 // 10010 | X28 Register |
443 // 10014 | X28 Register |
444 // +-------------------------------------------+
445 // 10018 | X27 Register |
446 // 1001c | X27 Register |
447 // +===========================================+
448 // 10020 | Frame Pointer |
449 // 10024 | Frame Pointer |
450 // +-------------------------------------------+
451 // 10028 | Link Register |
452 // 1002c | Link Register |
453 // +===========================================+
454 // 10030 | ........ | ........ | ........ | ........ |
455 // 10034 | ........ | ........ | ........ | ........ |
456 // +-------------------------------------------+
457 // 10038 | ........ | ........ | ........ | ........ |
458 // 1003c | ........ | ........ | ........ | ........ |
459 // +-------------------------------------------+
460 //
461 // [sp] = 10030 :: >>initial value<<
462 // sp = 10020 :: stp fp, lr, [sp, #-16]!
463 // fp = sp == 10020 :: mov fp, sp
464 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
465 // sp == 10010 :: >>final value<<
466 //
467 // The frame pointer (w29) points to address 10020. If we use an offset of
468 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
469 // for w27, and -32 for w28:
470 //
471 // Ltmp1:
472 // .cfi_def_cfa w29, 16
473 // Ltmp2:
474 // .cfi_offset w30, -8
475 // Ltmp3:
476 // .cfi_offset w29, -16
477 // Ltmp4:
478 // .cfi_offset w27, -24
479 // Ltmp5:
480 // .cfi_offset w28, -32
481
482 if (HasFP) {
483 // Define the current CFA rule to use the provided FP.
484 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
485 unsigned CFIIndex = MMI.addFrameInst(
486 MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
487 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
Adrian Prantlb9fa9452014-12-16 00:20:49 +0000488 .addCFIIndex(CFIIndex)
489 .setMIFlags(MachineInstr::FrameSetup);
Tim Northover3b0846e2014-05-24 12:50:23 +0000490 } else {
491 // Encode the stack size of the leaf function.
492 unsigned CFIIndex = MMI.addFrameInst(
493 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
494 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
Adrian Prantlb9fa9452014-12-16 00:20:49 +0000495 .addCFIIndex(CFIIndex)
496 .setMIFlags(MachineInstr::FrameSetup);
Tim Northover3b0846e2014-05-24 12:50:23 +0000497 }
498
Geoff Berry62d47252016-02-25 16:36:08 +0000499 // Now emit the moves for whatever callee saved regs we have (including FP,
500 // LR if those are saved).
501 emitCalleeSavedFrameMoves(MBB, MBBI);
Tim Northover3b0846e2014-05-24 12:50:23 +0000502 }
503}
504
Tim Northover3b0846e2014-05-24 12:50:23 +0000505void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
506 MachineBasicBlock &MBB) const {
507 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
Tim Northover3b0846e2014-05-24 12:50:23 +0000508 MachineFrameInfo *MFI = MF.getFrameInfo();
Ahmed Bougacha66834ec2015-12-16 22:54:06 +0000509 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
Ahmed Bougacha66834ec2015-12-16 22:54:06 +0000510 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
Quentin Colombet61b305e2015-05-05 17:38:16 +0000511 DebugLoc DL;
512 bool IsTailCallReturn = false;
513 if (MBB.end() != MBBI) {
514 DL = MBBI->getDebugLoc();
515 unsigned RetOpcode = MBBI->getOpcode();
516 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
517 RetOpcode == AArch64::TCRETURNri;
518 }
Tim Northover3b0846e2014-05-24 12:50:23 +0000519 int NumBytes = MFI->getStackSize();
520 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
521
Greg Fitzgeraldfa78d082015-01-19 17:40:05 +0000522 // All calls are tail calls in GHC calling conv, and functions have no
523 // prologue/epilogue.
524 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
525 return;
526
Kristof Beyls17cb8982015-04-09 08:49:47 +0000527 // Initial and residual are named for consistency with the prologue. Note that
Tim Northover3b0846e2014-05-24 12:50:23 +0000528 // in the epilogue, the residual adjustment is executed first.
529 uint64_t ArgumentPopSize = 0;
Quentin Colombet61b305e2015-05-05 17:38:16 +0000530 if (IsTailCallReturn) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000531 MachineOperand &StackAdjust = MBBI->getOperand(1);
532
533 // For a tail-call in a callee-pops-arguments environment, some or all of
534 // the stack may actually be in use for the call's arguments, this is
535 // calculated during LowerCall and consumed here...
536 ArgumentPopSize = StackAdjust.getImm();
537 } else {
538 // ... otherwise the amount to pop is *all* of the argument space,
539 // conveniently stored in the MachineFunctionInfo by
540 // LowerFormalArguments. This will, of course, be zero for the C calling
541 // convention.
542 ArgumentPopSize = AFI->getArgumentStackToRestore();
543 }
544
545 // The stack frame should be like below,
546 //
547 // ---------------------- ---
548 // | | |
549 // | BytesInStackArgArea| CalleeArgStackSize
550 // | (NumReusableBytes) | (of tail call)
551 // | | ---
552 // | | |
553 // ---------------------| --- |
554 // | | | |
555 // | CalleeSavedReg | | |
Geoff Berry04bf91a2016-02-01 16:29:19 +0000556 // | (CalleeSavedStackSize)| | |
Tim Northover3b0846e2014-05-24 12:50:23 +0000557 // | | | |
558 // ---------------------| | NumBytes
559 // | | StackSize (StackAdjustUp)
560 // | LocalStackSize | | |
561 // | (covering callee | | |
562 // | args) | | |
563 // | | | |
564 // ---------------------- --- ---
565 //
566 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
567 // = StackSize + ArgumentPopSize
568 //
569 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
570 // it as the 2nd argument of AArch64ISD::TC_RETURN.
Tim Northover3b0846e2014-05-24 12:50:23 +0000571
Tim Northover3b0846e2014-05-24 12:50:23 +0000572 // Move past the restores of the callee-saved registers.
Quentin Colombet61b305e2015-05-05 17:38:16 +0000573 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
Matthias Braun45419292015-12-17 03:18:47 +0000574 MachineBasicBlock::iterator Begin = MBB.begin();
575 while (LastPopI != Begin) {
576 --LastPopI;
Geoff Berry04bf91a2016-02-01 16:29:19 +0000577 if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000578 ++LastPopI;
Matthias Braun45419292015-12-17 03:18:47 +0000579 break;
Tim Northover3b0846e2014-05-24 12:50:23 +0000580 }
581 }
Geoff Berry04bf91a2016-02-01 16:29:19 +0000582 NumBytes -= AFI->getCalleeSavedStackSize();
Tim Northover3b0846e2014-05-24 12:50:23 +0000583 assert(NumBytes >= 0 && "Negative stack allocation size!?");
584
585 if (!hasFP(MF)) {
Geoff Berrya1c62692016-02-23 16:54:36 +0000586 bool RedZone = canUseRedZone(MF);
Tim Northover3b0846e2014-05-24 12:50:23 +0000587 // If this was a redzone leaf function, we don't need to restore the
Geoff Berrya1c62692016-02-23 16:54:36 +0000588 // stack pointer (but we may need to pop stack args for fastcc).
589 if (RedZone && ArgumentPopSize == 0)
590 return;
591
592 bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0;
593 int StackRestoreBytes = RedZone ? 0 : NumBytes;
594 if (NoCalleeSaveRestore)
595 StackRestoreBytes += ArgumentPopSize;
596 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
597 StackRestoreBytes, TII, MachineInstr::FrameDestroy);
598 // If we were able to combine the local stack pop with the argument pop,
599 // then we're done.
600 if (NoCalleeSaveRestore || ArgumentPopSize == 0)
601 return;
602 NumBytes = 0;
Tim Northover3b0846e2014-05-24 12:50:23 +0000603 }
604
605 // Restore the original stack pointer.
606 // FIXME: Rather than doing the math here, we should instead just use
607 // non-post-indexed loads for the restores if we aren't actually going to
608 // be able to save any instructions.
Chad Rosier6d986552016-03-14 18:17:41 +0000609 if (MFI->hasVarSizedObjects() || AFI->isStackRealigned())
Tim Northover3b0846e2014-05-24 12:50:23 +0000610 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
Geoff Berry04bf91a2016-02-01 16:29:19 +0000611 -AFI->getCalleeSavedStackSize() + 16, TII,
612 MachineInstr::FrameDestroy);
Chad Rosier6d986552016-03-14 18:17:41 +0000613 else if (NumBytes)
614 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
615 MachineInstr::FrameDestroy);
Geoff Berrya1c62692016-02-23 16:54:36 +0000616
617 // This must be placed after the callee-save restore code because that code
618 // assumes the SP is at the same location as it was after the callee-save save
619 // code in the prologue.
620 if (ArgumentPopSize)
621 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
622 ArgumentPopSize, TII, MachineInstr::FrameDestroy);
Tim Northover3b0846e2014-05-24 12:50:23 +0000623}
624
Tim Northover3b0846e2014-05-24 12:50:23 +0000625/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
626/// debug info. It's the same as what we use for resolving the code-gen
627/// references for now. FIXME: This can go wrong when references are
628/// SP-relative and simple call frames aren't used.
629int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
630 int FI,
631 unsigned &FrameReg) const {
632 return resolveFrameIndexReference(MF, FI, FrameReg);
633}
634
635int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
636 int FI, unsigned &FrameReg,
637 bool PreferFP) const {
638 const MachineFrameInfo *MFI = MF.getFrameInfo();
639 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
Eric Christopherfc6de422014-08-05 02:39:49 +0000640 MF.getSubtarget().getRegisterInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +0000641 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
642 int FPOffset = MFI->getObjectOffset(FI) + 16;
643 int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
644 bool isFixed = MFI->isFixedObjectIndex(FI);
645
646 // Use frame pointer to reference fixed objects. Use it for locals if
Kristof Beyls17cb8982015-04-09 08:49:47 +0000647 // there are VLAs or a dynamically realigned SP (and thus the SP isn't
648 // reliable as a base). Make sure useFPForScavengingIndex() does the
649 // right thing for the emergency spill slot.
Tim Northover3b0846e2014-05-24 12:50:23 +0000650 bool UseFP = false;
651 if (AFI->hasStackFrame()) {
652 // Note: Keeping the following as multiple 'if' statements rather than
653 // merging to a single expression for readability.
654 //
655 // Argument access should always use the FP.
656 if (isFixed) {
657 UseFP = hasFP(MF);
Kristof Beyls17cb8982015-04-09 08:49:47 +0000658 } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
659 !RegInfo->needsStackRealignment(MF)) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000660 // Use SP or FP, whichever gives us the best chance of the offset
661 // being in range for direct access. If the FPOffset is positive,
662 // that'll always be best, as the SP will be even further away.
663 // If the FPOffset is negative, we have to keep in mind that the
664 // available offset range for negative offsets is smaller than for
665 // positive ones. If we have variable sized objects, we're stuck with
666 // using the FP regardless, though, as the SP offset is unknown
667 // and we don't have a base pointer available. If an offset is
668 // available via the FP and the SP, use whichever is closest.
669 if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
670 (FPOffset >= -256 && Offset > -FPOffset))
671 UseFP = true;
672 }
673 }
674
Kristof Beyls17cb8982015-04-09 08:49:47 +0000675 assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
676 "In the presence of dynamic stack pointer realignment, "
677 "non-argument objects cannot be accessed through the frame pointer");
678
Tim Northover3b0846e2014-05-24 12:50:23 +0000679 if (UseFP) {
680 FrameReg = RegInfo->getFrameRegister(MF);
681 return FPOffset;
682 }
683
684 // Use the base pointer if we have one.
685 if (RegInfo->hasBasePointer(MF))
686 FrameReg = RegInfo->getBaseRegister();
687 else {
688 FrameReg = AArch64::SP;
689 // If we're using the red zone for this function, the SP won't actually
690 // be adjusted, so the offsets will be negative. They're also all
691 // within range of the signed 9-bit immediate instructions.
692 if (canUseRedZone(MF))
693 Offset -= AFI->getLocalStackSize();
694 }
695
696 return Offset;
697}
698
699static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
Matthias Braun74a0bd32016-04-13 21:43:16 +0000700 // Do not set a kill flag on values that are also marked as live-in. This
701 // happens with the @llvm-returnaddress intrinsic and with arguments passed in
702 // callee saved registers.
703 // Omitting the kill flags is conservatively correct even if the live-in
704 // is not used after all.
705 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
706 return getKillRegState(!IsLiveIn);
Tim Northover3b0846e2014-05-24 12:50:23 +0000707}
708
Manman Ren57518142016-04-11 21:08:06 +0000709static bool produceCompactUnwindFrame(MachineFunction &MF) {
710 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
711 AttributeSet Attrs = MF.getFunction()->getAttributes();
712 return Subtarget.isTargetMachO() &&
713 !(Subtarget.getTargetLowering()->supportSwiftError() &&
714 Attrs.hasAttrSomewhere(Attribute::SwiftError));
715}
716
717
Geoff Berry29d4a692016-02-01 19:07:06 +0000718struct RegPairInfo {
719 RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
720 unsigned Reg1;
721 unsigned Reg2;
722 int FrameIdx;
723 int Offset;
724 bool IsGPR;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000725 bool isPaired() const { return Reg2 != AArch64::NoRegister; }
Geoff Berry29d4a692016-02-01 19:07:06 +0000726};
727
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000728static void computeCalleeSaveRegisterPairs(
729 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
730 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
Geoff Berry29d4a692016-02-01 19:07:06 +0000731
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000732 if (CSI.empty())
733 return;
734
735 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
736 MachineFrameInfo *MFI = MF.getFrameInfo();
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000737 CallingConv::ID CC = MF.getFunction()->getCallingConv();
Tim Northover3b0846e2014-05-24 12:50:23 +0000738 unsigned Count = CSI.size();
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000739 (void)CC;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000740 // MachO's compact unwind format relies on all registers being stored in
741 // pairs.
Manman Ren57518142016-04-11 21:08:06 +0000742 assert((!produceCompactUnwindFrame(MF) ||
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000743 CC == CallingConv::PreserveMost ||
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000744 (Count & 1) == 0) &&
745 "Odd number of callee-saved regs to spill!");
746 unsigned Offset = AFI->getCalleeSavedStackSize();
Tim Northover775aaeb2015-11-05 21:54:58 +0000747
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000748 for (unsigned i = 0; i < Count; ++i) {
Geoff Berry29d4a692016-02-01 19:07:06 +0000749 RegPairInfo RPI;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000750 RPI.Reg1 = CSI[i].getReg();
751
752 assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
753 AArch64::FPR64RegClass.contains(RPI.Reg1));
754 RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
755
756 // Add the next reg to the pair if it is in the same register class.
757 if (i + 1 < Count) {
758 unsigned NextReg = CSI[i + 1].getReg();
759 if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
760 (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
761 RPI.Reg2 = NextReg;
762 }
Geoff Berry29d4a692016-02-01 19:07:06 +0000763
Tim Northover3b0846e2014-05-24 12:50:23 +0000764 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
765 // list to come in sorted by frame index so that we can issue the store
766 // pair instructions directly. Assert if we see anything otherwise.
767 //
768 // The order of the registers in the list is controlled by
769 // getCalleeSavedRegs(), so they will always be in-order, as well.
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000770 assert((!RPI.isPaired() ||
771 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
Tim Northover3b0846e2014-05-24 12:50:23 +0000772 "Out of order callee saved regs!");
Geoff Berry29d4a692016-02-01 19:07:06 +0000773
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000774 // MachO's compact unwind format relies on all registers being stored in
775 // adjacent register pairs.
Manman Ren57518142016-04-11 21:08:06 +0000776 assert((!produceCompactUnwindFrame(MF) ||
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000777 CC == CallingConv::PreserveMost ||
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000778 (RPI.isPaired() &&
779 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
780 RPI.Reg1 + 1 == RPI.Reg2))) &&
781 "Callee-save registers not saved as adjacent register pair!");
782
783 RPI.FrameIdx = CSI[i].getFrameIdx();
784
785 if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
786 // Round up size of non-pair to pair size if we need to pad the
787 // callee-save area to ensure 16-byte alignment.
788 Offset -= 16;
789 assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
790 MFI->setObjectSize(RPI.FrameIdx, 16);
791 } else
792 Offset -= RPI.isPaired() ? 16 : 8;
793 assert(Offset % 8 == 0);
794 RPI.Offset = Offset / 8;
Geoff Berry29d4a692016-02-01 19:07:06 +0000795 assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
796 "Offset out of bounds for LDP/STP immediate");
797
798 RegPairs.push_back(RPI);
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000799 if (RPI.isPaired())
800 ++i;
Geoff Berry29d4a692016-02-01 19:07:06 +0000801 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000802
803 // Align first offset to even 16-byte boundary to avoid additional SP
804 // adjustment instructions.
805 // Last pair offset is size of whole callee-save region for SP
806 // pre-dec/post-inc.
807 RegPairInfo &LastPair = RegPairs.back();
808 assert(AFI->getCalleeSavedStackSize() % 8 == 0);
809 LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
Geoff Berry29d4a692016-02-01 19:07:06 +0000810}
811
812bool AArch64FrameLowering::spillCalleeSavedRegisters(
813 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
814 const std::vector<CalleeSavedInfo> &CSI,
815 const TargetRegisterInfo *TRI) const {
816 MachineFunction &MF = *MBB.getParent();
817 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
818 DebugLoc DL;
819 SmallVector<RegPairInfo, 8> RegPairs;
820
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000821 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
Geoff Berry29d4a692016-02-01 19:07:06 +0000822
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000823 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
Geoff Berry29d4a692016-02-01 19:07:06 +0000824 ++RPII) {
825 RegPairInfo RPI = *RPII;
826 unsigned Reg1 = RPI.Reg1;
827 unsigned Reg2 = RPI.Reg2;
828 unsigned StrOpc;
829
Tim Northover3b0846e2014-05-24 12:50:23 +0000830 // Issue sequence of non-sp increment and pi sp spills for cs regs. The
831 // first spill is a pre-increment that allocates the stack.
832 // For example:
833 // stp x22, x21, [sp, #-48]! // addImm(-6)
834 // stp x20, x19, [sp, #16] // addImm(+2)
835 // stp fp, lr, [sp, #32] // addImm(+4)
836 // Rationale: This sequence saves uop updates compared to a sequence of
837 // pre-increment spills like stp xi,xj,[sp,#-16]!
Geoff Berry29d4a692016-02-01 19:07:06 +0000838 // Note: Similar rationale and sequence for restores in epilog.
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000839 bool BumpSP = RPII == RegPairs.rbegin();
Geoff Berry29d4a692016-02-01 19:07:06 +0000840 if (RPI.IsGPR) {
Tim Northover3b0846e2014-05-24 12:50:23 +0000841 // For first spill use pre-increment store.
Geoff Berry29d4a692016-02-01 19:07:06 +0000842 if (BumpSP)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000843 StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre;
Tim Northover3b0846e2014-05-24 12:50:23 +0000844 else
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000845 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
Geoff Berry29d4a692016-02-01 19:07:06 +0000846 } else {
Tim Northover3b0846e2014-05-24 12:50:23 +0000847 // For first spill use pre-increment store.
Geoff Berry29d4a692016-02-01 19:07:06 +0000848 if (BumpSP)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000849 StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre;
Tim Northover3b0846e2014-05-24 12:50:23 +0000850 else
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000851 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
Geoff Berry29d4a692016-02-01 19:07:06 +0000852 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000853 DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
854 if (RPI.isPaired())
855 dbgs() << ", " << TRI->getName(Reg2);
856 dbgs() << ") -> fi#(" << RPI.FrameIdx;
857 if (RPI.isPaired())
858 dbgs() << ", " << RPI.FrameIdx+1;
859 dbgs() << ")\n");
Geoff Berry29d4a692016-02-01 19:07:06 +0000860
861 const int Offset = BumpSP ? -RPI.Offset : RPI.Offset;
Tim Northover3b0846e2014-05-24 12:50:23 +0000862 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
Geoff Berry29d4a692016-02-01 19:07:06 +0000863 if (BumpSP)
Tim Northover3b0846e2014-05-24 12:50:23 +0000864 MIB.addReg(AArch64::SP, RegState::Define);
865
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000866 if (RPI.isPaired()) {
867 MBB.addLiveIn(Reg1);
868 MBB.addLiveIn(Reg2);
869 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
Tim Northover3b0846e2014-05-24 12:50:23 +0000870 .addReg(Reg1, getPrologueDeath(MF, Reg1))
871 .addReg(AArch64::SP)
872 .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
873 .setMIFlag(MachineInstr::FrameSetup);
Geoff Berryc3764062016-04-15 15:16:19 +0000874 MIB.addMemOperand(MF.getMachineMemOperand(
875 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
876 MachineMemOperand::MOStore, 8, 8));
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000877 } else {
878 MBB.addLiveIn(Reg1);
879 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
880 .addReg(AArch64::SP)
881 .addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled
882 .setMIFlag(MachineInstr::FrameSetup);
883 }
Geoff Berryc3764062016-04-15 15:16:19 +0000884 MIB.addMemOperand(MF.getMachineMemOperand(
885 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
886 MachineMemOperand::MOStore, 8, 8));
Tim Northover3b0846e2014-05-24 12:50:23 +0000887 }
888 return true;
889}
890
891bool AArch64FrameLowering::restoreCalleeSavedRegisters(
892 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
893 const std::vector<CalleeSavedInfo> &CSI,
894 const TargetRegisterInfo *TRI) const {
895 MachineFunction &MF = *MBB.getParent();
Eric Christopherfc6de422014-08-05 02:39:49 +0000896 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +0000897 DebugLoc DL;
Geoff Berry29d4a692016-02-01 19:07:06 +0000898 SmallVector<RegPairInfo, 8> RegPairs;
Tim Northover3b0846e2014-05-24 12:50:23 +0000899
900 if (MI != MBB.end())
901 DL = MI->getDebugLoc();
902
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000903 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
Geoff Berry29d4a692016-02-01 19:07:06 +0000904
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000905 for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
Geoff Berry29d4a692016-02-01 19:07:06 +0000906 ++RPII) {
907 RegPairInfo RPI = *RPII;
908 unsigned Reg1 = RPI.Reg1;
909 unsigned Reg2 = RPI.Reg2;
910
Tim Northover3b0846e2014-05-24 12:50:23 +0000911 // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
912 // the last load is sp-pi post-increment and de-allocates the stack:
913 // For example:
914 // ldp fp, lr, [sp, #32] // addImm(+4)
915 // ldp x20, x19, [sp, #16] // addImm(+2)
916 // ldp x22, x21, [sp], #48 // addImm(+6)
917 // Note: see comment in spillCalleeSavedRegisters()
918 unsigned LdrOpc;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000919 bool BumpSP = RPII == std::prev(RegPairs.end());
Geoff Berry29d4a692016-02-01 19:07:06 +0000920 if (RPI.IsGPR) {
921 if (BumpSP)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000922 LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost;
Tim Northover3b0846e2014-05-24 12:50:23 +0000923 else
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000924 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
Geoff Berry29d4a692016-02-01 19:07:06 +0000925 } else {
926 if (BumpSP)
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000927 LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost;
Tim Northover3b0846e2014-05-24 12:50:23 +0000928 else
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000929 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
Geoff Berry29d4a692016-02-01 19:07:06 +0000930 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000931 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
932 if (RPI.isPaired())
933 dbgs() << ", " << TRI->getName(Reg2);
934 dbgs() << ") -> fi#(" << RPI.FrameIdx;
935 if (RPI.isPaired())
936 dbgs() << ", " << RPI.FrameIdx+1;
937 dbgs() << ")\n");
Tim Northover3b0846e2014-05-24 12:50:23 +0000938
Geoff Berry29d4a692016-02-01 19:07:06 +0000939 const int Offset = RPI.Offset;
Tim Northover3b0846e2014-05-24 12:50:23 +0000940 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
Geoff Berry29d4a692016-02-01 19:07:06 +0000941 if (BumpSP)
Tim Northover3b0846e2014-05-24 12:50:23 +0000942 MIB.addReg(AArch64::SP, RegState::Define);
943
Geoff Berryc3764062016-04-15 15:16:19 +0000944 if (RPI.isPaired()) {
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000945 MIB.addReg(Reg2, getDefRegState(true))
Tim Northover3b0846e2014-05-24 12:50:23 +0000946 .addReg(Reg1, getDefRegState(true))
947 .addReg(AArch64::SP)
Geoff Berry04bf91a2016-02-01 16:29:19 +0000948 .addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8]
949 // where the factor * 8 is implicit
950 .setMIFlag(MachineInstr::FrameDestroy);
Geoff Berryc3764062016-04-15 15:16:19 +0000951 MIB.addMemOperand(MF.getMachineMemOperand(
952 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
953 MachineMemOperand::MOLoad, 8, 8));
954 } else {
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000955 MIB.addReg(Reg1, getDefRegState(true))
956 .addReg(AArch64::SP)
957 .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled
958 .setMIFlag(MachineInstr::FrameDestroy);
Geoff Berryc3764062016-04-15 15:16:19 +0000959 }
960 MIB.addMemOperand(MF.getMachineMemOperand(
961 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
962 MachineMemOperand::MOLoad, 8, 8));
Tim Northover3b0846e2014-05-24 12:50:23 +0000963 }
964 return true;
965}
966
Matthias Braun02564862015-07-14 17:17:13 +0000967void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
968 BitVector &SavedRegs,
969 RegScavenger *RS) const {
970 // All calls are tail calls in GHC calling conv, and functions have no
971 // prologue/epilogue.
972 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
973 return;
974
975 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
Tim Northover3b0846e2014-05-24 12:50:23 +0000976 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
Eric Christopherfc6de422014-08-05 02:39:49 +0000977 MF.getSubtarget().getRegisterInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +0000978 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000979 unsigned UnspilledCSGPR = AArch64::NoRegister;
980 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
Tim Northover3b0846e2014-05-24 12:50:23 +0000981
982 // The frame record needs to be created by saving the appropriate registers
983 if (hasFP(MF)) {
Matthias Braun02564862015-07-14 17:17:13 +0000984 SavedRegs.set(AArch64::FP);
985 SavedRegs.set(AArch64::LR);
Tim Northover3b0846e2014-05-24 12:50:23 +0000986 }
987
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000988 unsigned BasePointerReg = AArch64::NoRegister;
Tim Northover3b0846e2014-05-24 12:50:23 +0000989 if (RegInfo->hasBasePointer(MF))
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000990 BasePointerReg = RegInfo->getBaseRegister();
Tim Northover3b0846e2014-05-24 12:50:23 +0000991
Tim Northover3b0846e2014-05-24 12:50:23 +0000992 bool ExtraCSSpill = false;
Tim Northover3b0846e2014-05-24 12:50:23 +0000993 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000994 // Figure out which callee-saved registers to save/restore.
995 for (unsigned i = 0; CSRegs[i]; ++i) {
996 const unsigned Reg = CSRegs[i];
Tim Northover3b0846e2014-05-24 12:50:23 +0000997
Geoff Berry7e4ba3d2016-02-19 18:27:32 +0000998 // Add the base pointer register to SavedRegs if it is callee-save.
999 if (Reg == BasePointerReg)
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001000 SavedRegs.set(Reg);
Tim Northover3b0846e2014-05-24 12:50:23 +00001001
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001002 bool RegUsed = SavedRegs.test(Reg);
1003 unsigned PairedReg = CSRegs[i ^ 1];
1004 if (!RegUsed) {
1005 if (AArch64::GPR64RegClass.contains(Reg) &&
1006 !RegInfo->isReservedReg(MF, Reg)) {
1007 UnspilledCSGPR = Reg;
1008 UnspilledCSGPRPaired = PairedReg;
Tim Northover3b0846e2014-05-24 12:50:23 +00001009 }
1010 continue;
1011 }
1012
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001013 // MachO's compact unwind format relies on all registers being stored in
1014 // pairs.
1015 // FIXME: the usual format is actually better if unwinding isn't needed.
Manman Ren57518142016-04-11 21:08:06 +00001016 if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001017 SavedRegs.set(PairedReg);
1018 ExtraCSSpill = true;
Tim Northover3b0846e2014-05-24 12:50:23 +00001019 }
Tim Northover3b0846e2014-05-24 12:50:23 +00001020 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001021
1022 DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
1023 for (int Reg = SavedRegs.find_first(); Reg != -1;
1024 Reg = SavedRegs.find_next(Reg))
1025 dbgs() << ' ' << PrintReg(Reg, RegInfo);
1026 dbgs() << "\n";);
1027
1028 // If any callee-saved registers are used, the frame cannot be eliminated.
1029 unsigned NumRegsSpilled = SavedRegs.count();
1030 bool CanEliminateFrame = NumRegsSpilled == 0;
Tim Northover3b0846e2014-05-24 12:50:23 +00001031
1032 // FIXME: Set BigStack if any stack slot references may be out of range.
1033 // For now, just conservatively guestimate based on unscaled indexing
1034 // range. We'll end up allocating an unnecessary spill slot a lot, but
1035 // realistically that's not a big deal at this stage of the game.
1036 // The CSR spill slots have not been allocated yet, so estimateStackSize
1037 // won't include them.
1038 MachineFrameInfo *MFI = MF.getFrameInfo();
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001039 unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
Tim Northover3b0846e2014-05-24 12:50:23 +00001040 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
1041 bool BigStack = (CFSize >= 256);
1042 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
1043 AFI->setHasStackFrame(true);
1044
1045 // Estimate if we might need to scavenge a register at some point in order
1046 // to materialize a stack offset. If so, either spill one additional
1047 // callee-saved register or reserve a special spill slot to facilitate
1048 // register scavenging. If we already spilled an extra callee-saved register
1049 // above to keep the number of spills even, we don't need to do anything else
1050 // here.
1051 if (BigStack && !ExtraCSSpill) {
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001052 if (UnspilledCSGPR != AArch64::NoRegister) {
1053 DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
1054 << " to get a scratch register.\n");
1055 SavedRegs.set(UnspilledCSGPR);
1056 // MachO's compact unwind format relies on all registers being stored in
1057 // pairs, so if we need to spill one extra for BigStack, then we need to
1058 // store the pair.
Manman Ren57518142016-04-11 21:08:06 +00001059 if (produceCompactUnwindFrame(MF))
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001060 SavedRegs.set(UnspilledCSGPRPaired);
Tim Northover3b0846e2014-05-24 12:50:23 +00001061 ExtraCSSpill = true;
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001062 NumRegsSpilled = SavedRegs.count();
Tim Northover3b0846e2014-05-24 12:50:23 +00001063 }
1064
1065 // If we didn't find an extra callee-saved register to spill, create
1066 // an emergency spill slot.
1067 if (!ExtraCSSpill) {
1068 const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
1069 int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
1070 RS->addScavengingFrameIndex(FI);
1071 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
1072 << " as the emergency spill slot.\n");
1073 }
1074 }
Geoff Berry04bf91a2016-02-01 16:29:19 +00001075
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001076 // Round up to register pair alignment to avoid additional SP adjustment
1077 // instructions.
1078 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
Tim Northover3b0846e2014-05-24 12:50:23 +00001079}