//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) isn't created until the main
// function body, after the prologue is run. However, it's depicted here for
// completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_fp, prev_lr                  |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |                                   |
// | other callee-saved registers      |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) to that data must be computable at compile time. The size
// of the areas with a dotted background cannot be computed at compile time
// if they are present, so all three of fp, bp and sp must be set up in order
// to access every frame area, assuming all of the frame areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// FIXME: also explain the redzone concept.
// FIXME: also explain the concept of reserved call frames.
//
//===----------------------------------------------------------------------===//

#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

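// Note on the red zone (informal sketch, not an ABI statement): for a leaf
// function whose entire frame fits in the 128 bytes directly below SP, we can
// skip adjusting SP in the prologue/epilogue altogether and address the
// locals at small negative offsets from SP. canUseRedZone() below checks for
// exactly that situation.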
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;
  // Don't use the red zone if the function explicitly asks us not to.
  // This is typically used for kernel code.
  if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned NumBytes = AFI->getLocalStackSize();

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  // Retain behavior of always omitting the FP for leaf functions when possible.
  return (MFI.hasCalls() &&
          MF.getTarget().Options.DisableFramePointerElim(MF)) ||
         MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
         MFI.hasStackMap() || MFI.hasPatchPoint() ||
         RegInfo->needsStackRealignment(MF);
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites immediately on entry
/// to the current function. This eliminates the need for add/sub sp brackets
/// around call sites. Returns true if the call frame is included as part of
/// the stack frame.
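///
/// For example (illustrative only): with a reserved call frame the prologue
/// performs a single
///   sub sp, sp, #(locals + largest outgoing argument area)
/// instead of bracketing every call site with
///   sub sp, sp, #N ; ... ; bl callee ; ... ; add sp, sp, #N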
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects();
}

MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      //    LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
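      //
      // For example (illustrative): an adjustment of 0x12345 bytes could be
      // materialized as
      //   sub sp, sp, #0x12, lsl #12
      //   sub sp, sp, #0x345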
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  return MBB.erase(I);
}

void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const MCRegisterInfo *MRI = STI.getRegisterInfo();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    int64_t Offset =
        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack pointer,
// but we would then have to make sure that we were in fact saving at least one
// callee-save register in the prologue, which is additional complexity that
// doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
  MachineFunction *MF = MBB->getParent();

  // If MBB is an entry block, use X9 as the scratch register
  if (&MF->front() == MBB)
    return AArch64::X9;

  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
  LivePhysRegs LiveRegs(TRI);
  LiveRegs.addLiveIns(*MBB);

  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  // Prefer X9 since it was historically used for the prologue scratch reg.
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (LiveRegs.available(MRI, AArch64::X9))
    return AArch64::X9;

  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }
  return AArch64::NoRegister;
}

bool AArch64FrameLowering::canUseAsPrologue(
    const MachineBasicBlock &MBB) const {
  const MachineFunction *MF = MBB.getParent();
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Don't need a scratch register if we're not going to re-align the stack.
  if (!RegInfo->needsStackRealignment(*MF))
    return true;
  // Otherwise, we can use any block as long as it has a scratch register
  // available.
  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}

bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, unsigned StackBumpBytes) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  if (AFI->getLocalStackSize() == 0)
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores
  if (StackBumpBytes >= 512)
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->needsStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (canUseRedZone(MF))
    return false;

  return true;
}
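
// A rough illustration of the choice above (exact offsets depend on the
// frame): combining the bumps turns a prologue like
//   stp x29, x30, [sp, #-16]!   // allocate callee-save area
//   sub sp, sp, #32             // allocate locals
// into
//   sub sp, sp, #48             // one SP adjustment for the whole frame
//   stp x29, x30, [sp, #32]     // callee-save store at a fixed offset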

// Convert a callee-save register save/restore instruction into one that also
// performs the stack pointer decrement/increment that allocates/deallocates
// the callee-save stack area, by switching the store/load to its pre/post
// increment version.
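//
// For example (illustrative), when the callee-save area is 16 bytes the first
// save
//   stp x29, x30, [sp, #0]      // STPXi
// becomes
//   stp x29, x30, [sp, #-16]!   // STPXpre, allocating the callee-save area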
339static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
Benjamin Kramerbdc49562016-06-12 15:39:02 +0000340 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
341 const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
Geoff Berrya5335642016-05-06 16:34:59 +0000342 unsigned NewOpc;
343 bool NewIsUnscaled = false;
344 switch (MBBI->getOpcode()) {
345 default:
346 llvm_unreachable("Unexpected callee-save save/restore opcode!");
347 case AArch64::STPXi:
348 NewOpc = AArch64::STPXpre;
349 break;
350 case AArch64::STPDi:
351 NewOpc = AArch64::STPDpre;
352 break;
353 case AArch64::STRXui:
354 NewOpc = AArch64::STRXpre;
355 NewIsUnscaled = true;
356 break;
357 case AArch64::STRDui:
358 NewOpc = AArch64::STRDpre;
359 NewIsUnscaled = true;
360 break;
361 case AArch64::LDPXi:
362 NewOpc = AArch64::LDPXpost;
363 break;
364 case AArch64::LDPDi:
365 NewOpc = AArch64::LDPDpost;
366 break;
367 case AArch64::LDRXui:
368 NewOpc = AArch64::LDRXpost;
369 NewIsUnscaled = true;
370 break;
371 case AArch64::LDRDui:
372 NewOpc = AArch64::LDRDpost;
373 NewIsUnscaled = true;
374 break;
375 }
376
377 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
378 MIB.addReg(AArch64::SP, RegState::Define);
379
380 // Copy all operands other than the immediate offset.
381 unsigned OpndIdx = 0;
382 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
383 ++OpndIdx)
Diana Picus116bbab2017-01-13 09:58:52 +0000384 MIB.add(MBBI->getOperand(OpndIdx));
Geoff Berrya5335642016-05-06 16:34:59 +0000385
386 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
387 "Unexpected immediate offset in first/last callee-save save/restore "
388 "instruction!");
389 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
390 "Unexpected base register in callee-save save/restore instruction!");
391 // Last operand is immediate offset that needs fixing.
392 assert(CSStackSizeInc % 8 == 0);
393 int64_t CSStackSizeIncImm = CSStackSizeInc;
394 if (!NewIsUnscaled)
395 CSStackSizeIncImm /= 8;
396 MIB.addImm(CSStackSizeIncImm);
397
398 MIB.setMIFlags(MBBI->getFlags());
399 MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());
400
401 return std::prev(MBB.erase(MBBI));
402}
403
404// Fixup callee-save register save/restore instructions to take into account
405// combined SP bump by adding the local stack size to the stack offsets.
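//
// For example (illustrative), with a 48-byte local area the save
//   stp x29, x30, [sp, #16]
// is rewritten as
//   stp x29, x30, [sp, #64]
// so that it remains correct relative to the single, combined SP decrement.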
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              unsigned LocalStackSize) {
  unsigned Opc = MI.getOpcode();
  (void)Opc;
  assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
          Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
          Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
          Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
         "Unexpected callee-save save/restore opcode!");

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % 8 == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}

void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI.getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  auto CSStackSize = AFI->getCalleeSavedStackSize();
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - CSStackSize);

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);
    NumBytes = 0;
  } else if (CSStackSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
                                                     -CSStackSize);
    NumBytes -= CSStackSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
    ++MBBI;
  }
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
    int FPOffset = CSStackSize - 16;
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue   sub fp, sp, FPOffset  or
    //         mov fp, sp            when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data here,
      //   -- free to use. This is already produced by emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms
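      // For example (illustrative): re-aligning to 32 bytes gives
      // NrBitsToZero = 5, so immr = 59 and imms = 58, which encodes the
      // 64-bit mask 0xffffffffffffffe0.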

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}

void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                       RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI.getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note that
  // in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  auto CSStackSize = AFI->getCalleeSavedStackSize();
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);

  if (!CombineSPBump && CSStackSize != 0)
    convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);

  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
  }

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    NumBytes + ArgumentPopSize, TII,
                    MachineInstr::FrameDestroy);
    return;
  }

  NumBytes -= CSStackSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = CSStackSize == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save save
  // code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  return resolveFrameIndexReference(MF, FI, FrameReg);
}

int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  int FPOffset = MFI.getObjectOffset(FI) + 16;
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
               !RegInfo->needsStackRealignment(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
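      //
      // For example (illustrative): if FPOffset is -40 and the SP-relative
      // Offset is 4000, the FP is chosen since it is much closer to the
      // object and -40 comfortably fits the negative offset range.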
Matthias Braun941a7052016-07-28 18:40:00 +0000843 if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
Tim Northover3b0846e2014-05-24 12:50:23 +0000844 (FPOffset >= -256 && Offset > -FPOffset))
845 UseFP = true;
846 }
847 }
848
Kristof Beyls17cb8982015-04-09 08:49:47 +0000849 assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
850 "In the presence of dynamic stack pointer realignment, "
851 "non-argument objects cannot be accessed through the frame pointer");
852
Tim Northover3b0846e2014-05-24 12:50:23 +0000853 if (UseFP) {
854 FrameReg = RegInfo->getFrameRegister(MF);
855 return FPOffset;
856 }
857
858 // Use the base pointer if we have one.
859 if (RegInfo->hasBasePointer(MF))
860 FrameReg = RegInfo->getBaseRegister();
861 else {
862 FrameReg = AArch64::SP;
863 // If we're using the red zone for this function, the SP won't actually
864 // be adjusted, so the offsets will be negative. They're also all
865 // within range of the signed 9-bit immediate instructions.
866 if (canUseRedZone(MF))
867 Offset -= AFI->getLocalStackSize();
868 }
869
870 return Offset;
871}
872
873static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
Matthias Braun74a0bd32016-04-13 21:43:16 +0000874 // Do not set a kill flag on values that are also marked as live-in. This
875 // happens with the @llvm-returnaddress intrinsic and with arguments passed in
876 // callee saved registers.
877 // Omitting the kill flags is conservatively correct even if the live-in
878 // is not used after all.
879 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
880 return getKillRegState(!IsLiveIn);
Tim Northover3b0846e2014-05-24 12:50:23 +0000881}
882
Manman Ren57518142016-04-11 21:08:06 +0000883static bool produceCompactUnwindFrame(MachineFunction &MF) {
884 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
Reid Klecknerb5180542017-03-21 16:57:19 +0000885 AttributeList Attrs = MF.getFunction()->getAttributes();
Manman Ren57518142016-04-11 21:08:06 +0000886 return Subtarget.isTargetMachO() &&
887 !(Subtarget.getTargetLowering()->supportSwiftError() &&
888 Attrs.hasAttrSomewhere(Attribute::SwiftError));
889}
890
Benjamin Kramerb7d33112016-08-06 11:13:10 +0000891namespace {
Eugene Zelenko11f69072017-01-25 00:29:26 +0000892
Geoff Berry29d4a692016-02-01 19:07:06 +0000893struct RegPairInfo {
Eugene Zelenko11f69072017-01-25 00:29:26 +0000894 unsigned Reg1 = AArch64::NoRegister;
895 unsigned Reg2 = AArch64::NoRegister;
Geoff Berry29d4a692016-02-01 19:07:06 +0000896 int FrameIdx;
897 int Offset;
898 bool IsGPR;
Eugene Zelenko11f69072017-01-25 00:29:26 +0000899
900 RegPairInfo() = default;
901
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000902 bool isPaired() const { return Reg2 != AArch64::NoRegister; }
Geoff Berry29d4a692016-02-01 19:07:06 +0000903};
Eugene Zelenko11f69072017-01-25 00:29:26 +0000904
Benjamin Kramerb7d33112016-08-06 11:13:10 +0000905} // end anonymous namespace
Geoff Berry29d4a692016-02-01 19:07:06 +0000906
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000907static void computeCalleeSaveRegisterPairs(
908 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
909 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
Geoff Berry29d4a692016-02-01 19:07:06 +0000910
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000911 if (CSI.empty())
912 return;
913
914 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
Matthias Braun941a7052016-07-28 18:40:00 +0000915 MachineFrameInfo &MFI = MF.getFrameInfo();
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000916 CallingConv::ID CC = MF.getFunction()->getCallingConv();
Tim Northover3b0846e2014-05-24 12:50:23 +0000917 unsigned Count = CSI.size();
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000918 (void)CC;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000919 // MachO's compact unwind format relies on all registers being stored in
920 // pairs.
Manman Ren57518142016-04-11 21:08:06 +0000921 assert((!produceCompactUnwindFrame(MF) ||
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000922 CC == CallingConv::PreserveMost ||
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000923 (Count & 1) == 0) &&
924 "Odd number of callee-saved regs to spill!");
925 unsigned Offset = AFI->getCalleeSavedStackSize();
Tim Northover775aaeb2015-11-05 21:54:58 +0000926
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000927 for (unsigned i = 0; i < Count; ++i) {
Geoff Berry29d4a692016-02-01 19:07:06 +0000928 RegPairInfo RPI;
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000929 RPI.Reg1 = CSI[i].getReg();
930
931 assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
932 AArch64::FPR64RegClass.contains(RPI.Reg1));
933 RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
934
935 // Add the next reg to the pair if it is in the same register class.
936 if (i + 1 < Count) {
937 unsigned NextReg = CSI[i + 1].getReg();
938 if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
939 (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
940 RPI.Reg2 = NextReg;
941 }
Geoff Berry29d4a692016-02-01 19:07:06 +0000942
Tim Northover3b0846e2014-05-24 12:50:23 +0000943 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
944 // list to come in sorted by frame index so that we can issue the store
945 // pair instructions directly. Assert if we see anything otherwise.
946 //
947 // The order of the registers in the list is controlled by
948 // getCalleeSavedRegs(), so they will always be in-order, as well.
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000949 assert((!RPI.isPaired() ||
950 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
Tim Northover3b0846e2014-05-24 12:50:23 +0000951 "Out of order callee saved regs!");
Geoff Berry29d4a692016-02-01 19:07:06 +0000952
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000953 // MachO's compact unwind format relies on all registers being stored in
954 // adjacent register pairs.
Manman Ren57518142016-04-11 21:08:06 +0000955 assert((!produceCompactUnwindFrame(MF) ||
Roman Levenstein2792b3f2016-03-10 04:35:09 +0000956 CC == CallingConv::PreserveMost ||
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000957 (RPI.isPaired() &&
958 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
959 RPI.Reg1 + 1 == RPI.Reg2))) &&
960 "Callee-save registers not saved as adjacent register pair!");
961
962 RPI.FrameIdx = CSI[i].getFrameIdx();
963
964 if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
965 // Round up size of non-pair to pair size if we need to pad the
966 // callee-save area to ensure 16-byte alignment.
967 Offset -= 16;
Matthias Braun941a7052016-07-28 18:40:00 +0000968 assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
969 MFI.setObjectAlignment(RPI.FrameIdx, 16);
Geoff Berry66f6b652016-06-02 16:22:07 +0000970 AFI->setCalleeSaveStackHasFreeSpace(true);
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000971 } else
972 Offset -= RPI.isPaired() ? 16 : 8;
973 assert(Offset % 8 == 0);
974 RPI.Offset = Offset / 8;
Geoff Berry29d4a692016-02-01 19:07:06 +0000975 assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
976 "Offset out of bounds for LDP/STP immediate");
977
978 RegPairs.push_back(RPI);
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000979 if (RPI.isPaired())
980 ++i;
Geoff Berry29d4a692016-02-01 19:07:06 +0000981 }
982}
983
984bool AArch64FrameLowering::spillCalleeSavedRegisters(
985 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
986 const std::vector<CalleeSavedInfo> &CSI,
987 const TargetRegisterInfo *TRI) const {
988 MachineFunction &MF = *MBB.getParent();
989 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
990 DebugLoc DL;
991 SmallVector<RegPairInfo, 8> RegPairs;
992
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000993 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
Geoff Berry29d4a692016-02-01 19:07:06 +0000994
Geoff Berryc25d3bd2016-02-12 16:31:41 +0000995 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
Geoff Berry29d4a692016-02-01 19:07:06 +0000996 ++RPII) {
997 RegPairInfo RPI = *RPII;
998 unsigned Reg1 = RPI.Reg1;
999 unsigned Reg2 = RPI.Reg2;
1000 unsigned StrOpc;
1001
Geoff Berrya5335642016-05-06 16:34:59 +00001002 // Issue sequence of spills for cs regs. The first spill may be converted
1003 // to a pre-decrement store later by emitPrologue if the callee-save stack
1004 // area allocation can't be combined with the local stack area allocation.
Tim Northover3b0846e2014-05-24 12:50:23 +00001005 // For example:
Geoff Berrya5335642016-05-06 16:34:59 +00001006 // stp x22, x21, [sp, #0] // addImm(+0)
Tim Northover3b0846e2014-05-24 12:50:23 +00001007 // stp x20, x19, [sp, #16] // addImm(+2)
1008 // stp fp, lr, [sp, #32] // addImm(+4)
1009 // Rationale: This sequence saves uop updates compared to a sequence of
1010 // pre-increment spills like stp xi,xj,[sp,#-16]!
Geoff Berry29d4a692016-02-01 19:07:06 +00001011 // Note: Similar rationale and sequence for restores in epilog.
Geoff Berrya5335642016-05-06 16:34:59 +00001012 if (RPI.IsGPR)
1013 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1014 else
1015 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001016 DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
1017 if (RPI.isPaired())
1018 dbgs() << ", " << TRI->getName(Reg2);
1019 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1020 if (RPI.isPaired())
1021 dbgs() << ", " << RPI.FrameIdx+1;
1022 dbgs() << ")\n");
Geoff Berry29d4a692016-02-01 19:07:06 +00001023
Tim Northover3b0846e2014-05-24 12:50:23 +00001024 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
Geoff Berrya5335642016-05-06 16:34:59 +00001025 MBB.addLiveIn(Reg1);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001026 if (RPI.isPaired()) {
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001027 MBB.addLiveIn(Reg2);
Geoff Berrya5335642016-05-06 16:34:59 +00001028 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
Geoff Berryc3764062016-04-15 15:16:19 +00001029 MIB.addMemOperand(MF.getMachineMemOperand(
1030 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
1031 MachineMemOperand::MOStore, 8, 8));
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001032 }
Geoff Berrya5335642016-05-06 16:34:59 +00001033 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
1034 .addReg(AArch64::SP)
1035 .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
1036 .setMIFlag(MachineInstr::FrameSetup);
Geoff Berryc3764062016-04-15 15:16:19 +00001037 MIB.addMemOperand(MF.getMachineMemOperand(
1038 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
1039 MachineMemOperand::MOStore, 8, 8));
Tim Northover3b0846e2014-05-24 12:50:23 +00001040 }
1041 return true;
1042}
1043
1044bool AArch64FrameLowering::restoreCalleeSavedRegisters(
1045 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1046 const std::vector<CalleeSavedInfo> &CSI,
1047 const TargetRegisterInfo *TRI) const {
1048 MachineFunction &MF = *MBB.getParent();
Eric Christopherfc6de422014-08-05 02:39:49 +00001049 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001050 DebugLoc DL;
Geoff Berry29d4a692016-02-01 19:07:06 +00001051 SmallVector<RegPairInfo, 8> RegPairs;
Tim Northover3b0846e2014-05-24 12:50:23 +00001052
1053 if (MI != MBB.end())
1054 DL = MI->getDebugLoc();
1055
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001056 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
Geoff Berry29d4a692016-02-01 19:07:06 +00001057
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001058 for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
Geoff Berry29d4a692016-02-01 19:07:06 +00001059 ++RPII) {
1060 RegPairInfo RPI = *RPII;
1061 unsigned Reg1 = RPI.Reg1;
1062 unsigned Reg2 = RPI.Reg2;
1063
Geoff Berrya5335642016-05-06 16:34:59 +00001064 // Issue sequence of restores for cs regs. The last restore may be converted
1065 // to a post-increment load later by emitEpilogue if the callee-save stack
1066 // area allocation can't be combined with the local stack area allocation.
Tim Northover3b0846e2014-05-24 12:50:23 +00001067 // For example:
1068 // ldp fp, lr, [sp, #32] // addImm(+4)
1069 // ldp x20, x19, [sp, #16] // addImm(+2)
Geoff Berrya5335642016-05-06 16:34:59 +00001070 // ldp x22, x21, [sp, #0] // addImm(+0)
Tim Northover3b0846e2014-05-24 12:50:23 +00001071 // Note: see comment in spillCalleeSavedRegisters()
1072 unsigned LdrOpc;
Geoff Berrya5335642016-05-06 16:34:59 +00001073 if (RPI.IsGPR)
1074 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
1075 else
1076 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001077 DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
1078 if (RPI.isPaired())
1079 dbgs() << ", " << TRI->getName(Reg2);
1080 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1081 if (RPI.isPaired())
1082 dbgs() << ", " << RPI.FrameIdx+1;
1083 dbgs() << ")\n");
Tim Northover3b0846e2014-05-24 12:50:23 +00001084
Tim Northover3b0846e2014-05-24 12:50:23 +00001085 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
Geoff Berryc3764062016-04-15 15:16:19 +00001086 if (RPI.isPaired()) {
Geoff Berrya5335642016-05-06 16:34:59 +00001087 MIB.addReg(Reg2, getDefRegState(true));
Geoff Berryc3764062016-04-15 15:16:19 +00001088 MIB.addMemOperand(MF.getMachineMemOperand(
1089 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
1090 MachineMemOperand::MOLoad, 8, 8));
Geoff Berryc3764062016-04-15 15:16:19 +00001091 }
Geoff Berrya5335642016-05-06 16:34:59 +00001092 MIB.addReg(Reg1, getDefRegState(true))
1093 .addReg(AArch64::SP)
1094        .addImm(RPI.Offset) // [sp, #offset*8]; the factor of 8 is implicit
1095 .setMIFlag(MachineInstr::FrameDestroy);
Geoff Berryc3764062016-04-15 15:16:19 +00001096 MIB.addMemOperand(MF.getMachineMemOperand(
1097 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
1098 MachineMemOperand::MOLoad, 8, 8));
Tim Northover3b0846e2014-05-24 12:50:23 +00001099 }
1100 return true;
1101}
1102
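// Decide which callee-saved registers actually need to be saved, whether the
// function needs a stack frame at all, and whether an extra register or an
// emergency spill slot must be set aside for the register scavenger.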
Matthias Braun02564862015-07-14 17:17:13 +00001103void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
1104 BitVector &SavedRegs,
1105 RegScavenger *RS) const {
1106 // All calls are tail calls in GHC calling conv, and functions have no
1107 // prologue/epilogue.
1108 if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
1109 return;
1110
1111 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
Tim Northover3b0846e2014-05-24 12:50:23 +00001112 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
Eric Christopherfc6de422014-08-05 02:39:49 +00001113 MF.getSubtarget().getRegisterInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +00001114 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001115 unsigned UnspilledCSGPR = AArch64::NoRegister;
1116 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
Tim Northover3b0846e2014-05-24 12:50:23 +00001117
1118  // The frame record needs to be created by saving FP and LR.
1119 if (hasFP(MF)) {
Matthias Braun02564862015-07-14 17:17:13 +00001120 SavedRegs.set(AArch64::FP);
1121 SavedRegs.set(AArch64::LR);
Tim Northover3b0846e2014-05-24 12:50:23 +00001122 }
1123
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001124 unsigned BasePointerReg = AArch64::NoRegister;
Tim Northover3b0846e2014-05-24 12:50:23 +00001125 if (RegInfo->hasBasePointer(MF))
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001126 BasePointerReg = RegInfo->getBaseRegister();
Tim Northover3b0846e2014-05-24 12:50:23 +00001127
Matthias Braund78597e2017-04-21 22:42:08 +00001128 unsigned ExtraCSSpill = 0;
Tim Northover3b0846e2014-05-24 12:50:23 +00001129 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001130 // Figure out which callee-saved registers to save/restore.
1131 for (unsigned i = 0; CSRegs[i]; ++i) {
1132 const unsigned Reg = CSRegs[i];
Tim Northover3b0846e2014-05-24 12:50:23 +00001133
Geoff Berry7e4ba3d2016-02-19 18:27:32 +00001134 // Add the base pointer register to SavedRegs if it is callee-save.
1135 if (Reg == BasePointerReg)
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001136 SavedRegs.set(Reg);
Tim Northover3b0846e2014-05-24 12:50:23 +00001137
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001138 bool RegUsed = SavedRegs.test(Reg);
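    // CSRegs is laid out in aligned pairs, so entry i ^ 1 is entry i's pair
    // partner (0<->1, 2<->3, ...).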
1139 unsigned PairedReg = CSRegs[i ^ 1];
1140 if (!RegUsed) {
1141 if (AArch64::GPR64RegClass.contains(Reg) &&
1142 !RegInfo->isReservedReg(MF, Reg)) {
1143 UnspilledCSGPR = Reg;
1144 UnspilledCSGPRPaired = PairedReg;
Tim Northover3b0846e2014-05-24 12:50:23 +00001145 }
1146 continue;
1147 }
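    // An unspilled GPR recorded above may be pressed into service below as a
    // scratch register if the frame turns out to be big.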
1148
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001149 // MachO's compact unwind format relies on all registers being stored in
1150 // pairs.
1151 // FIXME: the usual format is actually better if unwinding isn't needed.
Manman Ren57518142016-04-11 21:08:06 +00001152 if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001153 SavedRegs.set(PairedReg);
Geoff Berry74cb7182016-05-16 20:52:28 +00001154 if (AArch64::GPR64RegClass.contains(PairedReg) &&
1155 !RegInfo->isReservedReg(MF, PairedReg))
Matthias Braund78597e2017-04-21 22:42:08 +00001156 ExtraCSSpill = PairedReg;
Tim Northover3b0846e2014-05-24 12:50:23 +00001157 }
Tim Northover3b0846e2014-05-24 12:50:23 +00001158 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001159
1160 DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
1161 for (int Reg = SavedRegs.find_first(); Reg != -1;
1162 Reg = SavedRegs.find_next(Reg))
1163 dbgs() << ' ' << PrintReg(Reg, RegInfo);
1164 dbgs() << "\n";);
1165
1166 // If any callee-saved registers are used, the frame cannot be eliminated.
1167 unsigned NumRegsSpilled = SavedRegs.count();
1168 bool CanEliminateFrame = NumRegsSpilled == 0;
Tim Northover3b0846e2014-05-24 12:50:23 +00001169
1170 // FIXME: Set BigStack if any stack slot references may be out of range.
1171  // For now, just conservatively guesstimate based on unscaled indexing
1172 // range. We'll end up allocating an unnecessary spill slot a lot, but
1173 // realistically that's not a big deal at this stage of the game.
1174 // The CSR spill slots have not been allocated yet, so estimateStackSize
1175 // won't include them.
Matthias Braun941a7052016-07-28 18:40:00 +00001176 MachineFrameInfo &MFI = MF.getFrameInfo();
1177 unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
Tim Northover3b0846e2014-05-24 12:50:23 +00001178 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
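  // 256 is the limit of the signed 9-bit offset used by the unscaled
  // LDUR/STUR forms ([-256, 255]); frames at or beyond it may need offsets
  // materialized in a scratch register.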
1179 bool BigStack = (CFSize >= 256);
1180 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
1181 AFI->setHasStackFrame(true);
1182
1183 // Estimate if we might need to scavenge a register at some point in order
1184 // to materialize a stack offset. If so, either spill one additional
1185 // callee-saved register or reserve a special spill slot to facilitate
1186 // register scavenging. If we already spilled an extra callee-saved register
1187 // above to keep the number of spills even, we don't need to do anything else
1188 // here.
Matthias Braund78597e2017-04-21 22:42:08 +00001189 if (BigStack) {
1190 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001191 DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
1192 << " to get a scratch register.\n");
1193 SavedRegs.set(UnspilledCSGPR);
1194 // MachO's compact unwind format relies on all registers being stored in
1195 // pairs, so if we need to spill one extra for BigStack, then we need to
1196 // store the pair.
Manman Ren57518142016-04-11 21:08:06 +00001197 if (produceCompactUnwindFrame(MF))
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001198 SavedRegs.set(UnspilledCSGPRPaired);
Matthias Braund78597e2017-04-21 22:42:08 +00001199 ExtraCSSpill = UnspilledCSGPRPaired;
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001200 NumRegsSpilled = SavedRegs.count();
Tim Northover3b0846e2014-05-24 12:50:23 +00001201 }
1202
1203 // If we didn't find an extra callee-saved register to spill, create
1204 // an emergency spill slot.
Matthias Braund78597e2017-04-21 22:42:08 +00001205 if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001206 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1207 const TargetRegisterClass &RC = AArch64::GPR64RegClass;
1208 unsigned Size = TRI->getSpillSize(RC);
1209 unsigned Align = TRI->getSpillAlignment(RC);
1210 int FI = MFI.CreateStackObject(Size, Align, false);
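      // Registering the slot with the scavenger lets it spill any live
      // register here when it needs a temporary for an out-of-range offset.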
Tim Northover3b0846e2014-05-24 12:50:23 +00001211 RS->addScavengingFrameIndex(FI);
1212 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
1213 << " as the emergency spill slot.\n");
1214 }
1215 }
Geoff Berry04bf91a2016-02-01 16:29:19 +00001216
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001217 // Round up to register pair alignment to avoid additional SP adjustment
1218 // instructions.
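  // For example, three spilled registers occupy 8 * 3 = 24 bytes, which is
  // rounded up to 32 so SP keeps its 16-byte alignment.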
1219 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
Tim Northover3b0846e2014-05-24 12:50:23 +00001220}
Geoff Berry66f6b652016-06-02 16:22:07 +00001221
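// Roughly: stack slot scavenging lets frame lowering reuse dead bytes in the
// callee-save area (such as the rounding padding added above) for other
// stack objects, which is only worthwhile when such free space exists.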
1222bool AArch64FrameLowering::enableStackSlotScavenging(
1223 const MachineFunction &MF) const {
1224 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1225 return AFI->hasCalleeSaveStackFreeSpace();
1226}