//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until in the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | (Win64 only) varargs from reg     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_fp, prev_lr                  |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |                                   |
// | other callee-saved registers      |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset must be computable at compile
// time from one of the pointers (fp, bp, sp). The size of the areas with a
// dotted background cannot be computed at compile time if they are present,
// making it necessary to have all three of fp, bp and sp set up to be able
// to access all contents in the frame areas, assuming all of the frame areas
// are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// FIXME: also explain the redzone concept.
// FIXME: also explain the concept of reserved call frames.
//
//===----------------------------------------------------------------------===//

#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

/// Look at each instruction that references stack frames and return the stack
/// size limit beyond which some of these instructions will require a scratch
/// register during their expansion later.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
  // FIXME: For now, just conservatively guestimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
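  // (Editorial note, not from the original source: the 255-byte limit
  // returned below appears to correspond to the unscaled LDUR/STUR addressing
  // mode, whose signed 9-bit immediate covers [-256, 255]; frames small
  // enough to stay in that range never need an emergency scratch register
  // for frame-index expansion.)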
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugValue() || MI.isPseudo() ||
          MI.getOpcode() == AArch64::ADDXri ||
          MI.getOpcode() == AArch64::ADDSXri)
        continue;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isFI())
          continue;

        int Offset = 0;
        if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
            AArch64FrameOffsetCannotUpdate)
          return 0;
      }
    }
  }
  return 255;
}

bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;
  // Don't use the red zone if the function explicitly asks us not to.
  // This is typically used for kernel code.
  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned NumBytes = AFI->getLocalStackSize();

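  // (Note, not from the original source: the "red zone" here is the 128 bytes
  // immediately below SP that some AArch64 ABIs, e.g. Darwin's, guarantee will
  // not be clobbered asynchronously, which is why a leaf function whose locals
  // fit in 128 bytes can skip the SP adjustment entirely.)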
  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  // Retain behavior of always omitting the FP for leaf functions when
  // possible.
  return (MFI.hasCalls() &&
          MF.getTarget().Options.DisableFramePointerElim(MF)) ||
         MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
         MFI.hasStackMap() || MFI.hasPatchPoint() ||
         RegInfo->needsStackRealignment(MF);
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects();
}

MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too
    // so this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      //    LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from
    // the stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  return MBB.erase(I);
}

void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const MCRegisterInfo *MRI = STI.getRegisterInfo();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    int64_t Offset =
        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving at
// least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
  MachineFunction *MF = MBB->getParent();

  // If MBB is an entry block, use X9 as the scratch register.
  if (&MF->front() == MBB)
    return AArch64::X9;

  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
  LivePhysRegs LiveRegs(TRI);
  LiveRegs.addLiveIns(*MBB);

  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  // Prefer X9 since it was historically used for the prologue scratch reg.
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (LiveRegs.available(MRI, AArch64::X9))
    return AArch64::X9;

  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }
  return AArch64::NoRegister;
}

bool AArch64FrameLowering::canUseAsPrologue(
    const MachineBasicBlock &MBB) const {
  const MachineFunction *MF = MBB.getParent();
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Don't need a scratch register if we're not going to re-align the stack.
  if (!RegInfo->needsStackRealignment(*MF))
    return true;
  // Otherwise, we can use any block as long as it has a scratch register
  // available.
  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}

bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, unsigned StackBumpBytes) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  if (AFI->getLocalStackSize() == 0)
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores.
  if (StackBumpBytes >= 512)
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->needsStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (canUseRedZone(MF))
    return false;

  return true;
}

// Convert a callee-save register save/restore instruction into one that also
// decrements/increments the stack pointer to allocate/deallocate the
// callee-save stack area, by rewriting the store/load to its pre/post
// increment form.
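// Illustrative sketch (added commentary, not from the original source): with
// a 48-byte callee-save area, the prologue's first save
//     stp x22, x21, [sp, #0]
// is rewritten by this helper into
//     stp x22, x21, [sp, #-48]!
// so a single instruction both allocates the callee-save area and stores the
// first pair; the epilogue conversion is the mirror image using a
// post-increment load.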
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
  unsigned NewOpc;
  bool NewIsUnscaled = false;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    NewIsUnscaled = true;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    NewIsUnscaled = true;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    NewIsUnscaled = true;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    NewIsUnscaled = true;
    break;
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  assert(CSStackSizeInc % 8 == 0);
  int64_t CSStackSizeIncImm = CSStackSizeInc;
  if (!NewIsUnscaled)
    CSStackSizeIncImm /= 8;
  MIB.addImm(CSStackSizeIncImm);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());

  return std::prev(MBB.erase(MBBI));
}

// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              unsigned LocalStackSize) {
  unsigned Opc = MI.getOpcode();
  (void)Opc;
  assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
          Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
          Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
          Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
         "Unexpected callee-save save/restore opcode!");

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % 8 == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}

void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI.getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

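  // (Note, not from the original source: on Win64 the register-passed varargs
  // are spilled to a fixed area just above the frame record -- see the frame
  // diagram at the top of this file -- so that area is folded into the same
  // initial SP decrement as the callee saves.)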
  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);
    NumBytes = 0;
  } else if (PrologueSaveSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
                                                     -PrologueSaveSize);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
    ++MBBI;
  }
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is
    // fp = sp - fixedobject - 16.
    int FPOffset = AFI->getCalleeSavedStackSize() - 16;
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue    sub fp, sp, FPOffset  or
    //          mov fp, sp            when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data
      //   -- here; free to use. This is already produced by emitFrameOffset
      //   -- above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms
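      // (Worked example, not from the original source: for a 32-byte maximum
      // alignment, NrBitsToZero == 5, giving N=1, immr=59, imms=58, which
      // decodes to the mask 0xFFFFFFFFFFFFFFE0.)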

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    //  Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    //  Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........  |
    // 10004 | ........ | ........ | ........ | ........  |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........  |
    // 1000c | ........ | ........ | ........ | ........  |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |               Frame Pointer               |
    // 10024 |               Frame Pointer               |
    //       +-------------------------------------------+
    // 10028 |               Link Register               |
    // 1002c |               Link Register               |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........  |
    // 10034 | ........ | ........ | ........ | ........  |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........  |
    // 1003c | ........ | ........ | ........ | ........  |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, Reg, 2 * StackGrowth - FixedObject));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}

void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                       RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI.getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);

  if (!CombineSPBump && PrologueSaveSize != 0)
    convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, PrologueSaveSize);

  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
  }

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    NumBytes + ArgumentPopSize, TII,
                    MachineInstr::FrameDestroy);
    return;
  }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save
  // save code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                    ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  return resolveFrameIndexReference(MF, FI, FrameReg);
}

int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
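  // (Note, not from the original source: the prologue sets
  // fp = entry SP - FixedObject - 16, so an object's offset from fp is its
  // object offset plus the Win64 varargs area, if any, plus the 16-byte
  // frame record.)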
  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  bool isFixed = MFI.isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
               !RegInfo->needsStackRealignment(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
      if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
          (FPOffset >= -256 && Offset > -FPOffset))
        UseFP = true;
    }
  }

  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument objects cannot be accessed through the frame pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}

static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  // Do not set a kill flag on values that are also marked as live-in. This
  // happens with the @llvm.returnaddress intrinsic and with arguments passed
  // in callee saved registers.
  // Omitting the kill flags is conservatively correct even if the live-in
  // is not used after all.
  bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
  return getKillRegState(!IsLiveIn);
}

static bool produceCompactUnwindFrame(MachineFunction &MF) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  AttributeList Attrs = MF.getFunction().getAttributes();
  return Subtarget.isTargetMachO() &&
         !(Subtarget.getTargetLowering()->supportSwiftError() &&
           Attrs.hasAttrSomewhere(Attribute::SwiftError));
}

namespace {

struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;
  int FrameIdx;
  int Offset;
  bool IsGPR;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};

} // end anonymous namespace

static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {

  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction().getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  int Offset = AFI->getCalleeSavedStackSize();

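  // (Illustrative sketch, not from the original source: the loop below walks
  // CSI in order, pairing adjacent registers of the same class and handing
  // out 16-byte slots from the top of the callee-save area downwards, so the
  // last pair formed ends up at offset 0 -- matching the
  //    stp ..., [sp, #0]  ...  stp fp, lr, [sp, #32]
  // sequence described in spillCalleeSavedRegisters().)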
  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI.setObjectAlignment(RPI.FrameIdx, 16);
      AFI->setCalleeSaveStackHasFreeSpace(true);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }
}

1030bool AArch64FrameLowering::spillCalleeSavedRegisters(
1031 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1032 const std::vector<CalleeSavedInfo> &CSI,
1033 const TargetRegisterInfo *TRI) const {
1034 MachineFunction &MF = *MBB.getParent();
1035 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1036 DebugLoc DL;
1037 SmallVector<RegPairInfo, 8> RegPairs;
1038
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001039 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
Matthias Braun88c8c982017-05-27 03:38:02 +00001040 const MachineRegisterInfo &MRI = MF.getRegInfo();
Geoff Berry29d4a692016-02-01 19:07:06 +00001041
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001042 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
Geoff Berry29d4a692016-02-01 19:07:06 +00001043 ++RPII) {
1044 RegPairInfo RPI = *RPII;
1045 unsigned Reg1 = RPI.Reg1;
1046 unsigned Reg2 = RPI.Reg2;
1047 unsigned StrOpc;
1048
Geoff Berrya5335642016-05-06 16:34:59 +00001049 // Issue sequence of spills for cs regs. The first spill may be converted
1050 // to a pre-decrement store later by emitPrologue if the callee-save stack
1051 // area allocation can't be combined with the local stack area allocation.
Tim Northover3b0846e2014-05-24 12:50:23 +00001052 // For example:
Geoff Berrya5335642016-05-06 16:34:59 +00001053 // stp x22, x21, [sp, #0] // addImm(+0)
Tim Northover3b0846e2014-05-24 12:50:23 +00001054 // stp x20, x19, [sp, #16] // addImm(+2)
1055 // stp fp, lr, [sp, #32] // addImm(+4)
1056 // Rationale: This sequence saves uop updates compared to a sequence of
1057 // pre-increment spills like stp xi,xj,[sp,#-16]!
Geoff Berry29d4a692016-02-01 19:07:06 +00001058 // Note: Similar rationale and sequence for restores in epilog.
Geoff Berrya5335642016-05-06 16:34:59 +00001059 if (RPI.IsGPR)
1060 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1061 else
1062 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
Francis Visoiu Mistrihc71cced2017-11-30 16:12:24 +00001063 DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001064 if (RPI.isPaired())
Francis Visoiu Mistrihc71cced2017-11-30 16:12:24 +00001065 dbgs() << ", " << printReg(Reg2, TRI);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001066 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1067 if (RPI.isPaired())
1068 dbgs() << ", " << RPI.FrameIdx+1;
1069 dbgs() << ")\n");
Geoff Berry29d4a692016-02-01 19:07:06 +00001070
Tim Northover3b0846e2014-05-24 12:50:23 +00001071 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
Matthias Braun88c8c982017-05-27 03:38:02 +00001072 if (!MRI.isReserved(Reg1))
1073 MBB.addLiveIn(Reg1);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001074 if (RPI.isPaired()) {
Matthias Braun88c8c982017-05-27 03:38:02 +00001075 if (!MRI.isReserved(Reg2))
1076 MBB.addLiveIn(Reg2);
Geoff Berrya5335642016-05-06 16:34:59 +00001077 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
Geoff Berryc3764062016-04-15 15:16:19 +00001078 MIB.addMemOperand(MF.getMachineMemOperand(
1079 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
1080 MachineMemOperand::MOStore, 8, 8));
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001081 }
Geoff Berrya5335642016-05-06 16:34:59 +00001082 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
1083 .addReg(AArch64::SP)
1084 .addImm(RPI.Offset) // [sp, #offset*8]; the scaling by 8 is implicit
1085 .setMIFlag(MachineInstr::FrameSetup);
Geoff Berryc3764062016-04-15 15:16:19 +00001086 MIB.addMemOperand(MF.getMachineMemOperand(
1087 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
1088 MachineMemOperand::MOStore, 8, 8));
Tim Northover3b0846e2014-05-24 12:50:23 +00001089 }
1090 return true;
1091}
1092
1093bool AArch64FrameLowering::restoreCalleeSavedRegisters(
1094 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
Krzysztof Parzyszekbea30c62017-08-10 16:17:32 +00001095 std::vector<CalleeSavedInfo> &CSI,
Tim Northover3b0846e2014-05-24 12:50:23 +00001096 const TargetRegisterInfo *TRI) const {
1097 MachineFunction &MF = *MBB.getParent();
Eric Christopherfc6de422014-08-05 02:39:49 +00001098 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
Tim Northover3b0846e2014-05-24 12:50:23 +00001099 DebugLoc DL;
Geoff Berry29d4a692016-02-01 19:07:06 +00001100 SmallVector<RegPairInfo, 8> RegPairs;
Tim Northover3b0846e2014-05-24 12:50:23 +00001101
1102 if (MI != MBB.end())
1103 DL = MI->getDebugLoc();
1104
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001105 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
Geoff Berry29d4a692016-02-01 19:07:06 +00001106
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001107 for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
Geoff Berry29d4a692016-02-01 19:07:06 +00001108 ++RPII) {
1109 RegPairInfo RPI = *RPII;
1110 unsigned Reg1 = RPI.Reg1;
1111 unsigned Reg2 = RPI.Reg2;
1112
Geoff Berrya5335642016-05-06 16:34:59 +00001113 // Issue sequence of restores for cs regs. The last restore may be converted
1114 // to a post-increment load later by emitEpilogue if the callee-save stack
1115 // area allocation can't be combined with the local stack area allocation.
Tim Northover3b0846e2014-05-24 12:50:23 +00001116 // For example:
1117 // ldp fp, lr, [sp, #32] // addImm(+4)
1118 // ldp x20, x19, [sp, #16] // addImm(+2)
Geoff Berrya5335642016-05-06 16:34:59 +00001119 // ldp x22, x21, [sp, #0] // addImm(+0)
Tim Northover3b0846e2014-05-24 12:50:23 +00001120 // Note: see comment in spillCalleeSavedRegisters()
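    // For instance (illustrative), if the callee-save area cannot be folded
    // into the local-stack deallocation, emitEpilogue may rewrite the last
    // load as:
    //   ldp x22, x21, [sp], #48   // post-increment frees the CSR area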
1121 unsigned LdrOpc;
Geoff Berrya5335642016-05-06 16:34:59 +00001122 if (RPI.IsGPR)
1123 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
1124 else
1125 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
Francis Visoiu Mistrihc71cced2017-11-30 16:12:24 +00001126 DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001127 if (RPI.isPaired())
Francis Visoiu Mistrihc71cced2017-11-30 16:12:24 +00001128 dbgs() << ", " << printReg(Reg2, TRI);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001129 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1130 if (RPI.isPaired())
1131 dbgs() << ", " << RPI.FrameIdx+1;
1132 dbgs() << ")\n");
Tim Northover3b0846e2014-05-24 12:50:23 +00001133
Tim Northover3b0846e2014-05-24 12:50:23 +00001134 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
Geoff Berryc3764062016-04-15 15:16:19 +00001135 if (RPI.isPaired()) {
Geoff Berrya5335642016-05-06 16:34:59 +00001136 MIB.addReg(Reg2, getDefRegState(true));
Geoff Berryc3764062016-04-15 15:16:19 +00001137 MIB.addMemOperand(MF.getMachineMemOperand(
1138 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
1139 MachineMemOperand::MOLoad, 8, 8));
Geoff Berryc3764062016-04-15 15:16:19 +00001140 }
Geoff Berrya5335642016-05-06 16:34:59 +00001141 MIB.addReg(Reg1, getDefRegState(true))
1142 .addReg(AArch64::SP)
1143 .addImm(RPI.Offset) // [sp, #offset*8]; the scaling by 8 is implicit
1144 .setMIFlag(MachineInstr::FrameDestroy);
Geoff Berryc3764062016-04-15 15:16:19 +00001145 MIB.addMemOperand(MF.getMachineMemOperand(
1146 MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
1147 MachineMemOperand::MOLoad, 8, 8));
Tim Northover3b0846e2014-05-24 12:50:23 +00001148 }
1149 return true;
1150}
1151
Matthias Braun02564862015-07-14 17:17:13 +00001152void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
1153 BitVector &SavedRegs,
1154 RegScavenger *RS) const {
1155 // All calls are tail calls in GHC calling conv, and functions have no
1156 // prologue/epilogue.
Matthias Braunf1caa282017-12-15 22:22:58 +00001157 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
Matthias Braun02564862015-07-14 17:17:13 +00001158 return;
1159
1160 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
Tim Northover3b0846e2014-05-24 12:50:23 +00001161 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
Eric Christopherfc6de422014-08-05 02:39:49 +00001162 MF.getSubtarget().getRegisterInfo());
Tim Northover3b0846e2014-05-24 12:50:23 +00001163 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001164 unsigned UnspilledCSGPR = AArch64::NoRegister;
1165 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
Tim Northover3b0846e2014-05-24 12:50:23 +00001166
1167 // The frame record needs to be created by saving the appropriate registers.
1168 if (hasFP(MF)) {
Matthias Braun02564862015-07-14 17:17:13 +00001169 SavedRegs.set(AArch64::FP);
1170 SavedRegs.set(AArch64::LR);
Tim Northover3b0846e2014-05-24 12:50:23 +00001171 }
1172
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001173 unsigned BasePointerReg = AArch64::NoRegister;
Tim Northover3b0846e2014-05-24 12:50:23 +00001174 if (RegInfo->hasBasePointer(MF))
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001175 BasePointerReg = RegInfo->getBaseRegister();
Tim Northover3b0846e2014-05-24 12:50:23 +00001176
Matthias Braund78597e2017-04-21 22:42:08 +00001177 unsigned ExtraCSSpill = 0;
Tim Northover3b0846e2014-05-24 12:50:23 +00001178 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001179 // Figure out which callee-saved registers to save/restore.
1180 for (unsigned i = 0; CSRegs[i]; ++i) {
1181 const unsigned Reg = CSRegs[i];
Tim Northover3b0846e2014-05-24 12:50:23 +00001182
Geoff Berry7e4ba3d2016-02-19 18:27:32 +00001183 // Add the base pointer register to SavedRegs if it is callee-save.
1184 if (Reg == BasePointerReg)
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001185 SavedRegs.set(Reg);
Tim Northover3b0846e2014-05-24 12:50:23 +00001186
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001187 bool RegUsed = SavedRegs.test(Reg);
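    // CSRegs is laid out so that registers which may be paired sit at adjacent
    // even/odd indices, so XOR-ing the index with 1 yields the other member of
    // the potential pair.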
1188 unsigned PairedReg = CSRegs[i ^ 1];
1189 if (!RegUsed) {
1190 if (AArch64::GPR64RegClass.contains(Reg) &&
1191 !RegInfo->isReservedReg(MF, Reg)) {
1192 UnspilledCSGPR = Reg;
1193 UnspilledCSGPRPaired = PairedReg;
Tim Northover3b0846e2014-05-24 12:50:23 +00001194 }
1195 continue;
1196 }
1197
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001198 // MachO's compact unwind format relies on all registers being stored in
1199 // pairs.
1200 // FIXME: the usual format is actually better if unwinding isn't needed.
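    // For instance (illustrative): if only x19 is live across the function,
    // its partner x20 is marked as saved too, so the compact unwind info still
    // describes a full register pair.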
Manman Ren57518142016-04-11 21:08:06 +00001201 if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001202 SavedRegs.set(PairedReg);
Geoff Berry74cb7182016-05-16 20:52:28 +00001203 if (AArch64::GPR64RegClass.contains(PairedReg) &&
1204 !RegInfo->isReservedReg(MF, PairedReg))
Matthias Braund78597e2017-04-21 22:42:08 +00001205 ExtraCSSpill = PairedReg;
Tim Northover3b0846e2014-05-24 12:50:23 +00001206 }
Tim Northover3b0846e2014-05-24 12:50:23 +00001207 }
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001208
1209 DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
Francis Visoiu Mistrihb52e0362017-05-17 01:07:53 +00001210 for (unsigned Reg : SavedRegs.set_bits())
Francis Visoiu Mistrih9d419d32017-11-28 12:42:37 +00001211 dbgs() << ' ' << printReg(Reg, RegInfo);
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001212 dbgs() << "\n";);
1213
1214 // If any callee-saved registers are used, the frame cannot be eliminated.
1215 unsigned NumRegsSpilled = SavedRegs.count();
1216 bool CanEliminateFrame = NumRegsSpilled == 0;
Tim Northover3b0846e2014-05-24 12:50:23 +00001217
Tim Northover3b0846e2014-05-24 12:50:23 +00001218 // The CSR spill slots have not been allocated yet, so estimateStackSize
1219 // won't include them.
Matthias Braun941a7052016-07-28 18:40:00 +00001220 MachineFrameInfo &MFI = MF.getFrameInfo();
1221 unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
Tim Northover3b0846e2014-05-24 12:50:23 +00001222 DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
Kristof Beyls2af1e902017-05-30 06:58:41 +00001223 unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
1224 bool BigStack = (CFSize > EstimatedStackSizeLimit);
Tim Northover3b0846e2014-05-24 12:50:23 +00001225 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
1226 AFI->setHasStackFrame(true);
1227
1228 // Estimate if we might need to scavenge a register at some point in order
1229 // to materialize a stack offset. If so, either spill one additional
1230 // callee-saved register or reserve a special spill slot to facilitate
1231 // register scavenging. If we already spilled an extra callee-saved register
1232 // above to keep the number of spills even, we don't need to do anything else
1233 // here.
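  // (Large frames can produce SP-relative offsets that do not fit the
  //  immediate field of a single load/store, so a spare GPR is needed to
  //  materialize the address before the access.)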
Matthias Braund78597e2017-04-21 22:42:08 +00001234 if (BigStack) {
1235 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
Francis Visoiu Mistrih9d419d32017-11-28 12:42:37 +00001236 DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
Francis Visoiu Mistrihc71cced2017-11-30 16:12:24 +00001237 << " to get a scratch register.\n");
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001238 SavedRegs.set(UnspilledCSGPR);
1239 // MachO's compact unwind format relies on all registers being stored in
1240 // pairs, so if we need to spill one extra for BigStack, then we need to
1241 // store the pair.
Manman Ren57518142016-04-11 21:08:06 +00001242 if (produceCompactUnwindFrame(MF))
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001243 SavedRegs.set(UnspilledCSGPRPaired);
Matthias Braund78597e2017-04-21 22:42:08 +00001244 ExtraCSSpill = UnspilledCSGPRPaired;
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001245 NumRegsSpilled = SavedRegs.count();
Tim Northover3b0846e2014-05-24 12:50:23 +00001246 }
1247
1248 // If we didn't find an extra callee-saved register to spill, create
1249 // an emergency spill slot.
Matthias Braund78597e2017-04-21 22:42:08 +00001250 if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
Krzysztof Parzyszek44e25f32017-04-24 18:55:33 +00001251 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1252 const TargetRegisterClass &RC = AArch64::GPR64RegClass;
1253 unsigned Size = TRI->getSpillSize(RC);
1254 unsigned Align = TRI->getSpillAlignment(RC);
1255 int FI = MFI.CreateStackObject(Size, Align, false);
Tim Northover3b0846e2014-05-24 12:50:23 +00001256 RS->addScavengingFrameIndex(FI);
1257 DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
1258 << " as the emergency spill slot.\n");
1259 }
1260 }
Geoff Berry04bf91a2016-02-01 16:29:19 +00001261
Geoff Berryc25d3bd2016-02-12 16:31:41 +00001262 // Round up to register pair alignment to avoid additional SP adjustment
1263 // instructions.
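  // For instance: 5 spilled registers occupy 40 bytes, which is rounded up to
  // 48 so the callee-save area preserves the 16-byte SP alignment.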
1264 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
Tim Northover3b0846e2014-05-24 12:50:23 +00001265}
Geoff Berry66f6b652016-06-02 16:22:07 +00001266
1267bool AArch64FrameLowering::enableStackSlotScavenging(
1268 const MachineFunction &MF) const {
1269 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1270 return AFI->hasCalleeSaveStackFreeSpace();
1271}