ARM: fold prologue/epilogue sp updates into push/pop for code size
ARM prologues usually look like:
push {r7, lr}
sub sp, sp, #4
If code size is extremely important, this can be optimised to the single
instruction:
push {r6, r7, lr}
where we don't actually care about the contents of r6, but pushing it subtracts
4 from sp as a side effect.
This should implement such a conversion, predicated on the "minsize" function
attribute (-Oz) since I've yet to find any code it actually makes faster.
llvm-svn: 194264
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index cfaa792..d921c82 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -164,11 +164,17 @@
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
NumBytes = DPRCSOffset;
+ int FramePtrOffsetInBlock = 0;
+ if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) {
+ FramePtrOffsetInBlock = NumBytes;
+ NumBytes = 0;
+ }
+
// Adjust FP so it point to the stack slot that contains the previous FP.
if (HasFP) {
- int FramePtrOffset = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
+ FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addReg(ARM::SP).addImm(FramePtrOffset / 4)
+ .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
if (NumBytes > 508)
// If offset is > 508 then sp cannot be adjusted in a single instruction,
@@ -292,8 +298,9 @@
&MBB.front() != MBBI &&
prior(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = prior(MBBI);
- emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
- } else
+ if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes))
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
}
}