Sort allocas, compute frame pointer in Cfg pass
Split allocas in the entry block into two categories. The first has alignment <= stack alignment and constant size. The second violates one or both of those conditions. Sort both of these lists in descending alignment order and emit. Also, compute the need for a frame pointer during the pass.
BUG=
R=jpp@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1414343010 .
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 57b1e73..9eb934b 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -201,26 +201,7 @@
if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Var))
Var64On32->initHiLo(this);
- // Figure out which alloca instructions result in storage at known stack frame
- // offsets. If this is true for all alloca instructions, then a stack pointer
- // can still be used instead of a frame pointer, freeing up the frame pointer
- // for normal register allocation. Additionally, for each such alloca, its
- // address could be rematerialized at each use in terms of the stack/frame
- // pointer, saving a stack slot and a load from that stack slot.
- //
- // This simple implementation is limited to alloca instructions at the start
- // of the entry node.
- for (Inst &Instr : getEntryNode()->getInsts()) {
- if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
- if (llvm::isa<Constant>(Alloca->getSizeInBytes())) {
- Alloca->setKnownFrameOffset();
- continue;
- }
- }
- // The first instruction that is not an alloca with a constant size stops
- // the search.
- break;
- }
+ processAllocas();
// The set of translation passes and their order are determined by the
// target.
@@ -470,6 +451,91 @@
getTarget()->lowerArguments();
}
+void Cfg::sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
+ bool IsKnownFrameOffset) {
+ if (Allocas.empty())
+ return;
+ // Sort by decreasing alignment. This does not really matter at the moment,
+ // but will allow compacting stack allocation when we fuse to one alloca.
+ std::sort(Allocas.begin(), Allocas.end(),
+ [](Inst *I1, Inst *I2) {
+ auto *A1 = llvm::dyn_cast<InstAlloca>(I1);
+ auto *A2 = llvm::dyn_cast<InstAlloca>(I2);
+ return A1->getAlignInBytes() > A2->getAlignInBytes();
+ });
+ for (Inst *Instr: Allocas) {
+ auto *Alloca = llvm::cast<InstAlloca>(Instr);
+ // Move the alloca to its sorted position.
+ InstAlloca *NewAlloca = InstAlloca::create(this,
+ Alloca->getSizeInBytes(),
+ Alloca->getAlignInBytes(),
+ Alloca->getDest());
+ if (IsKnownFrameOffset)
+ NewAlloca->setKnownFrameOffset();
+ Insts.push_front(NewAlloca);
+ Alloca->setDeleted();
+ }
+}
+
+void Cfg::processAllocas() {
+ const uint32_t StackAlignment = getTarget()->getStackAlignment();
+ CfgNode *EntryNode = getEntryNode();
+ // Allocas in the entry block that have constant size and alignment less
+ // than or equal to the function's stack alignment.
+ CfgVector<Inst *> FixedAllocas;
+ // Allocas in the entry block that have constant size and alignment greater
+ // than the function's stack alignment.
+ CfgVector<Inst *> AlignedAllocas;
+ // LLVM enforces power of 2 alignment.
+ assert(llvm::isPowerOf2_32(StackAlignment));
+ // Collect the Allocas into the two vectors.
+ bool RequiresFramePointer = false;
+ for (Inst &Instr : EntryNode->getInsts()) {
+ if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
+ if (!llvm::isa<Constant>(Alloca->getSizeInBytes())) {
+ // Variable-sized allocations require a frame pointer.
+ RequiresFramePointer = true;
+ continue;
+ }
+ uint32_t AlignmentParam = Alloca->getAlignInBytes();
+ // For default align=0, set it to the real value 1, to avoid any
+ // bit-manipulation problems below.
+ AlignmentParam = std::max(AlignmentParam, 1u);
+ assert(llvm::isPowerOf2_32(AlignmentParam));
+ if (AlignmentParam > StackAlignment) {
+ // Allocations aligned more than the stack require a frame pointer.
+ RequiresFramePointer = true;
+ AlignedAllocas.push_back(Alloca);
+ }
+ else
+ FixedAllocas.push_back(Alloca);
+ }
+ }
+ // Look for alloca instructions in other blocks
+ for (CfgNode *Node : Nodes) {
+ if (Node == EntryNode)
+ continue;
+ for (Inst &Instr : Node->getInsts()) {
+ if (llvm::isa<InstAlloca>(&Instr)) {
+ // Allocations outside the entry block require a frame pointer.
+ RequiresFramePointer = true;
+ break;
+ }
+ }
+ if (RequiresFramePointer)
+ break;
+ }
+ // Mark the target as requiring a frame pointer.
+ if (RequiresFramePointer)
+ getTarget()->setHasFramePointer();
+ // Add instructions to the head of the entry block in reverse order.
+ InstList &Insts = getEntryNode()->getInsts();
+ // Fixed, large alignment alloca addresses do not have known offset.
+ sortAllocas(AlignedAllocas, Insts, false);
+ // Fixed, small alignment alloca addresses have known offset.
+ sortAllocas(FixedAllocas, Insts, true);
+}
+
void Cfg::doAddressOpt() {
TimerMarker T(TimerStack::TT_doAddressOpt, this);
for (CfgNode *Node : Nodes)
diff --git a/src/IceCfg.h b/src/IceCfg.h
index 6a1a1c8..37c6eb7 100644
--- a/src/IceCfg.h
+++ b/src/IceCfg.h
@@ -185,6 +185,10 @@
void advancedPhiLowering();
void reorderNodes();
void shuffleNodes();
+ void sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
+ bool IsKnownFrameOffset);
+ /// Merge all the fixed-size allocas in the entry block.
+ void processAllocas();
void doAddressOpt();
void doArgLowering();
void doNopInsertion();
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 605562d..f518225 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -201,9 +201,11 @@
virtual IceString getRegName(SizeT RegNum, Type Ty) const = 0;
virtual bool hasFramePointer() const { return false; }
+ virtual void setHasFramePointer() = 0;
virtual SizeT getStackReg() const = 0;
virtual SizeT getFrameOrStackReg() const = 0;
virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
+ virtual uint32_t getStackAlignment() const = 0;
/// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 0761f02..31ef0c0 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -380,6 +380,10 @@
}
}
+uint32_t TargetARM32::getStackAlignment() const {
+ return ARM32_STACK_ALIGNMENT_BYTES;
+}
+
bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
return Br->optimizeBranch(NextNode);
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 922f68c..62a903d 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -81,6 +81,7 @@
return RegisterAliases[Reg];
}
bool hasFramePointer() const override { return UsesFramePointer; }
+ void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegARM32::Reg_sp; }
SizeT getFrameOrStackReg() const override {
return UsesFramePointer ? RegARM32::Reg_fp : RegARM32::Reg_sp;
@@ -92,6 +93,7 @@
// are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3;
}
+ uint32_t getStackAlignment() const override;
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index 5ac0976..3cd1687 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -49,6 +49,7 @@
return RegisterAliases[Reg];
}
bool hasFramePointer() const override { return UsesFramePointer; }
+ void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegMIPS32::Reg_SP; }
SizeT getFrameOrStackReg() const override {
return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP;
@@ -58,6 +59,10 @@
// are rounded up to 4 bytes.
return (typeWidthInBytes(Ty) + 3) & ~3;
}
+ uint32_t getStackAlignment() const override {
+ // TODO(sehr): what is the stack alignment?
+ return 1;
+ }
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index 7380d56..1b52aad 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -83,6 +83,7 @@
}
bool hasFramePointer() const override { return IsEbpBasedFrame; }
+ void setHasFramePointer() override { IsEbpBasedFrame = true; }
SizeT getStackReg() const override { return Traits::RegisterSet::Reg_esp; }
SizeT getFrameOrStackReg() const override {
return IsEbpBasedFrame ? Traits::RegisterSet::Reg_ebp
@@ -93,6 +94,9 @@
const uint32_t WordSizeInBytes = typeWidthInBytes(Traits::WordType);
return Utils::applyAlignment(typeWidthInBytes(Ty), WordSizeInBytes);
}
+ uint32_t getStackAlignment() const override {
+ return Traits::X86_STACK_ALIGNMENT_BYTES;
+ }
bool shouldSplitToVariable64On32(Type Ty) const override {
return Traits::Is64Bit ? false : Ty == IceType_i64;
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 2ecb7d3..b22ec64 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -957,7 +957,6 @@
uint32_t Alignment =
std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
- IsEbpBasedFrame = true;
_and(esp, Ctx->getConstantInt32(-Alignment));
}
if (const auto *ConstantTotalSize =