Merge fixed alloca stack adjustments into the prolog
Also removes reliance on lowerAlloca entirely for the fixed allocations.
BUG=
R=jpp@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1435363002 .
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 6ae12bb..65844b9 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -494,53 +494,44 @@
uint32_t TotalSize = Utils::applyAlignment(CurrentOffset, CombinedAlignment);
// Ensure every alloca was assigned an offset.
assert(Allocas.size() == Offsets.size());
- Variable *BaseVariable = makeVariable(IceType_i32);
- Variable *AllocaDest = BaseVariable;
- // Emit one addition for each alloca after the first.
- for (size_t i = 0; i < Allocas.size(); ++i) {
- auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
- switch (BaseVariableType) {
- case BVT_FramePointer:
- case BVT_UserPointer: {
+
+ switch (BaseVariableType) {
+ case BVT_UserPointer: {
+ Variable *BaseVariable = makeVariable(IceType_i32);
+ for (SizeT i = 0; i < Allocas.size(); ++i) {
+ auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
// Emit a new addition operation to replace the alloca.
Operand *AllocaOffset = Ctx->getConstantInt32(Offsets[i]);
InstArithmetic *Add =
InstArithmetic::create(this, InstArithmetic::Add, Alloca->getDest(),
BaseVariable, AllocaOffset);
Insts.push_front(Add);
- } break;
- case BVT_StackPointer: {
+ Alloca->setDeleted();
+ }
+ Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
+ InstAlloca *CombinedAlloca =
+ InstAlloca::create(this, BaseVariable, AllocaSize, CombinedAlignment);
+ CombinedAlloca->setKnownFrameOffset();
+ Insts.push_front(CombinedAlloca);
+ } break;
+ case BVT_StackPointer:
+ case BVT_FramePointer: {
+ for (SizeT i = 0; i < Allocas.size(); ++i) {
+ auto *Alloca = llvm::cast<InstAlloca>(Allocas[i]);
// Emit a fake definition of the rematerializable variable.
Variable *Dest = Alloca->getDest();
InstFakeDef *Def = InstFakeDef::create(this, Dest);
- Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
+ if (BaseVariableType == BVT_StackPointer)
+ Dest->setRematerializable(getTarget()->getStackReg(), Offsets[i]);
+ else
+ Dest->setRematerializable(getTarget()->getFrameReg(), Offsets[i]);
Insts.push_front(Def);
- } break;
+ Alloca->setDeleted();
}
- Alloca->setDeleted();
- }
- Operand *AllocaSize = Ctx->getConstantInt32(TotalSize);
- switch (BaseVariableType) {
- case BVT_FramePointer: {
- // Adjust the return of the alloca to the top of the returned region.
- AllocaDest = makeVariable(IceType_i32);
- InstArithmetic *Add = InstArithmetic::create(
- this, InstArithmetic::Add, BaseVariable, AllocaDest, AllocaSize);
- Insts.push_front(Add);
+ // Allocate the fixed area in the function prolog.
+ getTarget()->reserveFixedAllocaArea(TotalSize, CombinedAlignment);
} break;
- case BVT_StackPointer: {
- // Emit a fake use to keep the Alloca live.
- InstFakeUse *Use = InstFakeUse::create(this, AllocaDest);
- Insts.push_front(Use);
- } break;
- case BVT_UserPointer:
- break;
}
- // And insert the fused alloca.
- InstAlloca *CombinedAlloca =
- InstAlloca::create(this, AllocaSize, CombinedAlignment, AllocaDest);
- CombinedAlloca->setKnownFrameOffset();
- Insts.push_front(CombinedAlloca);
}
void Cfg::processAllocas(bool SortAndCombine) {
@@ -595,7 +586,7 @@
// Allocas in the entry block that have constant size and alignment greater
// than the function's stack alignment.
CfgVector<Inst *> AlignedAllocas;
- // Maximum alignment used for the dynamic/aligned allocas.
+ // Maximum alignment used by any alloca.
uint32_t MaxAlignment = StackAlignment;
for (Inst &Instr : EntryNode->getInsts()) {
if (auto *Alloca = llvm::dyn_cast<InstAlloca>(&Instr)) {
@@ -623,14 +614,16 @@
// do not have a known offset from either the stack or frame pointer.
// They grow up from a user pointer from an alloca.
sortAndCombineAllocas(AlignedAllocas, MaxAlignment, Insts, BVT_UserPointer);
+ // Fixed size allocas are addressed relative to the frame pointer.
+ sortAndCombineAllocas(FixedAllocas, StackAlignment, Insts,
+ BVT_FramePointer);
+ } else {
+ // Otherwise, fixed size allocas are addressed from the stack pointer, or
+ // from the frame pointer if there are dynamic allocas.
+ const AllocaBaseVariableType BasePointerType =
+ (HasDynamicAllocation ? BVT_FramePointer : BVT_StackPointer);
+ sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
}
- // Otherwise, fixed size allocas are always addressed relative to the stack
- // unless there are dynamic allocas.
- // TODO(sehr): re-enable frame pointer and decrementing addressing.
- AllocaBaseVariableType BasePointerType =
- (HasDynamicAllocation ? BVT_UserPointer : BVT_StackPointer);
- sortAndCombineAllocas(FixedAllocas, MaxAlignment, Insts, BasePointerType);
-
if (!FixedAllocas.empty() || !AlignedAllocas.empty())
// No use calling findRematerializable() unless there is some
// rematerializable alloca instruction to seed it.
diff --git a/src/IceConverter.cpp b/src/IceConverter.cpp
index 89fa2e3..34bdcb9 100644
--- a/src/IceConverter.cpp
+++ b/src/IceConverter.cpp
@@ -586,7 +586,7 @@
uint32_t Align = Inst->getAlignment();
Ice::Variable *Dest = mapValueToIceVar(Inst, Ice::getPointerType());
- return Ice::InstAlloca::create(Func.get(), ByteCount, Align, Dest);
+ return Ice::InstAlloca::create(Func.get(), Dest, ByteCount, Align);
}
Ice::Inst *convertUnreachableInstruction(const UnreachableInst * /*Inst*/) {
diff --git a/src/IceInst.cpp b/src/IceInst.cpp
index 96755eb..bd68e68 100644
--- a/src/IceInst.cpp
+++ b/src/IceInst.cpp
@@ -218,8 +218,8 @@
return true;
}
-InstAlloca::InstAlloca(Cfg *Func, Operand *ByteCount, uint32_t AlignInBytes,
- Variable *Dest)
+InstAlloca::InstAlloca(Cfg *Func, Variable *Dest, Operand *ByteCount,
+ uint32_t AlignInBytes)
: InstHighLevel(Func, Inst::Alloca, 1, Dest), AlignInBytes(AlignInBytes) {
// Verify AlignInBytes is 0 or a power of 2.
assert(AlignInBytes == 0 || llvm::isPowerOf2_32(AlignInBytes));
diff --git a/src/IceInst.h b/src/IceInst.h
index 5e32904..7505924 100644
--- a/src/IceInst.h
+++ b/src/IceInst.h
@@ -246,10 +246,10 @@
InstAlloca &operator=(const InstAlloca &) = delete;
public:
- static InstAlloca *create(Cfg *Func, Operand *ByteCount,
- uint32_t AlignInBytes, Variable *Dest) {
+ static InstAlloca *create(Cfg *Func, Variable *Dest, Operand *ByteCount,
+ uint32_t AlignInBytes) {
return new (Func->allocate<InstAlloca>())
- InstAlloca(Func, ByteCount, AlignInBytes, Dest);
+ InstAlloca(Func, Dest, ByteCount, AlignInBytes);
}
uint32_t getAlignInBytes() const { return AlignInBytes; }
Operand *getSizeInBytes() const { return getSrc(0); }
@@ -259,8 +259,8 @@
static bool classof(const Inst *Inst) { return Inst->getKind() == Alloca; }
private:
- InstAlloca(Cfg *Func, Operand *ByteCount, uint32_t AlignInBytes,
- Variable *Dest);
+ InstAlloca(Cfg *Func, Variable *Dest, Operand *ByteCount,
+ uint32_t AlignInBytes);
const uint32_t AlignInBytes;
bool KnownFrameOffset = false;
diff --git a/src/IceInstX8632.cpp b/src/IceInstX8632.cpp
index 2c2f09c..07d7e54 100644
--- a/src/IceInstX8632.cpp
+++ b/src/IceInstX8632.cpp
@@ -98,17 +98,34 @@
}
}
+namespace {
+static int32_t GetRematerializableOffset(Variable *Var, bool IgnoreStackAdjust,
+ const Ice::TargetX8632 *Target) {
+ int32_t Disp = 0;
+ Disp += Var->getStackOffset();
+ SizeT RegNum = static_cast<SizeT>(Var->getRegNum());
+ if (RegNum == Target->getStackReg()) {
+ if (!IgnoreStackAdjust)
+ Disp += Target->getStackAdjustment();
+ } else if (RegNum == Target->getFrameReg()) {
+ Disp += Target->getFrameFixedAllocaOffset();
+ } else {
+ llvm::report_fatal_error("Unexpected rematerializable register type");
+ }
+ return Disp;
+}
+} // end of anonymous namespace
+
void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
- const ::Ice::TargetLowering *Target = Func->getTarget();
+ const auto *Target = static_cast<const Ice::TargetX8632 *>(Func->getTarget());
// If the base is rematerializable, we need to replace it with the correct
// physical register (esp or ebp), and update the Offset.
int32_t Disp = 0;
if (getBase() && getBase()->isRematerializable()) {
- Disp += getBase()->getStackOffset();
- if (!getIgnoreStackAdjust())
- Disp += Target->getStackAdjustment();
+ Disp +=
+ GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
}
// The index should never be rematerializable. But if we ever allow it, then
// we should make sure the rematerialization offset is shifted by the Shift
@@ -135,7 +152,7 @@
// TODO(sehr): ConstantRelocatable still needs updating for
// rematerializable base/index and Disp.
assert(Disp == 0);
- CR->emitWithoutPrefix(Func->getTarget());
+ CR->emitWithoutPrefix(Target);
} else {
llvm_unreachable("Invalid offset type for x86 mem operand");
}
@@ -165,10 +182,10 @@
bool Dumped = false;
Str << "[";
int32_t Disp = 0;
+ const auto *Target = static_cast<const Ice::TargetX8632 *>(Func->getTarget());
if (getBase() && getBase()->isRematerializable()) {
- Disp += getBase()->getStackOffset();
- if (!getIgnoreStackAdjust())
- Disp += Func->getTarget()->getStackAdjustment();
+ Disp +=
+ GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
}
if (getBase()) {
if (Func)
@@ -230,13 +247,12 @@
MachineTraits<TargetX8632>::Address
MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
MachineTraits<TargetX8632>::Assembler *Asm,
- const Ice::TargetLowering *Target) const {
+ const Ice::TargetLowering *TargetLowering) const {
int32_t Disp = 0;
+ const auto *Target = static_cast<const Ice::TargetX8632 *>(TargetLowering);
if (getBase() && getBase()->isRematerializable()) {
- Disp += getBase()->getStackOffset();
- if (!getIgnoreStackAdjust()) {
- Disp += Target->getStackAdjustment();
- }
+ Disp +=
+ GetRematerializableOffset(getBase(), getIgnoreStackAdjust(), Target);
}
// The index should never be rematerializable. But if we ever allow it, then
// we should make sure the rematerialization offset is shifted by the Shift
diff --git a/src/IceOperand.cpp b/src/IceOperand.cpp
index e66a995..bb16967 100644
--- a/src/IceOperand.cpp
+++ b/src/IceOperand.cpp
@@ -487,7 +487,8 @@
Target->emit(this);
}
-void ConstantRelocatable::emitWithoutPrefix(TargetLowering *Target) const {
+void ConstantRelocatable::emitWithoutPrefix(
+ const TargetLowering *Target) const {
Target->emitWithoutPrefix(this);
}
diff --git a/src/IceOperand.h b/src/IceOperand.h
index 9d9f497..0cde5b2 100644
--- a/src/IceOperand.h
+++ b/src/IceOperand.h
@@ -290,7 +290,7 @@
bool getSuppressMangling() const { return SuppressMangling; }
using Constant::emit;
void emit(TargetLowering *Target) const final;
- void emitWithoutPrefix(TargetLowering *Target) const;
+ void emitWithoutPrefix(const TargetLowering *Target) const;
using Constant::dump;
void dump(const Cfg *Func, Ostream &Str) const override;
diff --git a/src/IceTargetLowering.cpp b/src/IceTargetLowering.cpp
index 94821ce..92223c9 100644
--- a/src/IceTargetLowering.cpp
+++ b/src/IceTargetLowering.cpp
@@ -368,7 +368,13 @@
VarList SpilledVariables;
for (Variable *Var : Variables) {
if (Var->hasReg()) {
- RegsUsed[Var->getRegNum()] = true;
+ // Don't consider a rematerializable variable to be an actual register use
+ // (specifically of the frame pointer). Otherwise, the prolog may decide
+ // to save the frame pointer twice - once because of the explicit need for
+ // a frame pointer, and once because of an active use of a callee-save
+ // register.
+ if (!Var->isRematerializable())
+ RegsUsed[Var->getRegNum()] = true;
continue;
}
// An argument either does not need a stack slot (if passed in a register)
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index aff423c..c613d99 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -203,9 +203,12 @@
virtual bool hasFramePointer() const { return false; }
virtual void setHasFramePointer() = 0;
virtual SizeT getStackReg() const = 0;
+ virtual SizeT getFrameReg() const = 0;
virtual SizeT getFrameOrStackReg() const = 0;
virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
virtual uint32_t getStackAlignment() const = 0;
+ virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
+ virtual int32_t getFrameFixedAllocaOffset() const = 0;
/// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index 969e17a..20f3597 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -86,8 +86,9 @@
bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegARM32::Reg_sp; }
+ SizeT getFrameReg() const override { return RegARM32::Reg_fp; }
SizeT getFrameOrStackReg() const override {
- return UsesFramePointer ? RegARM32::Reg_fp : RegARM32::Reg_sp;
+ return UsesFramePointer ? getFrameReg() : getStackReg();
}
SizeT getReservedTmpReg() const { return RegARM32::Reg_ip; }
@@ -97,6 +98,17 @@
return (typeWidthInBytes(Ty) + 3) & ~3;
}
uint32_t getStackAlignment() const override;
+ void reserveFixedAllocaArea(size_t Size, size_t Align) override {
+ // TODO(sehr,jpp): Implement fixed stack layout.
+ (void)Size;
+ (void)Align;
+ llvm::report_fatal_error("Not yet implemented");
+ }
+ int32_t getFrameFixedAllocaOffset() const override {
+ // TODO(sehr,jpp): Implement fixed stack layout.
+ llvm::report_fatal_error("Not yet implemented");
+ return 0;
+ }
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
diff --git a/src/IceTargetLoweringMIPS32.h b/src/IceTargetLoweringMIPS32.h
index c01c6c2..1f91eee 100644
--- a/src/IceTargetLoweringMIPS32.h
+++ b/src/IceTargetLoweringMIPS32.h
@@ -54,8 +54,9 @@
bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; }
SizeT getStackReg() const override { return RegMIPS32::Reg_SP; }
+ SizeT getFrameReg() const override { return RegMIPS32::Reg_FP; }
SizeT getFrameOrStackReg() const override {
- return UsesFramePointer ? RegMIPS32::Reg_FP : RegMIPS32::Reg_SP;
+ return UsesFramePointer ? getFrameReg() : getStackReg();
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
@@ -66,6 +67,17 @@
// TODO(sehr): what is the stack alignment?
return 1;
}
+ void reserveFixedAllocaArea(size_t Size, size_t Align) override {
+ // TODO(sehr): Implement fixed stack layout.
+ (void)Size;
+ (void)Align;
+ llvm::report_fatal_error("Not yet implemented");
+ }
+ int32_t getFrameFixedAllocaOffset() const override {
+ // TODO(sehr): Implement fixed stack layout.
+ llvm::report_fatal_error("Not yet implemented");
+ return 0;
+ }
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 27f9ae0..e8a86d8 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -506,20 +506,36 @@
SpillAreaSizeBytes = StackSize - StackOffset;
}
- // Generate "sub esp, SpillAreaSizeBytes"
- if (SpillAreaSizeBytes)
+ // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
+ // fixed allocations in the prolog.
+ if (PrologEmitsFixedAllocas)
+ SpillAreaSizeBytes += FixedAllocaSizeBytes;
+ if (SpillAreaSizeBytes) {
+ // Generate "sub esp, SpillAreaSizeBytes"
_sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(SpillAreaSizeBytes));
+ // If the fixed allocas are aligned more than the stack frame, align the
+ // stack pointer accordingly.
+ if (PrologEmitsFixedAllocas &&
+ FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
+ assert(IsEbpBasedFrame);
+ _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
+ Ctx->getConstantInt32(-FixedAllocaAlignBytes));
+ }
+ }
- // Account for alloca instructions with known frame offsets.
- SpillAreaSizeBytes += FixedAllocaSizeBytes;
+ // Account for known-frame-offset alloca instructions that were not already
+ // combined into the prolog.
+ if (!PrologEmitsFixedAllocas)
+ SpillAreaSizeBytes += FixedAllocaSizeBytes;
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
// Initialize the stack adjustment so that after all the known-frame-offset
// alloca instructions are emitted, the stack adjustment will reach zero.
resetStackAdjustment();
- updateStackAdjustment(-FixedAllocaSizeBytes);
+ if (!PrologEmitsFixedAllocas)
+ updateStackAdjustment(-FixedAllocaSizeBytes);
// Fill in stack offsets for stack args, and copy args into registers for
// those that were register-allocated. Args are pushed right to left, so
@@ -539,11 +555,14 @@
++NumXmmArgs;
continue;
}
- // For esp-based frames, the esp value may not stabilize to its home value
- // until after all the fixed-size alloca instructions have executed. In
- // this case, a stack adjustment is needed when accessing in-args in order
- // to copy them into registers.
- size_t StackAdjBytes = IsEbpBasedFrame ? 0 : -FixedAllocaSizeBytes;
+ // For esp-based frames where the allocas are done outside the prolog, the
+ // esp value may not stabilize to its home value until after all the
+ // fixed-size alloca instructions have executed. In this case, a stack
+ // adjustment is needed when accessing in-args in order to copy them into
+ // registers.
+ size_t StackAdjBytes = 0;
+ if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
+ StackAdjBytes -= FixedAllocaSizeBytes;
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
InArgsSizeBytes);
}
diff --git a/src/IceTargetLoweringX86Base.h b/src/IceTargetLoweringX86Base.h
index d23e35b..90bbed2 100644
--- a/src/IceTargetLoweringX86Base.h
+++ b/src/IceTargetLoweringX86Base.h
@@ -89,9 +89,9 @@
bool hasFramePointer() const override { return IsEbpBasedFrame; }
void setHasFramePointer() override { IsEbpBasedFrame = true; }
SizeT getStackReg() const override { return Traits::RegisterSet::Reg_esp; }
+ SizeT getFrameReg() const override { return Traits::RegisterSet::Reg_ebp; }
SizeT getFrameOrStackReg() const override {
- return IsEbpBasedFrame ? Traits::RegisterSet::Reg_ebp
- : Traits::RegisterSet::Reg_esp;
+ return IsEbpBasedFrame ? getFrameReg() : getStackReg();
}
size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of WordType bytes.
@@ -101,6 +101,16 @@
uint32_t getStackAlignment() const override {
return Traits::X86_STACK_ALIGNMENT_BYTES;
}
+ void reserveFixedAllocaArea(size_t Size, size_t Align) override {
+ FixedAllocaSizeBytes = Size;
+ assert(llvm::isPowerOf2_32(Align));
+ FixedAllocaAlignBytes = Align;
+ PrologEmitsFixedAllocas = true;
+ }
+ /// Returns the (negative) offset from ebp/rbp where the fixed Allocas start.
+ int32_t getFrameFixedAllocaOffset() const override {
+ return FixedAllocaSizeBytes - SpillAreaSizeBytes;
+ }
bool shouldSplitToVariable64On32(Type Ty) const override {
return Traits::Is64Bit ? false : Ty == IceType_i64;
@@ -691,6 +701,8 @@
bool NeedsStackAlignment = false;
size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
+ size_t FixedAllocaAlignBytes = 0;
+ bool PrologEmitsFixedAllocas = false;
static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
RegisterAliases;
diff --git a/src/PNaClTranslator.cpp b/src/PNaClTranslator.cpp
index 755971d..cff46e7 100644
--- a/src/PNaClTranslator.cpp
+++ b/src/PNaClTranslator.cpp
@@ -2626,7 +2626,7 @@
return;
}
CurrentNode->appendInst(Ice::InstAlloca::create(
- Func.get(), ByteCount, Alignment, getNextInstVar(PtrTy)));
+ Func.get(), getNextInstVar(PtrTy), ByteCount, Alignment));
return;
}
case naclbitc::FUNC_CODE_INST_LOAD: {
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index 53e1842..7b08afb 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -6,7 +6,8 @@
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -Om1 -allow-externally-defined-symbols \
-; RUN: | %if --need=target_X8632 --command FileCheck %s
+; RUN: | %if --need=target_X8632 --command FileCheck \
+; RUN: --check-prefix CHECK-OPTM1 %s
; TODO(jvoung): Stop skipping unimplemented parts (via --skip-unimplemented)
; once enough infrastructure is in. Also, switch to --filetype=obj
@@ -33,11 +34,18 @@
ret void
}
; CHECK-LABEL: fixed_416_align_16
-; CHECK: sub esp,0x1a0
+; CHECK: sub esp,0x1ac
; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1
+; CHECK-OPTM1-LABEL: fixed_416_align_16
+; CHECK-OPTM1: sub esp,0xc
+; CHECK-OPTM1: sub esp,0x1a0
+; CHECK-OPTM1: sub esp,0x10
+; CHECK-OPTM1: mov DWORD PTR [esp],eax
+; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
+
; ARM32-LABEL: fixed_416_align_16
; ARM32: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
@@ -52,8 +60,8 @@
; CHECK-LABEL: fixed_416_align_32
; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp
+; CHECK: sub esp,0x1a8
; CHECK: and esp,0xffffffe0
-; CHECK: sub esp,0x1a0
; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1
@@ -72,11 +80,16 @@
ret void
}
; CHECK-LABEL: fixed_351_align_16
-; CHECK: sub esp,0x160
-; CHECK: sub esp,0x10
+; CHECK: sub esp,0x16c
; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1
+; CHECK-OPTM1-LABEL: fixed_351_align_16
+; CHECK-OPTM1: sub esp,0xc
+; CHECK-OPTM1: sub esp,0x160
+; CHECK-OPTM1: mov DWORD PTR [esp],eax
+; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
+
; ARM32-LABEL: fixed_351_align_16
; ARM32: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
@@ -91,8 +104,8 @@
; CHECK-LABEL: fixed_351_align_32
; CHECK: push ebp
; CHECK-NEXT: mov ebp,esp
+; CHECK: sub esp,0x168
; CHECK: and esp,0xffffffe0
-; CHECK: sub esp,0x160
; CHECK: sub esp,0x10
; CHECK: mov DWORD PTR [esp],eax
; CHECK: call {{.*}} R_{{.*}} f1
diff --git a/tests_lit/llvm2ice_tests/fused-alloca-arg.ll b/tests_lit/llvm2ice_tests/fused-alloca-arg.ll
index f1e061c..5d9ec94 100644
--- a/tests_lit/llvm2ice_tests/fused-alloca-arg.ll
+++ b/tests_lit/llvm2ice_tests/fused-alloca-arg.ll
@@ -17,10 +17,8 @@
}
; CHECK-LABEL: caller1
-; CHECK-NEXT: sub esp,0xc
-; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
-; CHECK-NEXT: sub esp,0x20
-; CHECK-NEXT: mov ecx,esp
+; CHECK-NEXT: sub esp,0x2c
+; CHECK-NEXT: mov eax,DWORD PTR [esp+0x30]
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: sub esp,0x20
; CHECK-NEXT: mov DWORD PTR [esp],eax
@@ -51,10 +49,8 @@
}
; CHECK-LABEL: caller2
-; CHECK-NEXT: sub esp,0xc
-; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
-; CHECK-NEXT: sub esp,0x40
-; CHECK-NEXT: mov ecx,esp
+; CHECK-NEXT: sub esp,0x4c
+; CHECK-NEXT: mov eax,DWORD PTR [esp+0x50]
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x20],eax
; CHECK-NEXT: sub esp,0x20
diff --git a/tests_lit/llvm2ice_tests/fused-alloca.ll b/tests_lit/llvm2ice_tests/fused-alloca.ll
index bf80fcd..8daec9c 100644
--- a/tests_lit/llvm2ice_tests/fused-alloca.ll
+++ b/tests_lit/llvm2ice_tests/fused-alloca.ll
@@ -19,14 +19,12 @@
ret void
}
; CHECK-LABEL: fused_small_align
-; CHECK-NEXT: sub esp,0xc
-; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x30
-; CHECK-NEXT: mov {{.*}},esp
+; CHECK-NEXT: mov eax,DWORD PTR [esp+0x34]
; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax
-; CHECK-NEXT: add esp,0x3c
+; CHECK-NEXT: add esp,0x30
; Test that a sequence of allocas with greater than stack alignment get fused.
define internal void @fused_large_align(i32 %arg) {
@@ -45,11 +43,9 @@
; CHECK-LABEL: fused_large_align
; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp
-; CHECK-NEXT: sub esp,0x8
-; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
-; CHECK-NEXT: and esp,0xffffffc0
; CHECK-NEXT: sub esp,0x80
-; CHECK-NEXT: mov ecx,esp
+; CHECK-NEXT: and esp,0xffffffc0
+; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x60],eax
@@ -80,13 +76,88 @@
br label %block1
}
; CHECK-LABEL: fused_derived
-; CHECK-NEXT: sub esp,0xc
-; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x180
-; CHECK-NEXT: mov {{.*}},esp
+; CHECK-NEXT: mov [[ARG:e..]],DWORD PTR [esp+0x184]
; CHECK-NEXT: jmp
; CHECK-NEXT: mov DWORD PTR [esp+0x80],[[ARG]]
; CHECK-NEXT: mov DWORD PTR [esp+0x8c],[[ARG]]
; CHECK-NEXT: lea eax,[esp+0x81]
-; CHECK-NEXT: add esp,0x18c
+; CHECK-NEXT: add esp,0x180
; CHECK-NEXT: ret
+
+; Test that a fixed alloca gets referenced by the frame pointer.
+define internal void @fused_small_align_with_dynamic(i32 %arg) {
+entry:
+ %a1 = alloca i8, i32 8, align 16
+ br label %next
+next:
+ %a2 = alloca i8, i32 12, align 1
+ %a3 = alloca i8, i32 16, align 1
+ %p1 = bitcast i8* %a1 to i32*
+ %p2 = bitcast i8* %a2 to i32*
+ %p3 = bitcast i8* %a3 to i32*
+ store i32 %arg, i32* %p1, align 1
+ store i32 %arg, i32* %p2, align 1
+ store i32 %arg, i32* %p3, align 1
+ ret void
+}
+; CHECK-LABEL: fused_small_align_with_dynamic
+; CHECK-NEXT: push ebp
+; CHECK-NEXT: mov ebp,esp
+; CHECK-NEXT: sub esp,0x18
+; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
+; CHECK-NEXT: sub esp,0x10
+; CHECK-NEXT: mov ecx,esp
+; CHECK-NEXT: sub esp,0x10
+; CHECK-NEXT: mov edx,esp
+; CHECK-NEXT: mov DWORD PTR [ebp-0x18],eax
+; CHECK-NEXT: mov DWORD PTR [ecx],eax
+; CHECK-NEXT: mov DWORD PTR [edx],eax
+; CHECK-NEXT: mov esp,ebp
+; CHECK-NEXT: pop ebp
+
+; Test that a sequence with greater than stack alignment and dynamic size
+; gets folded and referenced correctly.
+
+define internal void @fused_large_align_with_dynamic(i32 %arg) {
+entry:
+ %a1 = alloca i8, i32 8, align 32
+ %a2 = alloca i8, i32 12, align 32
+ %a3 = alloca i8, i32 16, align 1
+ %a4 = alloca i8, i32 16, align 1
+ br label %next
+next:
+ %a5 = alloca i8, i32 16, align 1
+ %p1 = bitcast i8* %a1 to i32*
+ %p2 = bitcast i8* %a2 to i32*
+ %p3 = bitcast i8* %a3 to i32*
+ %p4 = bitcast i8* %a4 to i32*
+ %p5 = bitcast i8* %a5 to i32*
+ store i32 %arg, i32* %p1, align 1
+ store i32 %arg, i32* %p2, align 1
+ store i32 %arg, i32* %p3, align 1
+ store i32 %arg, i32* %p4, align 1
+ store i32 %arg, i32* %p5, align 1
+ ret void
+}
+; CHECK-LABEL: fused_large_align_with_dynamic
+; CHECK-NEXT: push ebx
+; CHECK-NEXT: push ebp
+; CHECK-NEXT: mov ebp,esp
+; CHECK-NEXT: sub esp,0x64
+; CHECK-NEXT: mov eax,DWORD PTR [ebp+0xc]
+; CHECK-NEXT: and esp,0xffffffe0
+; CHECK-NEXT: sub esp,0x40
+; CHECK-NEXT: mov ecx,esp
+; CHECK-NEXT: mov edx,ecx
+; CHECK-NEXT: add edx,0x20
+; CHECK-NEXT: add ecx,0x0
+; CHECK-NEXT: sub esp,0x10
+; CHECK-NEXT: mov ebx,esp
+; CHECK-NEXT: mov DWORD PTR [ecx],eax
+; CHECK-NEXT: mov DWORD PTR [edx],eax
+; CHECK-NEXT: mov DWORD PTR [ebp-0x14],eax
+; CHECK-NEXT: mov DWORD PTR [ebp-0x24],eax
+; CHECK-NEXT: mov DWORD PTR [ebx],eax
+; CHECK-NEXT: mov esp,ebp
+; CHECK-NEXT: pop ebp