Subzero. ARM32. Combine allocas.
BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org
Review URL: https://codereview.chromium.org/1465213002 .
diff --git a/src/IceCfg.cpp b/src/IceCfg.cpp
index 3fe37f3..f8d40a5 100644
--- a/src/IceCfg.cpp
+++ b/src/IceCfg.cpp
@@ -484,8 +484,14 @@
} else {
// Addressing is relative to the stack pointer or to a user pointer. Add
// the offset before adding the size of the object, because it grows
- // upwards from the stack pointer.
- Offsets.push_back(CurrentOffset);
+ // upwards from the stack pointer. In addition, if the addressing is
+ // relative to the stack pointer, we need to add the pre-computed max
+ // out-args size, in bytes.
+ const uint32_t OutArgsOffsetOrZero =
+ (BaseVariableType == BVT_StackPointer)
+ ? getTarget()->maxOutArgsSizeBytes()
+ : 0;
+ Offsets.push_back(CurrentOffset + OutArgsOffsetOrZero);
}
// Update the running offset of the fused alloca region.
CurrentOffset += Size;
diff --git a/src/IceTargetLowering.h b/src/IceTargetLowering.h
index 1350afa..bac403f 100644
--- a/src/IceTargetLowering.h
+++ b/src/IceTargetLowering.h
@@ -211,6 +211,7 @@
virtual uint32_t getStackAlignment() const = 0;
virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
virtual int32_t getFrameFixedAllocaOffset() const = 0;
+ virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
/// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 12810f6..f23609b 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -265,7 +265,7 @@
}
void TargetARM32::findMaxStackOutArgsSize() {
- // MinNeededOutArgsBytes should be updated if the Target ever creates an
+ // MinNeededOutArgsBytes should be updated if the Target ever creates a
// high-level InstCall that requires more stack bytes.
constexpr size_t MinNeededOutArgsBytes = 0;
MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
@@ -291,7 +291,7 @@
findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack.
- static constexpr bool SortAndCombineAllocas = false;
+ static constexpr bool SortAndCombineAllocas = true;
Func->processAllocas(SortAndCombineAllocas);
Func->dump("After Alloca processing");
@@ -356,6 +356,7 @@
regAlloc(RAK_Global);
if (Func->hasError())
return;
+
copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
Func->dump("After linear scan regalloc");
@@ -364,6 +365,8 @@
Func->dump("After advanced Phi lowering");
}
+ ForbidTemporaryWithoutReg _(this);
+
// Stack frame mapping.
Func->genFrame();
if (Func->hasError())
@@ -399,8 +402,8 @@
findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack.
- static constexpr bool SortAndCombineAllocas = false;
- Func->processAllocas(SortAndCombineAllocas);
+ static constexpr bool DontSortAndCombineAllocas = false;
+ Func->processAllocas(DontSortAndCombineAllocas);
Func->dump("After Alloca processing");
Func->placePhiLoads();
@@ -424,9 +427,12 @@
regAlloc(RAK_InfOnly);
if (Func->hasError())
return;
+
copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
Func->dump("After regalloc of infinite-weight variables");
+ ForbidTemporaryWithoutReg _(this);
+
Func->genFrame();
if (Func->hasError())
return;
@@ -520,6 +526,7 @@
llvm::report_fatal_error(
"Infinite-weight Variable has no register assigned");
}
+ assert(!Var->isRematerializable());
int32_t Offset = Var->getStackOffset();
int32_t BaseRegNum = Var->getBaseRegNum();
if (BaseRegNum == Variable::NoRegister) {
@@ -850,6 +857,9 @@
SpillAreaSizeBytes = StackSize - StackOffset;
}
+ // Combine fixed alloca with SpillAreaSize.
+ SpillAreaSizeBytes += FixedAllocaSizeBytes;
+
// Generate "sub sp, SpillAreaSizeBytes"
if (SpillAreaSizeBytes) {
// Use the scratch register if needed to legalize the immediate.
@@ -857,7 +867,11 @@
Legal_Reg | Legal_Flex, getReservedTmpReg());
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
_sub(SP, SP, SubAmount);
+ if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
+ alignRegisterPow2(SP, FixedAllocaAlignBytes);
+ }
}
+
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
// Fill in stack offsets for stack args, and copy args into registers for
@@ -1034,6 +1048,7 @@
Variable *OrigBaseReg,
Variable **NewBaseReg,
int32_t *NewBaseOffset) {
+ assert(!OrigBaseReg->isRematerializable());
if (isLegalMemOffset(Ty, Offset)) {
return OperandARM32Mem::create(
Func, Ty, OrigBaseReg,
@@ -1053,6 +1068,7 @@
OffsetDiff = 0;
}
+ assert(!(*NewBaseReg)->isRematerializable());
return OperandARM32Mem::create(
Func, Ty, *NewBaseReg,
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
@@ -1076,8 +1092,9 @@
bool Legalized = false;
if (!Dest->hasReg()) {
- auto *const SrcR = llvm::cast<Variable>(Src);
+ auto *SrcR = llvm::cast<Variable>(Src);
assert(SrcR->hasReg());
+ assert(!SrcR->isRematerializable());
const int32_t Offset = Dest->getStackOffset();
// This is a _mov(Mem(), Variable), i.e., a store.
_str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
@@ -1087,12 +1104,26 @@
Context.insert(InstFakeDef::create(Func, Dest));
Legalized = true;
} else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
- if (!Var->hasReg()) {
- const int32_t Offset = Var->getStackOffset();
- _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
- NewBaseOffset),
- MovInstr->getPredicate());
+ if (Var->isRematerializable()) {
+ // Rematerialization arithmetic.
+ const int32_t ExtraOffset =
+ (static_cast<SizeT>(Var->getRegNum()) == getFrameReg())
+ ? getFrameFixedAllocaOffset()
+ : 0;
+
+ const int32_t Offset = Var->getStackOffset() + ExtraOffset;
+ Operand *OffsetRF = legalize(Ctx->getConstantInt32(Offset),
+ Legal_Reg | Legal_Flex, Dest->getRegNum());
+ _add(Dest, Var, OffsetRF);
Legalized = true;
+ } else {
+ if (!Var->hasReg()) {
+ const int32_t Offset = Var->getStackOffset();
+ _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
+ NewBaseOffset),
+ MovInstr->getPredicate());
+ Legalized = true;
+ }
}
}
@@ -1163,13 +1194,15 @@
// increment) in case of duplication.
assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
Mem->getAddrMode() == OperandARM32Mem::NegOffset);
+ Variable *BaseR = legalizeToReg(Mem->getBase());
if (Mem->isRegReg()) {
- return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
- Mem->getIndex(), Mem->getShiftOp(),
- Mem->getShiftAmt(), Mem->getAddrMode());
+ Variable *IndexR = legalizeToReg(Mem->getIndex());
+ return OperandARM32Mem::create(Func, IceType_i32, BaseR, IndexR,
+ Mem->getShiftOp(), Mem->getShiftAmt(),
+ Mem->getAddrMode());
} else {
- return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
- Mem->getOffset(), Mem->getAddrMode());
+ return OperandARM32Mem::create(Func, IceType_i32, BaseR, Mem->getOffset(),
+ Mem->getAddrMode());
}
}
llvm_unreachable("Unsupported operand type");
@@ -1201,7 +1234,9 @@
Variable *NewBase = Func->makeVariable(Base->getType());
lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
Base, Four));
- return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
+ Variable *BaseR = legalizeToReg(NewBase);
+ Variable *IndexR = legalizeToReg(Mem->getIndex());
+ return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
Mem->getShiftOp(), Mem->getShiftAmt(),
Mem->getAddrMode());
} else {
@@ -1216,16 +1251,17 @@
// mode into a RegReg addressing mode. Since NaCl sandboxing disallows
// RegReg addressing modes, prefer adding to base and replacing
// instead. Thus we leave the old offset alone.
- Constant *Four = Ctx->getConstantInt32(4);
+ Constant *_4 = Ctx->getConstantInt32(4);
Variable *NewBase = Func->makeVariable(Base->getType());
lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
- NewBase, Base, Four));
+ NewBase, Base, _4));
Base = NewBase;
} else {
Offset =
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
}
- return OperandARM32Mem::create(Func, SplitType, Base, Offset,
+ Variable *BaseR = legalizeToReg(Base);
+ return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
Mem->getAddrMode());
}
}
@@ -1264,7 +1300,6 @@
}
void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
- UsesFramePointer = true;
// Conservatively require the stack to be aligned. Some stack adjustment
// operations implemented below assume that the stack is aligned before the
// alloca. All the alloca code ensures that the stack alignment is preserved
@@ -1272,29 +1307,53 @@
// cases.
NeedsStackAlignment = true;
- // TODO(stichnot): minimize the number of adjustments of SP, etc.
- Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
- Variable *Dest = Inst->getDest();
- uint32_t AlignmentParam = Inst->getAlignInBytes();
// For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below.
- AlignmentParam = std::max(AlignmentParam, 1u);
+ const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(AlignmentParam));
assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
- uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
- if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
+ const uint32_t Alignment =
+ std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
+ const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
+ const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
+ const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
+ const bool UseFramePointer =
+ hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
+
+ if (UseFramePointer)
+ setHasFramePointer();
+
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
+ if (OverAligned) {
alignRegisterPow2(SP, Alignment);
}
+
+ Variable *Dest = Inst->getDest();
Operand *TotalSize = Inst->getSizeInBytes();
+
if (const auto *ConstantTotalSize =
llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
- uint32_t Value = ConstantTotalSize->getValue();
- Value = Utils::applyAlignment(Value, Alignment);
- Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
- _sub(SP, SP, SubAmount);
+ const uint32_t Value =
+ Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
+ // Constant size alloca.
+ if (!UseFramePointer) {
+      // If we don't need a Frame Pointer, this alloca has a known offset to the
+      // stack pointer. We don't need to adjust the stack pointer, nor assign
+      // any value to Dest, as Dest is rematerializable.
+ assert(Dest->isRematerializable());
+ FixedAllocaSizeBytes += Value;
+ Context.insert(InstFakeDef::create(Func, Dest));
+ return;
+ }
+
+ // If a frame pointer is required, then we need to store the alloca'd result
+ // in Dest.
+ Operand *SubAmountRF =
+ legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
+ _sub(SP, SP, SubAmountRF);
} else {
// Non-constant sizes need to be adjusted to the next highest multiple of
// the required alignment at runtime.
@@ -1306,6 +1365,8 @@
alignRegisterPow2(T, Alignment);
_sub(SP, SP, T);
}
+
+  // Add back a few bytes to SP to account for the out args area.
Variable *T = SP;
if (MaxOutArgsSizeBytes != 0) {
T = makeReg(getPointerType());
@@ -1313,6 +1374,7 @@
Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
_add(T, SP, OutArgsSizeRF);
}
+
_mov(Dest, T);
}
@@ -1976,6 +2038,12 @@
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
+
+ if (Dest->isRematerializable()) {
+ Context.insert(InstFakeDef::create(Func, Dest));
+ return;
+ }
+
if (Dest->getType() == IceType_i1) {
lowerInt1Arithmetic(Inst);
return;
@@ -2139,8 +2207,8 @@
if (Srcs.hasConstOperand()) {
// TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed
// to be used.
- Variable *Src0R = Srcs.src0R(this);
if (Srcs.immediateIsFlexEncodable()) {
+ Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
if (Srcs.swappedOperands()) {
_rsb(T, Src0R, Src1RF);
@@ -2151,6 +2219,7 @@
return;
}
if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
+ Variable *Src0R = Srcs.src0R(this);
Operand *Src1F = Srcs.negatedSrc1F(this);
_add(T, Src0R, Src1F);
_mov(Dest, T);
@@ -2215,6 +2284,12 @@
void TargetARM32::lowerAssign(const InstAssign *Inst) {
Variable *Dest = Inst->getDest();
+
+ if (Dest->isRematerializable()) {
+ Context.insert(InstFakeDef::create(Func, Dest));
+ return;
+ }
+
Operand *Src0 = Inst->getSrc(0);
assert(Dest->getType() == Src0->getType());
if (Dest->getType() == IceType_i64) {
@@ -4425,13 +4500,17 @@
assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
: (ValidImmMask & OffsetImm) == OffsetImm);
+ Variable *BaseR = makeReg(getPointerType());
+ Context.insert(InstAssign::create(Func, BaseR, BaseVar));
if (OffsetReg != nullptr) {
- return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetReg, ShiftKind,
+ Variable *OffsetR = makeReg(getPointerType());
+ Context.insert(InstAssign::create(Func, OffsetR, OffsetReg));
+ return OperandARM32Mem::create(Func, Ty, BaseR, OffsetR, ShiftKind,
OffsetRegShamt);
}
return OperandARM32Mem::create(
- Func, Ty, BaseVar,
+ Func, Ty, BaseR,
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
}
@@ -4630,7 +4709,8 @@
if (RegNum == Variable::NoRegister) {
if (Variable *Subst = getContext().availabilityGet(From)) {
// At this point we know there is a potential substitution available.
- if (Subst->mustHaveReg() && !Subst->hasReg()) {
+ if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
+ !Subst->hasReg()) {
// At this point we know the substitution will have a register.
if (From->getType() == Subst->getType()) {
// At this point we know the substitution's register is compatible.
@@ -4788,6 +4868,13 @@
}
if (auto *Var = llvm::dyn_cast<Variable>(From)) {
+ if (Var->isRematerializable()) {
+ // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
+ // for a Variable in a Mem operand.
+ Variable *T = makeReg(Var->getType(), RegNum);
+ _mov(T, Var);
+ return T;
+ }
// Check if the variable is guaranteed a physical register. This can happen
// either when the variable is pre-colored or when it is assigned infinite
// weight.
@@ -4844,9 +4931,9 @@
// If we didn't do address mode optimization, then we only have a
// base/offset to work with. ARM always requires a base register, so
// just use that to hold the operand.
- Variable *Base = legalizeToReg(Operand);
+ Variable *BaseR = legalizeToReg(Operand);
return OperandARM32Mem::create(
- Func, Ty, Base,
+ Func, Ty, BaseR,
llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}
@@ -4863,6 +4950,7 @@
Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for ARM32.
assert(Type != IceType_i64);
+ assert(AllowTemporaryWithNoReg || RegNum != Variable::NoRegister);
Variable *Reg = Func->makeVariable(Type);
if (RegNum == Variable::NoRegister)
Reg->setMustHaveReg();
@@ -4871,7 +4959,8 @@
return Reg;
}
-void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
+void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
+ int32_t TmpRegNum) {
assert(llvm::isPowerOf2_32(Align));
uint32_t RotateAmt;
uint32_t Immed_8;
@@ -4880,10 +4969,12 @@
// it fits at all). Assume Align is usually small, in which case BIC works
// better. Thus, this rounds down to the alignment.
if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
- Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
+ Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
+ TmpRegNum);
_bic(Reg, Reg, Mask);
} else {
- Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
+ Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
+ TmpRegNum);
_and(Reg, Reg, Mask);
}
}
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index cf38fe7..e8f2d1a 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -99,16 +99,15 @@
}
uint32_t getStackAlignment() const override;
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
- // TODO(sehr,jpp): Implement fixed stack layout.
- (void)Size;
- (void)Align;
- llvm::report_fatal_error("Not yet implemented");
+ FixedAllocaSizeBytes = Size;
+ assert(llvm::isPowerOf2_32(Align));
+ FixedAllocaAlignBytes = Align;
+ PrologEmitsFixedAllocas = true;
}
int32_t getFrameFixedAllocaOffset() const override {
- // TODO(sehr,jpp): Implement fixed stack layout.
- llvm::report_fatal_error("Not yet implemented");
- return 0;
+ return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
}
+ uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
@@ -250,7 +249,8 @@
Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
static Type stackSlotType();
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
- void alignRegisterPow2(Variable *Reg, uint32_t Align);
+ void alignRegisterPow2(Variable *Reg, uint32_t Align,
+ int32_t TmpRegNum = Variable::NoRegister);
/// Returns a vector in a register with the given constant entries.
Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
@@ -811,7 +811,7 @@
}
// Iterates over the CFG and determines the maximum outgoing stack arguments
- // bytes. This information is later used during addProlog() do pre-allocate
+ // bytes. This information is later used during addProlog() to pre-allocate
// the outargs area.
// TODO(jpp): This could live in the Parser, if we provided a Target-specific
// method that the Parser could call.
@@ -852,6 +852,9 @@
bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0;
+ size_t FixedAllocaSizeBytes = 0;
+ size_t FixedAllocaAlignBytes = 0;
+ bool PrologEmitsFixedAllocas = false;
uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
@@ -970,6 +973,29 @@
};
BoolComputationTracker BoolComputations;
+
+ // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
+ // without specifying a physical register. This is needed for creating unbound
+ // temporaries during Ice -> ARM lowering, but before register allocation.
+ // This is a safe-guard to ensure that no unbound temporaries are created
+ // during the legalization post-passes.
+ bool AllowTemporaryWithNoReg = true;
+ // ForbidTemporaryWithoutReg is a RAII class that manages
+ // AllowTemporaryWithNoReg.
+ class ForbidTemporaryWithoutReg {
+ ForbidTemporaryWithoutReg() = delete;
+ ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg&) = delete;
+ ForbidTemporaryWithoutReg &operator=(const ForbidTemporaryWithoutReg&) = delete;
+
+ public:
+ explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
+ Target->AllowTemporaryWithNoReg = false;
+ }
+ ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }
+
+ private:
+ TargetARM32 *const Target;
+ };
};
class TargetDataARM32 final : public TargetDataLowering {
diff --git a/tests_lit/assembler/arm32/bic.ll b/tests_lit/assembler/arm32/bic.ll
index 8d50c27..2849a74 100644
--- a/tests_lit/assembler/arm32/bic.ll
+++ b/tests_lit/assembler/arm32/bic.ll
@@ -28,70 +28,65 @@
; ASM-LABEL:AllocBigAlign:
; ASM-NEXT:.LAllocBigAlign$__0:
-; ASM-NEXT: push {fp}
-; ASM-NEXT: mov fp, sp
-; ASM-NEXT: sub sp, sp, #12
-; ASM-NEXT: bic sp, sp, #31
-; ASM-NEXT: sub sp, sp, #32
-; ASM-NEXT: mov r0, sp
-; ASM-NEXT: mov sp, fp
-; ASM-NEXT: pop {fp}
-; ASM-NEXT: # fp = def.pseudo
-; ASM-NEXT: bx lr
-
; DIS-LABEL:00000000 <AllocBigAlign>:
-; DIS-NEXT: 0: e52db004
-; DIS-NEXT: 4: e1a0b00d
-; DIS-NEXT: 8: e24dd00c
-; DIS-NEXT: c: e3cdd01f
-; DIS-NEXT: 10: e24dd020
-; DIS-NEXT: 14: e1a0000d
-; DIS-NEXT: 18: e1a0d00b
-; DIS-NEXT: 1c: e49db004
-; DIS-NEXT: 20: e12fff1e
-
; IASM-LABEL:AllocBigAlign:
; IASM-NEXT:.LAllocBigAlign$__0:
+
+; ASM-NEXT: push {fp}
+; DIS-NEXT: 0: e52db004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe5
+; ASM-NEXT: mov fp, sp
+; DIS-NEXT: 4: e1a0b00d
; IASM: .byte 0xd
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
-; IASM: .byte 0xc
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0x4d
-; IASM-NEXT: .byte 0xe2
-
-; IASM: .byte 0x1f
-; IASM-NEXT: .byte 0xd0
-; IASM-NEXT: .byte 0xcd
-; IASM-NEXT: .byte 0xe3
-
+; ASM-NEXT: sub sp, sp, #32
+; DIS-NEXT: 8: e24dd020
; IASM: .byte 0x20
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
-; IASM: .byte 0xd
-; IASM-NEXT: .byte 0x0
-; IASM-NEXT: .byte 0xa0
-; IASM-NEXT: .byte 0xe1
+; ASM-NEXT: bic sp, sp, #31
+; DIS-NEXT: c: e3cdd01f
+; IASM: .byte 0x1f
+; IASM-NEXT: .byte 0xd0
+; IASM-NEXT: .byte 0xcd
+; IASM-NEXT: .byte 0xe3
+; ASM-NEXT: # sp = def.pseudo
+
+; ASM-NEXT: add r0, sp, #0
+; DIS-NEXT: 10: e28d0000
+; IASM: .byte 0x0
+; IASM-NEXT: .byte 0x0
+; IASM-NEXT: .byte 0x8d
+; IASM-NEXT: .byte 0xe2
+
+; ASM-NEXT: mov sp, fp
+; DIS-NEXT: 14: e1a0d00b
; IASM: .byte 0xb
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
+; ASM-NEXT: pop {fp}
+; DIS-NEXT: 18: e49db004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe4
+; ASM-NEXT: # fp = def.pseudo
+
+; ASM-NEXT: bx lr
+; DIS-NEXT: 1c: e12fff1e
; IASM: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
diff --git a/tests_lit/llvm2ice_tests/alloc.ll b/tests_lit/llvm2ice_tests/alloc.ll
index 7b08afb..c90b678 100644
--- a/tests_lit/llvm2ice_tests/alloc.ll
+++ b/tests_lit/llvm2ice_tests/alloc.ll
@@ -17,14 +17,14 @@
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command FileCheck --check-prefix ARM32 %s
+; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPT2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
-; RUN: --command FileCheck --check-prefix ARM32 %s
+; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
define internal void @fixed_416_align_16(i32 %n) {
entry:
@@ -47,8 +47,9 @@
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_16
-; ARM32: sub sp, sp, #416
-; ARM32: bl {{.*}} R_{{.*}} f1
+; ARM32-OPT2: sub sp, sp, #428
+; ARM32-OPTM1: sub sp, sp, #416
+; ARM32: bl {{.*}} R_{{.*}} f1
define internal void @fixed_416_align_32(i32 %n) {
entry:
@@ -67,9 +68,10 @@
; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_32
-; ARM32: bic sp, sp, #31
-; ARM32: sub sp, sp, #416
-; ARM32: bl {{.*}} R_{{.*}} f1
+; ARM32-OPT2: sub sp, sp, #424
+; ARM32-OPTM1: sub sp, sp, #416
+; ARM32: bic sp, sp, #31
+; ARM32: bl {{.*}} R_{{.*}} f1
; Show that the amount to allocate will be rounded up.
define internal void @fixed_351_align_16(i32 %n) {
@@ -91,8 +93,9 @@
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_16
-; ARM32: sub sp, sp, #352
-; ARM32: bl {{.*}} R_{{.*}} f1
+; ARM32-OPT2: sub sp, sp, #364
+; ARM32-OPTM1: sub sp, sp, #352
+; ARM32: bl {{.*}} R_{{.*}} f1
define internal void @fixed_351_align_32(i32 %n) {
entry:
@@ -111,9 +114,10 @@
; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_32
-; ARM32: bic sp, sp, #31
-; ARM32: sub sp, sp, #352
-; ARM32: bl {{.*}} R_{{.*}} f1
+; ARM32-OPT2: sub sp, sp, #360
+; ARM32-OPTM1: sub sp, sp, #352
+; ARM32: bic sp, sp, #31
+; ARM32: bl {{.*}} R_{{.*}} f1
declare void @f1(i32 %ignored)