It turns out most of the thumb2 instructions are not allowed to touch SP. The semantics of such instructions are unpredictable. We have just been lucky that tests have been passing.
This patch takes pain to ensure all the PEI lowering code does the right thing when lowering frame indices, insert code to manipulate stack pointers, etc. It's also custom lowering dynamic stack alloc into pseudo instructions so we can insert the right instructions at scheduling time.
This fixes PR4659 and PR4682.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78361 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9c5f3aa..911b84d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -688,7 +688,7 @@
unsigned OpNum = Ops[0];
unsigned Opc = MI->getOpcode();
MachineInstr *NewMI = NULL;
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
+ if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { // FIXME: tMOVgpr2gpr etc.?
// If it is updating CPSR, then it cannot be folded.
if (MI->getOperand(4).getReg() != ARM::CPSR || MI->getOperand(4).isDead()) {
unsigned Pred = MI->getOperand(2).getImm();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 97b9ad1..a72e9dd 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1289,11 +1289,12 @@
AFI->getDPRCalleeSavedAreaOffset()||
hasFP(MF)) {
if (NumBytes) {
- unsigned SUBriOpc = isARM ? ARM::SUBri : ARM::t2SUBri;
- BuildMI(MBB, MBBI, dl, TII.get(SUBriOpc), ARM::SP)
- .addReg(FramePtr)
- .addImm(NumBytes)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
} else {
// Thumb2 or ARM.
unsigned MOVrOpc = isARM ? ARM::MOVr : ARM::t2MOVr;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index d996b24..04f4dd3 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -122,6 +122,8 @@
SDNode *SelectARMIndexedLoad(SDValue Op);
SDNode *SelectT2IndexedLoad(SDValue Op);
+ /// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
+ SDNode *SelectDYN_ALLOC(SDValue Op);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -868,6 +870,59 @@
return NULL;
}
+SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) {
+ SDNode *N = Op.getNode();
+ DebugLoc dl = N->getDebugLoc();
+ MVT VT = Op.getValueType();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+ SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32);
+ int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue();
+ if (AlignVal < 0)
+ // We need to align the stack. Use Thumb1 tAND which is the only thumb
+ // instruction that can read and write SP. This matches to a pseudo
+ // instruction that has a chain to ensure the result is written back to
+ // the stack pointer.
+ SP = SDValue(CurDAG->getTargetNode(ARM::tANDsp, dl, VT, SP, Align), 0);
+
+ bool isC = isa<ConstantSDNode>(Size);
+ uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL;
+ // Handle the most common case for both Thumb1 and Thumb2:
+ // tSUBspi - immediate is between 0 ... 508 inclusive.
+ if (C <= 508 && ((C & 3) == 0))
+ // FIXME: tSUBspi encode scale 4 implicitly.
+ return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP,
+ CurDAG->getTargetConstant(C/4, MVT::i32),
+ Chain);
+
+ if (Subtarget->isThumb1Only()) {
+ // Use tADDrSPr since Thumb1 does not have a sub r, sp, r. ARMISelLowering
+ // should have negated the size operand already. FIXME: We can't insert
+ // new target independent node at this stage so we are forced to negate
+ // it earlier. Is there a better solution?
+ return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size,
+ Chain);
+ } else if (Subtarget->isThumb2()) {
+ if (isC && Predicate_t2_so_imm(Size.getNode())) {
+ // t2SUBrSPi
+ SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3);
+ } else if (isC && Predicate_imm0_4095(Size.getNode())) {
+ // t2SUBrSPi12
+ SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3);
+ } else {
+ // t2SUBrSPs
+ SDValue Ops[] = { SP, Size,
+ getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain };
+ return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4);
+ }
+ }
+
+ // FIXME: Add ADD / SUB sp instructions for ARM.
+ return 0;
+}
SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
SDNode *N = Op.getNode();
@@ -941,25 +996,8 @@
return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
}
}
- case ISD::ADD: {
- if (!Subtarget->isThumb1Only())
- break;
- // Select add sp, c to tADDhirr.
- SDValue N0 = Op.getOperand(0);
- SDValue N1 = Op.getOperand(1);
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1));
- if (LHSR && LHSR->getReg() == ARM::SP) {
- std::swap(N0, N1);
- std::swap(LHSR, RHSR);
- }
- if (RHSR && RHSR->getReg() == ARM::SP) {
- SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVtgpr2gpr, dl,
- Op.getValueType(), N0, N0),0);
- return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1);
- }
- break;
- }
+ case ARMISD::DYN_ALLOC:
+ return SelectDYN_ALLOC(Op);
case ISD::MUL:
if (Subtarget->isThumb1Only())
break;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0bfe213..4922f7d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -307,7 +307,10 @@
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ if (Subtarget->isThumb())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
@@ -435,6 +438,8 @@
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
+ case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
@@ -1399,6 +1404,53 @@
}
SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+ SDNode *Node = Op.getNode();
+ DebugLoc dl = Node->getDebugLoc();
+ MVT VT = Node->getValueType(0);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+ unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
+ unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
+ if (AlignVal > StackAlign)
+ // Do this now since selection pass cannot introduce new target
+ // independent node.
+ Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
+
+ // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
+ // using a "add r, sp, r" instead. Negate the size now so we don't have to
+ // do even more horrible hack later.
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (AFI->isThumb1OnlyFunction()) {
+ bool Negate = true;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
+ if (C) {
+ uint32_t Val = C->getZExtValue();
+ if (Val <= 508 && ((Val & 3) == 0))
+ Negate = false;
+ }
+ if (Negate)
+ Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
+ }
+
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops1[] = { Chain, Size, Align };
+ SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
+ Chain = Res.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue());
+ SDValue Ops2[] = { Res, Chain };
+ return DAG.getMergeValues(Ops2, 2, dl);
+}
+
+SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) {
@@ -2396,6 +2448,7 @@
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
@@ -2454,7 +2507,8 @@
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
switch (MI->getOpcode()) {
- default: assert(false && "Unexpected instr type to insert");
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
case ARM::tMOVCCr: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -2509,6 +2563,78 @@
F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
return BB;
}
+
+ case ARM::tANDsp:
+ case ARM::tADDspr_:
+ case ARM::tSUBspi_:
+ case ARM::t2SUBrSPi_:
+ case ARM::t2SUBrSPi12_:
+ case ARM::t2SUBrSPs_: {
+ MachineFunction *MF = BB->getParent();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool DstIsDead = MI->getOperand(0).isDead();
+ bool SrcIsKill = MI->getOperand(1).isKill();
+
+ if (SrcReg != ARM::SP) {
+ // Copy the source to SP from virtual register.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
+ unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
+ ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
+ BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ }
+
+ unsigned OpOpc = 0;
+ bool NeedPred = false, NeedCC = false, NeedOp3 = false;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected pseudo instruction!");
+ case ARM::tANDsp:
+ OpOpc = ARM::tAND;
+ NeedPred = true;
+ break;
+ case ARM::tADDspr_:
+ OpOpc = ARM::tADDspr;
+ break;
+ case ARM::tSUBspi_:
+ OpOpc = ARM::tSUBspi;
+ break;
+ case ARM::t2SUBrSPi_:
+ OpOpc = ARM::t2SUBrSPi;
+ NeedPred = true; NeedCC = true;
+ break;
+ case ARM::t2SUBrSPi12_:
+ OpOpc = ARM::t2SUBrSPi12;
+ NeedPred = true;
+ break;
+ case ARM::t2SUBrSPs_:
+ OpOpc = ARM::t2SUBrSPs;
+ NeedPred = true; NeedCC = true; NeedOp3 = true;
+ break;
+ }
+ MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
+ if (OpOpc == ARM::tAND)
+ AddDefaultT1CC(MIB);
+ MIB.addReg(ARM::SP);
+ MIB.addOperand(MI->getOperand(2));
+ if (NeedOp3)
+ MIB.addOperand(MI->getOperand(3));
+ if (NeedPred)
+ AddDefaultPred(MIB);
+ if (NeedCC)
+ AddDefaultCC(MIB);
+
+ // Copy the result from SP to virtual register.
+ const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
+ unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
+ ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
+ BuildMI(BB, dl, TII->get(CopyOpc))
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(ARM::SP);
+ MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return BB;
+ }
}
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 4fe4d8b..4649c18 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -65,11 +65,13 @@
FMRRD, // double to two gprs.
FMDRR, // Two gprs to double.
- EH_SJLJ_SETJMP, // SjLj exception handling setjmp
- EH_SJLJ_LONGJMP, // SjLj exception handling longjmp
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
THREAD_POINTER,
+ DYN_ALLOC, // Dynamic allocation on the stack.
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -255,6 +257,7 @@
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 5dfc4fc..9b54e67 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -138,18 +138,37 @@
"add $dst, pc, $rhs * 4", []>;
// ADD rd, sp, #imm8
-// FIXME: hard code sp?
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALU,
"add $dst, $sp, $rhs * 4 @ addrspi", []>;
// ADD sp, sp, #imm7
-// FIXME: hard code sp?
-def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
+def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
"add $dst, $rhs * 4", []>;
-// FIXME: Make use of the following?
+// SUB sp, sp, #imm7
+def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
+ "sub $dst, $rhs * 4", []>;
+
// ADD rm, sp, rm
+def tADDrSPr : TI<(outs GPR:$dst), (ins GPR:$sp, GPR:$rhs), IIC_iALU,
+ "add $dst, $sp, $rhs", []>;
+
// ADD sp, rm
+def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALU,
+ "add $dst, $rhs", []>;
+
+// Pseudo instruction that will expand into a tSUBspi + a copy.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ NoItinerary, "@ sub $dst, $rhs * 4", []>;
+
+def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ NoItinerary, "@ add $dst, $rhs", []>;
+
+let Defs = [CPSR] in
+def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+ NoItinerary, "@ and $dst, $rhs", []>;
+} // usesCustomDAGSchedInserter
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -549,9 +568,6 @@
// TODO: A7-96: STMIA - store multiple.
-def tSUBspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
- "sub $dst, $rhs * 4", []>;
-
// sign-extend byte
def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALU,
"sxtb", " $dst, $src",
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 9305c8a..11b0454 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -75,7 +75,8 @@
}]>;
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
-def imm0_4095 : PatLeaf<(i32 imm), [{
+def imm0_4095 : Operand<i32>,
+ PatLeaf<(i32 imm), [{
return (uint32_t)N->getZExtValue() < 4096;
}]>;
@@ -239,7 +240,7 @@
opc, ".w $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
// 12-bit imm
- def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU,
+ def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALU,
!strconcat(opc, "w"), " $dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
// register
@@ -431,6 +432,39 @@
(ins i32imm:$label, i32imm:$id, pred:$p), IIC_iALU,
"adr$p.w $dst, #${label}_${id:no_hash}", []>;
+
+// ADD r, sp, {so_imm|i12}
+def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU,
+ "add", ".w $dst, $sp, $imm", []>;
+def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU,
+ "addw", " $dst, $sp, $imm", []>;
+
+// ADD r, sp, so_reg
+def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU,
+ "add", ".w $dst, $sp, $rhs", []>;
+
+// SUB r, sp, {so_imm|i12}
+def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU,
+ "sub", ".w $dst, $sp, $imm", []>;
+def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU,
+ "subw", " $dst, $sp, $imm", []>;
+
+// SUB r, sp, so_reg
+def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU,
+ "sub", " $dst, $sp, $rhs", []>;
+
+
+// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
+ NoItinerary, "@ sub.w $dst, $sp, $imm", []>;
+def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
+ NoItinerary, "@ subw $dst, $sp, $imm", []>;
+def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+ NoItinerary, "@ sub $dst, $sp, $rhs", []>;
+} // usesCustomDAGSchedInserter
+
+
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a81b790..ea80e47 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -206,9 +206,13 @@
if (NewBase == 0)
return false;
}
- int BaseOpc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ int BaseOpc = !isThumb2
+ ? ARM::ADDri
+ : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
if (Offset < 0) {
- BaseOpc = isThumb2 ? ARM::t2SUBri : ARM::SUBri;
+ BaseOpc = !isThumb2
+ ? ARM::SUBri
+ : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
Offset = - Offset;
}
int ImmedOffset = isThumb2
@@ -329,6 +333,9 @@
if (!MI)
return false;
if (MI->getOpcode() != ARM::t2SUBri &&
+ MI->getOpcode() != ARM::t2SUBrSPi &&
+ MI->getOpcode() != ARM::t2SUBrSPi12 &&
+ MI->getOpcode() != ARM::tSUBspi &&
MI->getOpcode() != ARM::SUBri)
return false;
@@ -336,9 +343,10 @@
if (Bytes <= 0 || (Limit && Bytes >= Limit))
return false;
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
return (MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- MI->getOperand(2).getImm() == Bytes &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg);
}
@@ -350,6 +358,9 @@
if (!MI)
return false;
if (MI->getOpcode() != ARM::t2ADDri &&
+ MI->getOpcode() != ARM::t2ADDrSPi &&
+ MI->getOpcode() != ARM::t2ADDrSPi12 &&
+ MI->getOpcode() != ARM::tADDspi &&
MI->getOpcode() != ARM::ADDri)
return false;
@@ -357,9 +368,10 @@
// Make sure the offset fits in 8 bits.
return false;
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
return (MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- MI->getOperand(2).getImm() == Bytes &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg);
}
diff --git a/lib/Target/ARM/README-Thumb2.txt b/lib/Target/ARM/README-Thumb2.txt
index 651d393..48b5278 100644
--- a/lib/Target/ARM/README-Thumb2.txt
+++ b/lib/Target/ARM/README-Thumb2.txt
@@ -1,3 +1,7 @@
//===---------------------------------------------------------------------===//
// Random ideas for the ARM backend (Thumb2 specific).
//===---------------------------------------------------------------------===//
+
+We should be using ADD / SUB rd, sp, rm <shift> instructions.
+
+copyRegToReg should use tMOVgpr2gpr instead of t2MOVr?
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index cf2d099..2b329e0 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -65,11 +65,15 @@
if (DestRC == ARM::GPRRegisterClass &&
SrcRC == ARM::GPRRegisterClass) {
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr),
- DestReg).addReg(SrcReg)));
+ // FIXME: Just use tMOVgpr2gpr since it's shorter?
+ if (SrcReg == ARM::SP || DestReg == ARM::SP)
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
+ else
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr),
+ DestReg).addReg(SrcReg)));
return true;
} else if (DestRC == ARM::GPRRegisterClass &&
- SrcRC == ARM::tGPRRegisterClass) {
+ SrcRC == ARM::tGPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
return true;
} else if (DestRC == ARM::tGPRRegisterClass &&
@@ -162,26 +166,62 @@
}
while (NumBytes) {
- unsigned Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
unsigned ThisVal = NumBytes;
- if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
- NumBytes = 0;
- } else if (ThisVal < 4096) {
- Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
- NumBytes = 0;
+ unsigned Opc = 0;
+ if (DestReg == ARM::SP && BaseReg != ARM::SP) {
+ // mov sp, rn. Note t2MOVr cannot be used.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg);
+ BaseReg = ARM::SP;
+ continue;
+ }
+
+ if (BaseReg == ARM::SP) {
+ // sub sp, sp, #imm7
+ if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
+ assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
+ Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+ // FIXME: Fix Thumb1 immediate encoding.
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg).addImm(ThisVal/4);
+ NumBytes = 0;
+ continue;
+ }
+
+ // sub rd, sp, so_imm
+ Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
} else {
- // FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = CountLeadingZeros_32(ThisVal);
- ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
- NumBytes &= ~ThisVal;
- assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
- "Bit extraction didn't work?");
+ assert(DestReg != ARM::SP && BaseReg != ARM::SP);
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
+ if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
+ NumBytes = 0;
+ } else if (ThisVal < 4096) {
+ Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
+ NumBytes = 0;
+ } else {
+ // FIXME: Move this to ARMAddressingModes.h?
+ unsigned RotAmt = CountLeadingZeros_32(ThisVal);
+ ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ NumBytes &= ~ThisVal;
+ assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
+ "Bit extraction didn't work?");
+ }
}
// Build the new ADD / SUB.
- BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)));
+
BaseReg = DestReg;
}
}
@@ -288,7 +328,6 @@
unsigned FrameReg, int Offset,
const ARMBaseInstrInfo &TII) {
unsigned Opcode = MI.getOpcode();
- unsigned NewOpc = Opcode;
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
bool isSub = false;
@@ -299,9 +338,12 @@
if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+ bool isSP = FrameReg == ARM::SP;
if (Offset == 0) {
// Turn it into a move.
- MI.setDesc(TII.get(ARM::t2MOVr));
+ unsigned NewOpc = isSP ? ARM::tMOVgpr2gpr : ARM::t2MOVr;
+ MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.RemoveOperand(FrameRegIdx+1);
return 0;
@@ -310,23 +352,23 @@
if (Offset < 0) {
Offset = -Offset;
isSub = true;
- MI.setDesc(TII.get(ARM::t2SUBri));
+ MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri));
+ } else {
+ MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri));
}
// Common case: small offset, fits into instruction.
if (ARM_AM::getT2SOImmVal(Offset) != -1) {
- NewOpc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
- if (NewOpc != Opcode)
- MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
return 0;
}
// Another common case: imm12.
if (Offset < 4096) {
- NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
- if (NewOpc != Opcode)
- MI.setDesc(TII.get(NewOpc));
+ unsigned NewOpc = isSP
+ ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12)
+ : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12);
+ MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
return 0;
@@ -346,7 +388,7 @@
} else {
// AddrModeT2_so cannot handle any offset. If there is no offset
// register then we change to an immediate version.
- NewOpc = Opcode;
+ unsigned NewOpc = Opcode;
if (AddrMode == ARMII::AddrModeT2_so) {
unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
if (OffsetReg != 0) {