Correctly handle the Thumb-2 imm8 addrmode. Specialize frame index elimination more exactly for Thumb-2 to get better code gen.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@76919 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 72f40a0..6dbd9e5 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -513,7 +513,7 @@
 ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                       int &FrameIndex) const {
   unsigned oc = MI->getOpcode();
-  if (oc == getOpcode(ARMII::LDR)) {
+  if (oc == getOpcode(ARMII::LDRrr)) {
     if (MI->getOperand(1).isFI() &&
         MI->getOperand(2).isReg() &&
         MI->getOperand(3).isImm() &&
@@ -523,6 +523,14 @@
       return MI->getOperand(0).getReg();
     }
   }
+  else if (oc == getOpcode(ARMII::LDRri)) {
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+  }
   else if ((oc == getOpcode(ARMII::FLDD)) ||
            (oc == getOpcode(ARMII::FLDS))) {
     if (MI->getOperand(1).isFI() &&
@@ -540,7 +548,7 @@
 ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
   unsigned oc = MI->getOpcode();
-  if (oc == getOpcode(ARMII::STR)) {
+  if (oc == getOpcode(ARMII::STRrr)) {
     if (MI->getOperand(1).isFI() &&
         MI->getOperand(2).isReg() &&
         MI->getOperand(3).isImm() &&
@@ -550,6 +558,14 @@
       return MI->getOperand(0).getReg();
     }
   }
+  else if (oc == getOpcode(ARMII::STRri)) {
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+  }
   else if ((oc == getOpcode(ARMII::FSTD)) ||
            (oc == getOpcode(ARMII::FSTS))) {
     if (MI->getOperand(1).isFI() &&
@@ -602,7 +618,7 @@
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (RC == ARM::GPRRegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(getOpcode(ARMII::STR)))
+    AddDefaultPred(BuildMI(MBB, I, DL, get(getOpcode(ARMII::STRrr)))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addReg(0).addImm(0));
   } else if (RC == ARM::DPRRegisterClass) {
@@ -626,7 +642,10 @@
   DebugLoc DL = DebugLoc::getUnknownLoc();
   unsigned Opc = 0;
   if (RC == ARM::GPRRegisterClass) {
-    Opc = getOpcode(ARMII::STR);
+    if ((Addr.size() > 1) && Addr[1].isImm())
+      Opc = getOpcode(ARMII::STRri);
+    else
+      Opc = getOpcode(ARMII::STRrr);
   } else if (RC == ARM::DPRRegisterClass) {
     Opc = getOpcode(ARMII::FSTD);
   } else {
@@ -651,7 +670,7 @@
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (RC == ARM::GPRRegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(getOpcode(ARMII::LDR)), DestReg)
+    AddDefaultPred(BuildMI(MBB, I, DL, get(getOpcode(ARMII::LDRrr)), DestReg)
                    .addFrameIndex(FI).addReg(0).addImm(0));
   } else if (RC == ARM::DPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(getOpcode(ARMII::FLDD)), DestReg)
@@ -671,7 +690,10 @@
   DebugLoc DL = DebugLoc::getUnknownLoc();
   unsigned Opc = 0;
   if (RC == ARM::GPRRegisterClass) {
-    Opc = getOpcode(ARMII::LDR);
+    if ((Addr.size() > 1) && Addr[1].isImm())
+      Opc = getOpcode(ARMII::LDRri);
+    else
+      Opc = getOpcode(ARMII::LDRrr);
   } else if (RC == ARM::DPRRegisterClass) {
       Opc = getOpcode(ARMII::FLDD);
   } else {
@@ -704,14 +726,14 @@
         unsigned SrcReg = MI->getOperand(1).getReg();
         bool isKill = MI->getOperand(1).isKill();
         bool isUndef = MI->getOperand(1).isUndef();
-        NewMI = BuildMI(MF, MI->getDebugLoc(), get(getOpcode(ARMII::STR)))
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(getOpcode(ARMII::STRrr)))
           .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
           .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
       } else {          // move -> load
         unsigned DstReg = MI->getOperand(0).getReg();
         bool isDead = MI->getOperand(0).isDead();
         bool isUndef = MI->getOperand(0).isUndef();
-        NewMI = BuildMI(MF, MI->getDebugLoc(), get(getOpcode(ARMII::LDR)))
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(getOpcode(ARMII::LDRrr)))
           .addReg(DstReg,
                   RegState::Define |
                   getDeadRegState(isDead) |
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index be952d8..6445cc1 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -177,9 +177,11 @@
     FLDS,
     FSTD,
     FSTS,
-    LDR,
+    LDRrr,
+    LDRri,
     MOVr,
-    STR,
+    STRrr,
+    STRri,
     SUBri,
     SUBrs,
     SUBrr,
@@ -215,13 +217,6 @@
   // Return the opcode that implements 'Op', or 0 if no opcode
   virtual unsigned getOpcode(ARMII::Op Op) const =0;
 
-  // If 'opcode' is an instruction with an unsigned offset that also
-  // has a version with a signed offset, return the opcode for the
-  // version with the signed offset. In 'NumBits' return the number of
-  // bits for the signed offset.
-  virtual unsigned unsignedOffsetOpcodeToSigned(unsigned opcode,
-                                                unsigned *NumBits) const = 0;
-
   // Return true if the block does not fall through.
   virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0;
 
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index ec5e89f..5591de1 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -142,11 +142,6 @@
   return TII.getOpcode((ARMII::Op)Op);
 }
 
-unsigned ARMBaseRegisterInfo::
-unsignedOffsetOpcodeToSigned(unsigned opcode, unsigned *NumBits) const {
-  return TII.unsignedOffsetOpcodeToSigned(opcode, NumBits);
-}
-
 const unsigned*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   static const unsigned CalleeSavedRegs[] = {
@@ -1031,9 +1026,126 @@
   return Reg;
 }
 
+int ARMBaseRegisterInfo::
+rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                  unsigned FrameReg, int Offset) const 
+{
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = MI.getDesc();
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+  bool isSub = false;
+  
+  // Memory operands in inline assembly always use AddrMode2.
+  if (Opcode == ARM::INLINEASM)
+    AddrMode = ARMII::AddrMode2;
+  
+  if (Opcode == getOpcode(ARMII::ADDri)) {
+    Offset += MI.getOperand(FrameRegIdx+1).getImm();
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setDesc(TII.get(getOpcode(ARMII::MOVr)));
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(FrameRegIdx+1);
+      return 0;
+    } else if (Offset < 0) {
+      Offset = -Offset;
+      isSub = true;
+      MI.setDesc(TII.get(getOpcode(ARMII::SUBri)));
+    }
+
+    // Common case: small offset, fits into instruction.
+    if (ARM_AM::getSOImmVal(Offset) != -1) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+      return 0;
+    }
+
+    // Otherwise, pull as much of the immedidate into this ADDri/SUBri
+    // as possible.
+    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+    // We will handle these bits from offset, clear them.
+    Offset &= ~ThisImmVal;
+
+    // Get the properly encoded SOImmVal field.
+    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+           "Bit extraction didn't work?");
+    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+ } else {
+    unsigned ImmIdx = 0;
+    int InstrOffs = 0;
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    switch (AddrMode) {
+    case ARMII::AddrMode2: {
+      ImmIdx = FrameRegIdx+2;
+      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 12;
+      break;
+    }
+    case ARMII::AddrMode3: {
+      ImmIdx = FrameRegIdx+2;
+      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 8;
+      break;
+    }
+    case ARMII::AddrMode5: {
+      ImmIdx = FrameRegIdx+1;
+      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 8;
+      Scale = 4;
+      break;
+    }
+    default:
+      llvm_unreachable("Unsupported addressing mode!");
+      break;
+    }
+
+    Offset += InstrOffs * Scale;
+    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+    if (Offset < 0) {
+      Offset = -Offset;
+      isSub = true;
+    }
+
+    // Attempt to fold address comp. if opcode has offset bits
+    if (NumBits > 0) {
+      // Common case: small offset, fits into instruction.
+      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+      int ImmedOffset = Offset / Scale;
+      unsigned Mask = (1 << NumBits) - 1;
+      if ((unsigned)Offset <= Mask * Scale) {
+        // Replace the FrameIndex with sp
+        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+        if (isSub)
+          ImmedOffset |= 1 << NumBits;
+        ImmOp.ChangeToImmediate(ImmedOffset);
+        return 0;
+      }
+      
+      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+      ImmedOffset = ImmedOffset & Mask;
+      if (isSub)
+        ImmedOffset |= 1 << NumBits;
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      Offset &= ~(Mask*Scale);
+    }
+  }
+
+  return (isSub) ? -Offset : Offset;
+}
+
 void ARMBaseRegisterInfo::
 eliminateFrameIndex(MachineBasicBlock::iterator II,
-                    int SPAdj, RegScavenger *RS) const{
+                    int SPAdj, RegScavenger *RS) const {
   unsigned i = 0;
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
@@ -1065,162 +1177,10 @@
     Offset -= AFI->getFramePtrSpillOffset();
   }
 
-  unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = MI.getDesc();
-  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
-  bool isSub = false;
-
-  // Memory operands in inline assembly always use AddrMode2.
-  if (Opcode == ARM::INLINEASM)
-    AddrMode = ARMII::AddrMode2;
-
-  if (Opcode == getOpcode(ARMII::ADDri)) {
-    Offset += MI.getOperand(i+1).getImm();
-    if (Offset == 0) {
-      // Turn it into a move.
-      MI.setDesc(TII.get(getOpcode(ARMII::MOVr)));
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      MI.RemoveOperand(i+1);
-      return;
-    } else if (Offset < 0) {
-      Offset = -Offset;
-      isSub = true;
-      MI.setDesc(TII.get(getOpcode(ARMII::SUBri)));
-    }
-
-    // Common case: small offset, fits into instruction.
-    if (ARM_AM::getSOImmVal(Offset) != -1) {
-      // Replace the FrameIndex with sp / fp
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      MI.getOperand(i+1).ChangeToImmediate(Offset);
-      return;
-    }
-
-    // Otherwise, we fallback to common code below to form the imm offset with
-    // a sequence of ADDri instructions.  First though, pull as much of the imm
-    // into this ADDri as possible.
-    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
-    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
-
-    // We will handle these bits from offset, clear them.
-    Offset &= ~ThisImmVal;
-
-    // Get the properly encoded SOImmVal field.
-    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
-           "Bit extraction didn't work?");
-    MI.getOperand(i+1).ChangeToImmediate(ThisImmVal);
-  } else {
-    unsigned ImmIdx = 0;
-    int InstrOffs = 0;
-    unsigned NumBits = 0;
-    unsigned Scale = 1;
-    bool encodedOffset = true;
-    bool HandlesNeg = true;
-    switch (AddrMode) {
-    case ARMII::AddrMode2: {
-      ImmIdx = i+2;
-      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
-      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
-        InstrOffs *= -1;
-      NumBits = 12;
-      break;
-    }
-    case ARMII::AddrMode3: {
-      ImmIdx = i+2;
-      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
-      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
-        InstrOffs *= -1;
-      NumBits = 8;
-      break;
-    }
-    case ARMII::AddrMode5: {
-      ImmIdx = i+1;
-      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
-      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
-        InstrOffs *= -1;
-      NumBits = 8;
-      Scale = 4;
-      break;
-    }
-    case ARMII::AddrModeT2_i12: {
-      ImmIdx = i+1;
-      InstrOffs = MI.getOperand(ImmIdx).getImm();
-      NumBits = 12;
-      encodedOffset = false;
-      HandlesNeg = false;
-      break;
-    }
-    case ARMII::AddrModeT2_i8: {
-      ImmIdx = i+1;
-      InstrOffs = MI.getOperand(ImmIdx).getImm();
-      NumBits = 8;
-      encodedOffset = false;
-      break;
-    }
-    case ARMII::AddrModeT2_so: {
-      ImmIdx = i+2;
-      InstrOffs = MI.getOperand(ImmIdx).getImm();
-      encodedOffset = false;
-      break;
-    }
-    default:
-      llvm_unreachable("Unsupported addressing mode!");
-      break;
-    }
-
-    Offset += InstrOffs * Scale;
-    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
-    if (Offset < 0) {
-      // For addrmodes that cannot handle negative offsets, convert to
-      // an opcode that can, or set NumBits == 0 to avoid folding
-      // address computation
-      if (!HandlesNeg) {
-        unsigned usop = unsignedOffsetOpcodeToSigned(Opcode, &NumBits);
-        if (usop != 0) {
-          MI.setDesc(TII.get(usop));
-          HandlesNeg = true;
-          Opcode = usop;
-        }
-        else {
-          NumBits = 0;
-        }
-      }
-
-      Offset = -Offset;
-      isSub = true;
-    }
-
-    // Attempt to fold address comp. if opcode has offset bits
-    if (NumBits > 0) {
-      // Common case: small offset, fits into instruction.
-      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
-      int ImmedOffset = Offset / Scale;
-      unsigned Mask = (1 << NumBits) - 1;
-      if ((unsigned)Offset <= Mask * Scale) {
-        // Replace the FrameIndex with sp
-        MI.getOperand(i).ChangeToRegister(FrameReg, false);
-        if (isSub) {
-          if (encodedOffset)
-            ImmedOffset |= 1 << NumBits;
-          else
-            ImmedOffset = -ImmedOffset;
-        }
-        ImmOp.ChangeToImmediate(ImmedOffset);
-        return;
-      }
-      
-      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
-      ImmedOffset = ImmedOffset & Mask;
-      if (isSub) {
-          if (encodedOffset)
-            ImmedOffset |= 1 << NumBits;
-          else
-            ImmedOffset = -ImmedOffset;
-      }
-      ImmOp.ChangeToImmediate(ImmedOffset);
-      Offset &= ~(Mask*Scale);
-    }
-  }
+  // modify MI as necessary to handle as much of 'Offset' as possible
+  Offset = rewriteFrameIndex(MI, i, FrameReg, Offset);
+  if (Offset == 0)
+    return;
 
   // If we get here, the immediate doesn't fit into the instruction.  We folded
   // as much as possible above, handle the rest, providing a register that is
@@ -1240,7 +1200,7 @@
     ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
   unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
   emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
-                          isSub ? -Offset : Offset, Pred, PredReg, TII, dl);
+                          Offset, Pred, PredReg, TII, dl);
   MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
 }
 
@@ -1249,10 +1209,11 @@
 /// 3: fp area, 0: don't care).
 static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
-                                   int Opc, unsigned Area,
+                                   int Opc1, int Opc2, unsigned Area,
                                    const ARMSubtarget &STI) {
   while (MBBI != MBB.end() &&
-         MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFI()) {
+         ((MBBI->getOpcode() == Opc1) || (MBBI->getOpcode() == Opc2)) &&
+         MBBI->getOperand(1).isFI()) {
     if (Area != 0) {
       bool Done = false;
       unsigned Category = 0;
@@ -1342,7 +1303,8 @@
 
   // Build the new SUBri to adjust SP for integer callee-save spill area 1.
   emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS1Size);
-  movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::STR), 1, STI);
+  movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::STRrr),
+                         getOpcode(ARMII::STRri), 1, STI);
 
   // Darwin ABI requires FP to point to the stack slot that contains the
   // previous FP.
@@ -1357,7 +1319,8 @@
   emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS2Size);
 
   // Build the new SUBri to adjust SP for FP callee-save spill area.
-  movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::STR), 2, STI);
+  movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::STRrr),
+                         getOpcode(ARMII::STRri), 2, STI);
   emitSPUpdate(MBB, MBBI, TII, dl, -DPRCSSize);
 
   // Determine starting offsets of spill areas.
@@ -1372,7 +1335,7 @@
   NumBytes = DPRCSOffset;
   if (NumBytes) {
     // Insert it after all the callee-save spills.
-    movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::FSTD), 3, STI);
+    movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::FSTD), 0, 3, STI);
     emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes);
   }
 
@@ -1397,7 +1360,8 @@
                         const ARMBaseInstrInfo &TII, 
                         const unsigned *CSRegs) {
   return ((MI->getOpcode() == (int)TII.getOpcode(ARMII::FLDD) ||
-           MI->getOpcode() == (int)TII.getOpcode(ARMII::LDR)) &&
+           MI->getOpcode() == (int)TII.getOpcode(ARMII::LDRrr) ||
+           MI->getOpcode() == (int)TII.getOpcode(ARMII::LDRri)) &&
           MI->getOperand(1).isFI() &&
           isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
 }
@@ -1458,15 +1422,17 @@
     }
 
     // Move SP to start of integer callee save spill area 2.
-    movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::FLDD), 3, STI);
+    movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::FLDD), 0, 3, STI);
     emitSPUpdate(MBB, MBBI, TII, dl, AFI->getDPRCalleeSavedAreaSize());
 
     // Move SP to start of integer callee save spill area 1.
-    movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::LDR), 2, STI);
+    movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::LDRrr),
+                           getOpcode(ARMII::LDRri), 2, STI);
     emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea2Size());
 
     // Move SP to SP upon entry to the function.
-    movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::LDR), 1, STI);
+    movePastCSLoadStoreOps(MBB, MBBI, getOpcode(ARMII::LDRrr),
+                           getOpcode(ARMII::LDRri), 1, STI);
     emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea1Size());
   }
 
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index ac5e6b6..b725ee6 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -59,13 +59,6 @@
   // Return the opcode that implements 'Op', or 0 if no opcode
   unsigned getOpcode(int Op) const;
 
-  // If 'opcode' is an instruction with an unsigned offset that also
-  // has a version with a signed offset, return the opcode for the
-  // version with the signed offset. In 'NumBits' return the number of
-  // bits for the signed offset.
-  unsigned unsignedOffsetOpcodeToSigned(unsigned opcode,
-                                        unsigned *NumBits) const;
-
 public:
   /// getRegisterNumbering - Given the enum value for some register, e.g.
   /// ARM::LR, return the number that it corresponds to (e.g. 14). It
@@ -133,6 +126,10 @@
                                              MachineBasicBlock &MBB,
                                              MachineBasicBlock::iterator I) const;
 
+  // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
+  // could not be handled directly in MI.
+  virtual int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                                  unsigned FrameReg, int Offset) const;
   virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
                                    int SPAdj, RegScavenger *RS = NULL) const;
 
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1e329ce..cdb27c2 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -628,7 +628,7 @@
       if (N.getOpcode() == ISD::SUB)
         RHSC = -RHSC;
 
-      if ((RHSC >= -255) && (RHSC <= 255)) { // sign + 8 bits.
+      if ((RHSC >= -255) && (RHSC <= 0)) { // 8 bits (always negative)
         Base   = N.getOperand(0);
         OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
         return true;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 45b77c8..ff4474f 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -30,11 +30,6 @@
 }
 
 unsigned ARMInstrInfo::
-unsignedOffsetOpcodeToSigned(unsigned opcode, unsigned *NumBits) const {
-  return 0;
-}
-
-unsigned ARMInstrInfo::
 getUnindexedOpcode(unsigned Opc) const {
   switch (Opc) {
   default: break;
@@ -85,9 +80,11 @@
   case ARMII::FLDS: return ARM::FLDS;
   case ARMII::FSTD: return ARM::FSTD;
   case ARMII::FSTS: return ARM::FSTS;
-  case ARMII::LDR: return ARM::LDR;
+  case ARMII::LDRrr: return ARM::LDR;
+  case ARMII::LDRri: return 0;
   case ARMII::MOVr: return ARM::MOVr;
-  case ARMII::STR: return ARM::STR;
+  case ARMII::STRrr: return ARM::STR;
+  case ARMII::STRri: return 0;
   case ARMII::SUBri: return ARM::SUBri;
   case ARMII::SUBrs: return ARM::SUBrs;
   case ARMII::SUBrr: return ARM::SUBrr;
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 8ff0912..3e9f020 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -35,13 +35,6 @@
   // Return the opcode that implements 'Op', or 0 if no opcode
   unsigned getOpcode(ARMII::Op Op) const;
 
-  // If 'opcode' is an instruction with an unsigned offset that also
-  // has a version with a signed offset, return the opcode for the
-  // version with the signed offset. In 'NumBits' return the number of
-  // bits for the signed offset.
-  unsigned unsignedOffsetOpcodeToSigned(unsigned opcode,
-                                        unsigned *NumBits) const;
-
   // Return true if the block does not fall through.
   bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
 
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bd76fd1..8d241c8 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -109,7 +109,7 @@
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
 }
 
-// t2addrmode_imm8  := reg +/- imm8
+// t2addrmode_imm8  := reg - imm8
 def t2addrmode_imm8 : Operand<i32>,
                       ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
   let PrintMethod = "printT2AddrModeImm8Operand";
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 7bec736..1593c87 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -30,12 +30,6 @@
   return 0;
 }
 
-unsigned
-Thumb1InstrInfo::unsignedOffsetOpcodeToSigned(unsigned opcode,
-                                              unsigned *NumBits) const {
-  return 0;
-}
-
 unsigned Thumb1InstrInfo::getOpcode(ARMII::Op Op) const {
   switch (Op) {
   case ARMII::ADDri: return ARM::tADDi8;
@@ -53,9 +47,11 @@
   case ARMII::FLDS: return 0;
   case ARMII::FSTD: return 0;
   case ARMII::FSTS: return 0;
-  case ARMII::LDR: return ARM::tLDR;
+  case ARMII::LDRrr: return ARM::tLDR;
+  case ARMII::LDRri: return 0;
   case ARMII::MOVr: return ARM::tMOVr;
-  case ARMII::STR: return ARM::tSTR;
+  case ARMII::STRrr: return ARM::tSTR;
+  case ARMII::STRri: return 0;
   case ARMII::SUBri: return ARM::tSUBi8;
   case ARMII::SUBrs: return 0;
   case ARMII::SUBrr: return ARM::tSUBrr;
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index a1c9f04..67b78fb 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -34,13 +34,6 @@
   // Return the opcode that implements 'Op', or 0 if no opcode
   unsigned getOpcode(ARMII::Op Op) const;
 
-  // If 'opcode' is an instruction with an unsigned offset that also
-  // has a version with a signed offset, return the opcode for the
-  // version with the signed offset. In 'NumBits' return the number of
-  // bits for the signed offset.
-  unsigned unsignedOffsetOpcodeToSigned(unsigned opcode,
-                                        unsigned *NumBits) const;
-
   // Return true if the block does not fall through.
   bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
 
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index bf90758..0ae54b5 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -387,6 +387,15 @@
     MI.RemoveOperand(Op);
 }
 
+int Thumb1RegisterInfo::
+rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                  unsigned FrameReg, int Offset) const 
+{
+  // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo
+  // version then can pull out Thumb1 specific parts here
+  return 0;
+}
+
 void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                              int SPAdj, RegScavenger *RS) const{
   unsigned i = 0;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 31e0df2..2832a5b 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -48,6 +48,10 @@
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
+  // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
+  // could not be handled directly in MI.
+  int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                        unsigned FrameReg, int Offset) const;
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index d92856c..91a1dcb 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -48,9 +48,11 @@
   case ARMII::FLDS: return ARM::FLDS;
   case ARMII::FSTD: return ARM::FSTD;
   case ARMII::FSTS: return ARM::FSTS;
-  case ARMII::LDR: return ARM::LDR;   // FIXME
+  case ARMII::LDRrr: return ARM::t2LDRs;
+  case ARMII::LDRri: return ARM::t2LDRi12;
   case ARMII::MOVr: return ARM::t2MOVr;
-  case ARMII::STR: return ARM::STR;   // FIXME
+  case ARMII::STRrr: return ARM::t2STRs;
+  case ARMII::STRri: return ARM::t2STRi12;
   case ARMII::SUBri: return ARM::t2SUBri;
   case ARMII::SUBrs: return ARM::t2SUBrs;
   case ARMII::SUBrr: return ARM::t2SUBrr;
@@ -88,29 +90,6 @@
   return false;
 }
 
-unsigned
-Thumb2InstrInfo::unsignedOffsetOpcodeToSigned(unsigned opcode,
-                                              unsigned *NumBits) const
-{
-  if (NumBits != NULL)
-    *NumBits = 8;
-
-  switch (opcode) {
-  case ARM::t2LDRi12:   return ARM::t2LDRi8;
-  case ARM::t2LDRHi12:  return ARM::t2LDRHi8;
-  case ARM::t2LDRBi12:  return ARM::t2LDRBi8;
-  case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
-  case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
-  case ARM::t2STRi12:   return ARM::t2STRi8;
-  case ARM::t2STRBi12:  return ARM::t2STRBi8;
-  case ARM::t2STRHi12:  return ARM::t2STRHi8;
-  default:
-    break;
-  }
-
-  return 0;
-}
-
 bool
 Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 43ea56e..ac31707 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -34,13 +34,6 @@
   // Return the opcode that implements 'Op', or 0 if no opcode
   unsigned getOpcode(ARMII::Op Op) const;
 
-  // If 'opcode' is an instruction with an unsigned offset that also
-  // has a version with a signed offset, return the opcode for the
-  // version with the signed offset. In 'NumBits' return the number of
-  // bits for the signed offset.
-  unsigned unsignedOffsetOpcodeToSigned(unsigned opcode,
-                                        unsigned *NumBits) const;
-
   // Return true if the block does not fall through.
   bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
 
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 4f2acf8..93ad7bd 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -61,8 +61,218 @@
     .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0);
 }
 
+static unsigned
+negativeOffsetOpcode(unsigned opcode)
+{
+  switch (opcode) {
+  case ARM::t2LDRi12:   return ARM::t2LDRi8;
+  case ARM::t2LDRHi12:  return ARM::t2LDRHi8;
+  case ARM::t2LDRBi12:  return ARM::t2LDRBi8;
+  case ARM::t2LDRSHi12: return ARM::t2LDRSHi8;
+  case ARM::t2LDRSBi12: return ARM::t2LDRSBi8;
+  case ARM::t2STRi12:   return ARM::t2STRi8;
+  case ARM::t2STRBi12:  return ARM::t2STRBi8;
+  case ARM::t2STRHi12:  return ARM::t2STRHi8;
+
+  case ARM::t2LDRi8:
+  case ARM::t2LDRHi8:
+  case ARM::t2LDRBi8:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRSBi8:
+  case ARM::t2STRi8:
+  case ARM::t2STRBi8:
+  case ARM::t2STRHi8:
+    return opcode;
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+static unsigned
+positiveOffsetOpcode(unsigned opcode)
+{
+  switch (opcode) {
+  case ARM::t2LDRi8:   return ARM::t2LDRi12;
+  case ARM::t2LDRHi8:  return ARM::t2LDRHi12;
+  case ARM::t2LDRBi8:  return ARM::t2LDRBi12;
+  case ARM::t2LDRSHi8: return ARM::t2LDRSHi12;
+  case ARM::t2LDRSBi8: return ARM::t2LDRSBi12;
+  case ARM::t2STRi8:   return ARM::t2STRi12;
+  case ARM::t2STRBi8:  return ARM::t2STRBi12;
+  case ARM::t2STRHi8:  return ARM::t2STRHi12;
+
+  case ARM::t2LDRi12:
+  case ARM::t2LDRHi12:
+  case ARM::t2LDRBi12:
+  case ARM::t2LDRSHi12:
+  case ARM::t2LDRSBi12:
+  case ARM::t2STRi12:
+  case ARM::t2STRBi12:
+  case ARM::t2STRHi12:
+    return opcode;
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+static unsigned
+immediateOffsetOpcode(unsigned opcode)
+{
+  switch (opcode) {
+  case ARM::t2LDRs:   return ARM::t2LDRi12;
+  case ARM::t2LDRHs:  return ARM::t2LDRHi12;
+  case ARM::t2LDRBs:  return ARM::t2LDRBi12;
+  case ARM::t2LDRSHs: return ARM::t2LDRSHi12;
+  case ARM::t2LDRSBs: return ARM::t2LDRSBi12;
+  case ARM::t2STRs:   return ARM::t2STRi12;
+  case ARM::t2STRBs:  return ARM::t2STRBi12;
+  case ARM::t2STRHs:  return ARM::t2STRHi12;
+
+  case ARM::t2LDRi12:
+  case ARM::t2LDRHi12:
+  case ARM::t2LDRBi12:
+  case ARM::t2LDRSHi12:
+  case ARM::t2LDRSBi12:
+  case ARM::t2STRi12:
+  case ARM::t2STRBi12:
+  case ARM::t2STRHi12:
+  case ARM::t2LDRi8:
+  case ARM::t2LDRHi8:
+  case ARM::t2LDRBi8:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRSBi8:
+  case ARM::t2STRi8:
+  case ARM::t2STRBi8:
+  case ARM::t2STRHi8:
+    return opcode;
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
 bool Thumb2RegisterInfo::
 requiresRegisterScavenging(const MachineFunction &MF) const {
   // FIXME
   return false;
 }
+
+int Thumb2RegisterInfo::
+rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                  unsigned FrameReg, int Offset) const 
+{
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = MI.getDesc();
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+  bool isSub = false;
+
+  // Memory operands in inline assembly always use AddrModeT2_i12
+  if (Opcode == ARM::INLINEASM)
+    AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?
+  
+  if (Opcode == getOpcode(ARMII::ADDri)) {
+    Offset += MI.getOperand(FrameRegIdx+1).getImm();
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setDesc(TII.get(getOpcode(ARMII::MOVr)));
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(FrameRegIdx+1);
+      return 0;
+    } else if (Offset < 0) {
+      Offset = -Offset;
+      isSub = true;
+      MI.setDesc(TII.get(getOpcode(ARMII::SUBri)));
+    }
+
+    // Common case: small offset, fits into instruction.
+    if (ARM_AM::getT2SOImmVal(Offset) != -1) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+      return 0;
+    }
+
+    // Otherwise, extract 8 adjacent bits from the immediate into this
+    // t2ADDri/t2SUBri.
+    unsigned RotAmt = CountLeadingZeros_32(Offset);
+    if (RotAmt > 24)
+      RotAmt = 24;
+    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+
+    // We will handle these bits from offset, clear them.
+    Offset &= ~ThisImmVal;
+
+    assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
+           "Bit extraction didn't work?");
+    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+  } else {
+    // AddrModeT2_so cannot handle any offset. If there is no offset
+    // register then we change to an immediate version.
+    if (AddrMode == ARMII::AddrModeT2_so) {
+      unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg();
+      if (OffsetReg != 0) {
+        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+        return Offset;
+      }
+      
+      MI.RemoveOperand(FrameRegIdx+1);
+      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0);
+      Opcode = immediateOffsetOpcode(Opcode);
+      AddrMode = ARMII::AddrModeT2_i12;
+    }
+
+    // Neon and FP address modes are handled by the base ARM version...
+    if ((AddrMode != ARMII::AddrModeT2_i8) &&
+        (AddrMode != ARMII::AddrModeT2_i12)) {
+      return ARMBaseRegisterInfo::rewriteFrameIndex(MI, FrameRegIdx,
+                                                    FrameReg, Offset);
+    }
+    
+    unsigned NumBits = 0;
+    Offset += MI.getOperand(FrameRegIdx+1).getImm();
+
+    // i8 supports only negative, and i12 supports only positive, so
+    // based on Offset sign convert Opcode to the appropriate
+    // instruction
+    if (Offset < 0) {
+      Opcode = negativeOffsetOpcode(Opcode);
+      NumBits = 8;
+      isSub = true;
+      Offset = -Offset;
+    }
+    else {
+      Opcode = positiveOffsetOpcode(Opcode);
+      NumBits = 12;
+    }
+
+    if (Opcode) {
+      MI.setDesc(TII.get(Opcode));
+      MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1);
+
+      // Attempt to fold address computation
+      // Common case: small offset, fits into instruction.
+      unsigned Mask = (1 << NumBits) - 1;
+      if ((unsigned)Offset <= Mask) {
+        // Replace the FrameIndex with fp/sp
+        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+        ImmOp.ChangeToImmediate((isSub) ? -Offset : Offset);
+        return 0;
+      }
+      
+      // Otherwise, offset doesn't fit. Pull in what we can to simplify
+      unsigned ImmedOffset = Offset & Mask;
+      ImmOp.ChangeToImmediate((isSub) ? -ImmedOffset : ImmedOffset);
+      Offset &= ~Mask;
+    }
+  }
+
+  return (isSub) ? -Offset : Offset;
+}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 0a9a15a..57c0663 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -27,14 +27,19 @@
 public:
   Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
 
+  // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
+  // could not be handled directly in MI.
+  int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                        unsigned FrameReg, int Offset) const;
+
   /// emitLoadConstPool - Emits a load from constpool to materialize the
   /// specified immediate.
- void emitLoadConstPool(MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator &MBBI,
-                        DebugLoc dl,
-                        unsigned DestReg, unsigned SubIdx, int Val,
-                        ARMCC::CondCodes Pred = ARMCC::AL,
-                        unsigned PredReg = 0) const;
+  void emitLoadConstPool(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         DebugLoc dl,
+                         unsigned DestReg, unsigned SubIdx, int Val,
+                         ARMCC::CondCodes Pred = ARMCC::AL,
+                         unsigned PredReg = 0) const;
 
   bool requiresRegisterScavenging(const MachineFunction &MF) const;
 };