Enable load / store multiple pass for Thumb2. It's not using ldrd / strd yet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78104 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c6afbe9..a81b790 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -615,12 +615,15 @@
     return false;
 
   bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
-  unsigned Offset = isAM5
-    ? ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
-                        true, isDPR ? 2 : 1)
-    : (isAM2
-       ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
-       : Bytes);
+  unsigned Offset = 0;
+  if (isAM5)
+    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
+                               ? ARM_AM::db
+                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
+  else if (isAM2)
+    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+  else
+    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
   if (isLd) {
     if (isAM5)
       // FLDMS, FLDMD
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 918ca78..a6e987b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -101,8 +101,9 @@
 
 bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel) {
-  // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
-  if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+  // FIXME: temporarily disabling load / store optimization pass for Thumb1 mode.
+  if (OptLevel != CodeGenOpt::None && !DisableLdStOpti &&
+      !Subtarget.isThumb1Only())
     PM.add(createARMLoadStoreOptimizationPass());
 
   if (OptLevel != CodeGenOpt::None &&
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 4438a8c..a326185 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -599,6 +599,7 @@
       // FIXME
       bool isLDM = (MI->getOpcode() == ARM::LDM ||
                     MI->getOpcode() == ARM::LDM_RET ||
+                    MI->getOpcode() == ARM::t2LDM ||
                     MI->getOpcode() == ARM::t2LDM_RET);
       O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
     } else
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
new file mode 100644
index 0000000..c90722c
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | FileCheck %s
+
+@X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
+
+define i32 @t1() {
+; CHECK: t1:
+; CHECK: stmfd sp!, {r7, lr}
+; CHECK: ldmfd sp!, {r7, pc}
+        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
+        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
+        %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
+        ret i32 %tmp4
+}
+
+define i32 @t2() {
+; CHECK: t2:
+; CHECK: stmfd sp!, {r7, lr}
+; CHECK: ldmia
+; CHECK: ldmfd sp!, {r7, pc}
+        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
+        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
+        %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
+        ret i32 %tmp6
+}
+
+define i32 @t3() {
+; CHECK: t3:
+; CHECK: stmfd sp!, {r7, lr}
+; CHECK: ldmfd sp!, {r7, pc}
+        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
+        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
+        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
+        ret i32 %tmp6
+}
+
+declare i32 @f1(i32, i32)
+
+declare i32 @f2(i32, i32, i32)