Add NEON 'laned' operations. This fixes another batch of gcc testsuite failures
and makes the code faster.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@81220 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 142c3f1..9da847a 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -612,14 +612,24 @@
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (DestRC != SrcRC) {
-    if (((DestRC == ARM::DPRRegisterClass) &&
-         (SrcRC == ARM::DPR_VFP2RegisterClass)) ||
-        ((SrcRC == ARM::DPRRegisterClass) &&
-         (DestRC == ARM::DPR_VFP2RegisterClass))) {
-      // Allow copy between DPR and DPR_VFP2.
-    } else {
+    // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies
+    if (DestRC == ARM::DPRRegisterClass) {
+      if (SrcRC == ARM::DPR_VFP2RegisterClass ||
+          SrcRC == ARM::DPR_8RegisterClass) {
+      } else
+        return false;
+    } else if (DestRC == ARM::DPR_VFP2RegisterClass) {
+      if (SrcRC == ARM::DPRRegisterClass ||
+          SrcRC == ARM::DPR_8RegisterClass) {
+      } else
+        return false;
+    } else if (DestRC == ARM::DPR_8RegisterClass) {
+      if (SrcRC == ARM::DPRRegisterClass ||
+          SrcRC == ARM::DPR_VFP2RegisterClass) {
+      } else
+        return false;
+    } else
       return false;
-    }
   }
 
   if (DestRC == ARM::GPRRegisterClass) {
@@ -629,7 +639,8 @@
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
                    .addReg(SrcReg));
   } else if ((DestRC == ARM::DPRRegisterClass) ||
-             (DestRC == ARM::DPR_VFP2RegisterClass)) {
+             (DestRC == ARM::DPR_VFP2RegisterClass) ||
+             (DestRC == ARM::DPR_8RegisterClass)) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
                    .addReg(SrcReg));
   } else if (DestRC == ARM::QPRRegisterClass) {
@@ -652,7 +663,9 @@
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addReg(0).addImm(0));
-  } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass) {
+  } else if (RC == ARM::DPRRegisterClass ||
+             RC == ARM::DPR_VFP2RegisterClass ||
+             RC == ARM::DPR_8RegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addImm(0));
@@ -678,7 +691,9 @@
   if (RC == ARM::GPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
                    .addFrameIndex(FI).addReg(0).addImm(0));
-  } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass) {
+  } else if (RC == ARM::DPRRegisterClass ||
+             RC == ARM::DPR_VFP2RegisterClass ||
+             RC == ARM::DPR_8RegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
                    .addFrameIndex(FI).addImm(0));
   } else if (RC == ARM::SPRRegisterClass) {