Use NEON reg-reg moves, where profitable. This reduces "domain-cross" stalls, when we used to mix vfp and neon code (the former were used for reg-reg moves)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@85764 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 6b031da..4cb2407 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -16,15 +16,18 @@
 #include "ARMAddressingModes.h"
 #include "ARMGenInstrInfo.inc"
 #include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
@@ -32,8 +35,9 @@
 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                cl::desc("Enable ARM 2-addr to 3-addr conv"));
 
-ARMBaseInstrInfo::ARMBaseInstrInfo()
-  : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) {
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+  : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
+    Subtarget(STI) {
 }
 
 MachineInstr *
@@ -504,7 +508,7 @@
   case ARM::FCPYS:
   case ARM::FCPYD:
   case ARM::VMOVD:
-  case  ARM::VMOVQ: {
+  case ARM::VMOVQ: {
     SrcReg = MI.getOperand(1).getReg();
     DstReg = MI.getOperand(0).getReg();
     return true;
@@ -647,11 +651,45 @@
   } else if (DestRC == ARM::SPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
                    .addReg(SrcReg));
-  } else if ((DestRC == ARM::DPRRegisterClass) ||
-             (DestRC == ARM::DPR_VFP2RegisterClass) ||
-             (DestRC == ARM::DPR_8RegisterClass)) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
-                   .addReg(SrcReg));
+  } else if (DestRC == ARM::DPR_VFP2RegisterClass ||
+             DestRC == ARM::DPR_8RegisterClass ||
+             SrcRC == ARM::DPR_VFP2RegisterClass ||
+             SrcRC == ARM::DPR_8RegisterClass) {
+    // Always use neon reg-reg move if source or dest is NEON-only regclass.
+    BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg);
+  } else if (DestRC == ARM::DPRRegisterClass) {
+    const ARMBaseRegisterInfo* TRI = &getRegisterInfo();
+
+    // Find the Machine Instruction which defines SrcReg.
+    MachineBasicBlock::iterator J = (I == MBB.begin() ? I : prior(I));
+    while (J != MBB.begin()) {
+      if (J->modifiesRegister(SrcReg, TRI))
+        break;
+      --J;
+    }
+
+    unsigned Domain;
+    if (J->modifiesRegister(SrcReg, TRI)) {
+      Domain = J->getDesc().TSFlags & ARMII::DomainMask;
+      // Instructions in general domain are subreg accesses.
+      // Map them to NEON reg-reg moves.
+      if (Domain == ARMII::DomainGeneral)
+        Domain = ARMII::DomainNEON;
+    } else {
+      // We reached the beginning of the BB and found no instruction defining
+      // the reg. This means that register should be live-in for this BB.
+      // It's always to better to use NEON reg-reg moves.
+      Domain = ARMII::DomainNEON;
+    }
+
+    if ((Domain & ARMII::DomainNEON) && getSubtarget().hasNEON()) {
+      BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
+    } else {
+      assert((Domain & ARMII::DomainVFP ||
+              !getSubtarget().hasNEON()) && "Invalid domain!");
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
+                     .addReg(SrcReg));
+    }
   } else if (DestRC == ARM::QPRRegisterClass ||
              DestRC == ARM::QPR_VFP2RegisterClass) {
     BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);