Properly qualify AVX2 specific parts of execution dependency table. Also enable converting between 256-bit PS/PD operations when AVX1 is enabled. Fixes PR11370.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144622 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 9428fff..9f7b21f 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -3555,7 +3555,11 @@
   { X86::VMOVAPSYrr,   X86::VMOVAPDYrr,   X86::VMOVDQAYrr  },
   { X86::VMOVUPSYmr,   X86::VMOVUPDYmr,   X86::VMOVDQUYmr  },
   { X86::VMOVUPSYrm,   X86::VMOVUPDYrm,   X86::VMOVDQUYrm  },
-  { X86::VMOVNTPSYmr,  X86::VMOVNTPDYmr,  X86::VMOVNTDQYmr },
+  { X86::VMOVNTPSYmr,  X86::VMOVNTPDYmr,  X86::VMOVNTDQYmr }
+};
+
+static const unsigned ReplaceableInstrsAVX2[][3] = {
+  //PackedSingle       PackedDouble       PackedInt
   { X86::VANDNPSYrm,   X86::VANDNPDYrm,   X86::VPANDNYrm   },
   { X86::VANDNPSYrr,   X86::VANDNPDYrr,   X86::VPANDNYrr   },
   { X86::VANDPSYrm,    X86::VANDPDYrm,    X86::VPANDYrm    },
@@ -3563,31 +3567,37 @@
   { X86::VORPSYrm,     X86::VORPDYrm,     X86::VPORYrm     },
   { X86::VORPSYrr,     X86::VORPDYrr,     X86::VPORYrr     },
   { X86::VXORPSYrm,    X86::VXORPDYrm,    X86::VPXORYrm    },
-  { X86::VXORPSYrr,    X86::VXORPDYrr,    X86::VPXORYrr    },
+  { X86::VXORPSYrr,    X86::VXORPDYrr,    X86::VPXORYrr    }
 };
 
 // FIXME: Some shuffle and unpack instructions have equivalents in different
 // domains, but they require a bit more work than just switching opcodes.
 
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const unsigned *lookup(unsigned opcode, unsigned domain, bool hasAVX2) {
   for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
     if (ReplaceableInstrs[i][domain-1] == opcode)
       return ReplaceableInstrs[i];
+  if (domain != 3 || hasAVX2) // only use PackedInt domain if AVX2 is enabled
+    for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
+      if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
+        return ReplaceableInstrsAVX2[i];
   return 0;
 }
 
 std::pair<uint16_t, uint16_t>
 X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
   uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+  bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
   return std::make_pair(domain,
-                        domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+                  domain && lookup(MI->getOpcode(), domain, hasAVX2) ? 0xe : 0);
 }
 
 void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
   assert(Domain>0 && Domain<4 && "Invalid execution domain");
   uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
   assert(dom && "Not an SSE instruction");
-  const unsigned *table = lookup(MI->getOpcode(), dom);
+  bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+  const unsigned *table = lookup(MI->getOpcode(), dom, hasAVX2);
   assert(table && "Cannot change domain");
   MI->setDesc(get(table[Domain-1]));
 }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 4d4d7c0..feb7155 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -140,10 +140,7 @@
       PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
       ShouldPrint = true;
     }
-    if (Subtarget.hasAVX2()) {
-      // FIXME this should be turned on for just AVX, but the pass doesn't check
-      // that instructions are valid before replacing them and there are AVX2
-      // integer instructions in the table.
+    if (Subtarget.hasAVX()) {
       PM.add(createExecutionDependencyFixPass(&X86::VR256RegClass));
       ShouldPrint = true;
     }