[AVX512] Add initial support for the Execution Domain fixing pass to change some EVEX instructions.
llvm-svn: 276393
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ea6a28d..1e7809c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -7219,7 +7219,12 @@
{ X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
{ X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
{ X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+ // AVX512 support
+ { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
+ { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
+ { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
+ { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
};
static const uint16_t ReplaceableInstrsAVX2[][3] = {
@@ -7246,6 +7251,40 @@
{ X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm}
};
+static const uint16_t ReplaceableInstrsAVX512[][4] = {
+ // Two integer columns for 64-bit and 32-bit elements.
+ //PackedSingle PackedDouble PackedInt PackedInt
+ { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
+ { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
+ { X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
+ { X86::VANDPSZ128rr, X86::VANDPDZ128rr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
+ { X86::VORPSZ128rm, X86::VORPDZ128rm, X86::VPORQZ128rm, X86::VPORDZ128rm },
+ { X86::VORPSZ128rr, X86::VORPDZ128rr, X86::VPORQZ128rr, X86::VPORDZ128rr },
+ { X86::VXORPSZ128rm, X86::VXORPDZ128rm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
+ { X86::VXORPSZ128rr, X86::VXORPDZ128rr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
+ { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
+ { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
+ { X86::VANDPSZ256rm, X86::VANDPDZ256rm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
+ { X86::VANDPSZ256rr, X86::VANDPDZ256rr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
+ { X86::VORPSZ256rm, X86::VORPDZ256rm, X86::VPORQZ256rm, X86::VPORDZ256rm },
+ { X86::VORPSZ256rr, X86::VORPDZ256rr, X86::VPORQZ256rr, X86::VPORDZ256rr },
+ { X86::VXORPSZ256rm, X86::VXORPDZ256rm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
+ { X86::VXORPSZ256rr, X86::VXORPDZ256rr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
+ { X86::VANDNPSZrm, X86::VANDNPDZrm, X86::VPANDNQZrm, X86::VPANDNDZrm },
+ { X86::VANDNPSZrr, X86::VANDNPDZrr, X86::VPANDNQZrr, X86::VPANDNDZrr },
+ { X86::VANDPSZrm, X86::VANDPDZrm, X86::VPANDQZrm, X86::VPANDDZrm },
+ { X86::VANDPSZrr, X86::VANDPDZrr, X86::VPANDQZrr, X86::VPANDDZrr },
+ { X86::VORPSZrm, X86::VORPDZrm, X86::VPORQZrm, X86::VPORDZrm },
+ { X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr },
+ { X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm },
+ { X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr },
+ { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA64Zmr },
+ { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA64Zrm },
+ { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrr },
+ { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU64Zmr },
+ { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU64Zrm },
+};
+
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
@@ -7263,6 +7302,14 @@
return nullptr;
}
+static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain) {
+ // If this is the integer domain make sure to check both integer columns.
+ for (const uint16_t (&Row)[4] : ReplaceableInstrsAVX512)
+ if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode))
+ return Row;
+ return nullptr;
+}
+
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
@@ -7272,6 +7319,8 @@
validDomains = 0xe;
else if (domain && lookupAVX2(MI.getOpcode(), domain))
validDomains = hasAVX2 ? 0xe : 0x6;
+ else if (domain && lookupAVX512(MI.getOpcode(), domain))
+ validDomains = 0xe;
return std::make_pair(domain, validDomains);
}
@@ -7285,6 +7334,12 @@
"256-bit vector operations only available in AVX2");
table = lookupAVX2(MI.getOpcode(), dom);
}
+ if (!table) { // try the AVX512 table
+ table = lookupAVX512(MI.getOpcode(), dom);
+ // Don't change integer Q instructions to D instructions.
+ if (dom == 3 && table[3] == MI.getOpcode())
+ Domain = 4;
+ }
assert(table && "Cannot change domain");
MI.setDesc(get(table[Domain - 1]));
}
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 50c9c25..d58882d 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -321,7 +321,7 @@
void X86PassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
- addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ addPass(createExecutionDependencyFixPass(&X86::VR128XRegClass));
if (UseVZeroUpper)
addPass(createX86IssueVZeroUpperPass());