[X86] Add WriteFMOVMSK/WriteVecMOVMSK/WriteMMXMOVMSK scheduler classes
Currently MOVMSK instructions use the WriteVecLogic class, which is a very poor choice given that MOVMSK involves a SSE->GPR transfer.
Differential Revision: https://reviews.llvm.org/D44924
llvm-svn: 328664
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index 79b58ae..5f08c94 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -635,7 +635,7 @@
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))],
- IIC_MMX_MOVMSK>, Sched<[WriteVecLogic]>;
+ IIC_MMX_MOVMSK>, Sched<[WriteMMXMOVMSK]>;
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 5ad2399..127509b 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -2593,7 +2593,7 @@
def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], IIC_SSE_MOVMSK, d>,
- Sched<[WriteVecLogic]>;
+ Sched<[WriteFMOVMSK]>;
}
let Predicates = [HasAVX] in {
@@ -4271,7 +4271,7 @@
// SSE2 - Packed Mask Creation
//===---------------------------------------------------------------------===//
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecMOVMSK] in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR128:$src),
@@ -4283,8 +4283,8 @@
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
- VEX, VEX_L, VEX_WIG;
+ [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))],
+ IIC_SSE_MOVMSK>, VEX, VEX_L, VEX_WIG;
}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 71eb873..db7ae8a 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -227,6 +227,11 @@
let ResourceCycles = [4,3,1,1];
}
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [BWPort0]> { let Latency = 3; }
+def : WriteRes<WriteVecMOVMSK, [BWPort0]> { let Latency = 3; }
+def : WriteRes<WriteMMXMOVMSK, [BWPort0]> { let Latency = 1; }
+
// AES instructions.
def : WriteRes<WriteAESDecEnc, [BWPort5]> { // Decryption, encryption.
let Latency = 7;
@@ -297,7 +302,6 @@
}
def: InstRW<[BWWriteResGroup1], (instregex "MMX_MOVD64from64rr",
"MMX_MOVD64grr",
- "MMX_PMOVMSKBrr",
"MMX_PSLLDri",
"MMX_PSLLDrr",
"MMX_PSLLQri",
@@ -839,15 +843,6 @@
"STOSQ",
"STOSW")>;
-def BWWriteResGroup26 : SchedWriteRes<[BWPort0]> {
- let Latency = 3;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[BWWriteResGroup26], (instregex "(V?)MOVMSKPD(Y?)rr",
- "(V?)MOVMSKPS(Y?)rr",
- "(V?)PMOVMSKB(Y?)rr")>;
-
def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index e2ea697..9782525 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -216,6 +216,11 @@
let ResourceCycles = [4,3,1,1];
}
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [HWPort0]> { let Latency = 3; }
+def : WriteRes<WriteVecMOVMSK, [HWPort0]> { let Latency = 3; }
+def : WriteRes<WriteMMXMOVMSK, [HWPort0]> { let Latency = 1; }
+
// AES Instructions.
def : WriteRes<WriteAESDecEnc, [HWPort5]> {
let Latency = 7;
@@ -658,7 +663,6 @@
}
def: InstRW<[HWWriteResGroup2], (instregex "MMX_MOVD64from64rr",
"MMX_MOVD64grr",
- "MMX_PMOVMSKBrr",
"MMX_PSLLDri",
"MMX_PSLLDrr",
"MMX_PSLLQri",
@@ -1763,15 +1767,6 @@
def: InstRW<[HWWriteResGroup48], (instregex "CALL(16|32|64)m",
"FARCALL64")>;
-def HWWriteResGroup49 : SchedWriteRes<[HWPort0]> {
- let Latency = 3;
- let NumMicroOps = 1;
- let ResourceCycles = [1];
-}
-def: InstRW<[HWWriteResGroup49], (instregex "(V?)MOVMSKPD(Y?)rr",
- "(V?)MOVMSKPS(Y?)rr",
- "(V?)PMOVMSKB(Y?)rr")>;
-
def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> {
let Latency = 3;
let NumMicroOps = 1;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 3147838..e5fc168 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -204,6 +204,11 @@
let ResourceCycles = [7, 1];
}
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [SBPort0]> { let Latency = 2; }
+def : WriteRes<WriteVecMOVMSK, [SBPort0]> { let Latency = 2; }
+def : WriteRes<WriteMMXMOVMSK, [SBPort0]> { let Latency = 1; }
+
// AES Instructions.
def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> {
let Latency = 7;
@@ -527,10 +532,7 @@
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup7], (instregex "(V?)PMOVMSKBrr",
- "(V?)MOVMSKPD(Y?)rr",
- "(V?)MOVMSKPS(Y?)rr",
- "(V?)MOVPDI2DIrr",
+def: InstRW<[SBWriteResGroup7], (instregex "(V?)MOVPDI2DIrr",
"(V?)MOVPQIto64rr")>;
def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> {
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 2a16982..bceb435 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -224,6 +224,11 @@
let ResourceCycles = [4,3,1,1];
}
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [SKLPort0]> { let Latency = 2; }
+def : WriteRes<WriteVecMOVMSK, [SKLPort0]> { let Latency = 2; }
+def : WriteRes<WriteMMXMOVMSK, [SKLPort0]> { let Latency = 2; }
+
// AES instructions.
def : WriteRes<WriteAESDecEnc, [SKLPort0]> { // Decryption, encryption.
let Latency = 4;
@@ -692,14 +697,10 @@
}
def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr",
"MMX_MOVD64grr",
- "MMX_PMOVMSKBrr",
"(V?)COMISDrr",
"(V?)COMISSrr",
- "(V?)MOVMSKPD(Y?)rr",
- "(V?)MOVMSKPS(Y?)rr",
"(V?)MOVPDI2DIrr",
"(V?)MOVPQIto64rr",
- "(V?)PMOVMSKB(Y?)rr",
"VTESTPD(Y?)rr",
"VTESTPS(Y?)rr",
"(V?)UCOMISDrr",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index c9a9c60..8bfd213 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -224,6 +224,11 @@
let ResourceCycles = [4,3,1,1];
}
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [SKXPort0]> { let Latency = 2; }
+def : WriteRes<WriteVecMOVMSK, [SKXPort0]> { let Latency = 2; }
+def : WriteRes<WriteMMXMOVMSK, [SKXPort0]> { let Latency = 2; }
+
// AES instructions.
def : WriteRes<WriteAESDecEnc, [SKXPort0]> { // Decryption, encryption.
let Latency = 4;
@@ -1444,28 +1449,18 @@
"COMISSrr",
"MMX_MOVD64from64rr",
"MMX_MOVD64grr",
- "MMX_PMOVMSKBrr",
- "MOVMSKPDrr",
- "MOVMSKPSrr",
"MOVPDI2DIrr",
"MOVPQIto64rr",
- "PMOVMSKBrr",
"UCOMISDrr",
"UCOMISSrr",
"VCOMISDZrr(b?)",
"VCOMISDrr",
"VCOMISSZrr(b?)",
"VCOMISSrr",
- "VMOVMSKPDYrr",
- "VMOVMSKPDrr",
- "VMOVMSKPSYrr",
- "VMOVMSKPSrr",
"VMOVPDI2DIZrr(b?)(k?)(z?)",
"VMOVPDI2DIrr",
"VMOVPQIto64Zrr(b?)(k?)(z?)",
"VMOVPQIto64rr",
- "VPMOVMSKBYrr",
- "VPMOVMSKBrr",
"VTESTPDYrr",
"VTESTPDrr",
"VTESTPSYrr",
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 85ca7a9..6136d96 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -105,6 +105,11 @@
// These are often used on both floating point and integer vectors.
defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
+// MOVMSK operations.
+def WriteFMOVMSK : SchedWrite;
+def WriteVecMOVMSK : SchedWrite;
+def WriteMMXMOVMSK : SchedWrite;
+
// Conversion between integer and float.
defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index af37935..4d88383 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -359,6 +359,14 @@
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
////////////////////////////////////////////////////////////////////////////////
+// MOVMSK Instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+def : WriteRes<WriteFMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
+def : WriteRes<WriteVecMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
+def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
+
+////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////
@@ -771,13 +779,6 @@
}
def : InstRW<[JWriteVMaskMovYSt], (instrs VMASKMOVPDYmr, VMASKMOVPSYmr)>;
-def JWriteVMOVMSK: SchedWriteRes<[JFPU0, JFPA, JALU0]> {
- let Latency = 3;
-}
-def : InstRW<[JWriteVMOVMSK], (instrs MOVMSKPDrr, VMOVMSKPDrr, VMOVMSKPDYrr,
- MOVMSKPSrr, VMOVMSKPSrr, VMOVMSKPSYrr,
- PMOVMSKBrr, VPMOVMSKBrr, MMX_PMOVMSKBrr)>;
-
def JWriteVTESTY: SchedWriteRes<[JFPU01, JFPX, JFPA, JALU0]> {
let Latency = 4;
let ResourceCycles = [2, 2, 2, 1];
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 6fb7e94..87b1bf2 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -185,6 +185,11 @@
let ResourceCycles = [21, 1];
}
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
+def : WriteRes<WriteVecMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
+def : WriteRes<WriteMMXMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
+
// AES Instructions.
def : WriteRes<WriteAESDecEnc, [SLM_FPC_RSV0]> {
let Latency = 8;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 5459485..252243c 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -216,6 +216,11 @@
// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
+def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
+def : WriteRes<WriteMMXMOVMSK, [ZnFPU2]>;
+
// AES Instructions.
defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>;
defm : ZnWriteResFpuPair<WriteAESIMC, [ZnFPU01], 4>;
@@ -1004,14 +1009,12 @@
// m, v,v.
def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
-// PMOVMSKB.
-def ZnWritePMOVMSKB : SchedWriteRes<[ZnFPU2]> {
- let NumMicroOps = 2;
-}
+// PMOVMSKBY.
def ZnWritePMOVMSKBY : SchedWriteRes<[ZnFPU2]> {
+ let NumMicroOps = 2;
let Latency = 2;
+ let ResourceCycles = [2];
}
-def : InstRW<[ZnWritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKBrr")>;
def : InstRW<[ZnWritePMOVMSKBY], (instregex "(V|MMX_)?PMOVMSKBYrr")>;
// PEXTR B/W/D/Q.
@@ -1150,11 +1153,6 @@
//=== Floating Point XMM and YMM Instructions ===//
//-- Move instructions --//
-// MOVMSKP S/D.
-// r32 <- x,y.
-def ZnWriteMOVMSKPr : SchedWriteRes<[ZnFPU2]> ;
-def : InstRW<[ZnWriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)(Y?)rr")>;
-
// VPERM2F128.
def : InstRW<[WriteMicrocoded], (instregex "VPERM2F128rr")>;
def : InstRW<[WriteMicrocoded], (instregex "VPERM2F128rm")>;