[X86] Split WriteVecALU/WritePHAdd into XMM and YMM/ZMM scheduler classes
llvm-svn: 331453
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 6583a66..807a266 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4684,16 +4684,16 @@
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
loadv2i64, i128mem,
- WritePHAdd, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
loadv2i64, i128mem,
- WritePHAdd, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
loadv2i64, i128mem,
- WritePHAdd, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
loadv2i64, i128mem,
- WritePHAdd, 0>, VEX_4V;
+ SchedWritePHAdd.XMM, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
@@ -4705,10 +4705,10 @@
SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- WritePHAdd, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- WritePHAdd, loadv2i64, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
}
@@ -4730,16 +4730,16 @@
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
loadv4i64, i256mem,
- WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- WritePHAdd, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
loadv4i64, i256mem,
- WritePHAdd, 0>, VEX_4V, VEX_L;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
@@ -4748,10 +4748,10 @@
SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
- WritePHAdd>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
- WritePHAdd>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -4759,13 +4759,13 @@
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
- memopv2i64, i128mem, WritePHAdd>;
+ memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
- memopv2i64, i128mem, WritePHAdd>;
+ memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
- memopv2i64, i128mem, WritePHAdd>;
+ memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
- memopv2i64, i128mem, WritePHAdd>;
+ memopv2i64, i128mem, SchedWritePHAdd.XMM>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
SchedWriteVecALU.XMM, memopv2i64>;
defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128,
@@ -4776,10 +4776,10 @@
memopv2i64, i128mem, SchedWriteVarShuffle.XMM>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
- WritePHAdd, memopv2i64>;
+ SchedWritePHAdd.XMM, memopv2i64>;
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
- WritePHAdd, memopv2i64>;
+ SchedWritePHAdd.XMM, memopv2i64>;
defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
v16i8, VR128, memopv2i64, i128mem,
SchedWriteVecIMul.XMM>;
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index a1b6a74..01a92dc 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -199,7 +199,8 @@
def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>;
def : WriteRes<WriteVecMove, [BWPort015]>;
-defm : BWWriteResPair<WriteVecALU, [BWPort15], 1>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts.
@@ -365,9 +366,10 @@
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : BWWriteResPair<WriteFHAdd, [BWPort1,BWPort5], 5, [1,2], 3>;
+defm : BWWriteResPair<WriteFHAdd, [BWPort1,BWPort5], 5, [1,2], 3, 5>;
defm : BWWriteResPair<WriteFHAddY, [BWPort1,BWPort5], 5, [1,2], 3, 6>;
-defm : BWWriteResPair<WritePHAdd, [BWPort5,BWPort15], 3, [2,1], 3>;
+defm : BWWriteResPair<WritePHAdd, [BWPort5,BWPort15], 3, [2,1], 3, 5>;
+defm : BWWriteResPair<WritePHAddY, [BWPort5,BWPort15], 3, [2,1], 3, 6>;
// Remaining instrs.
@@ -1087,55 +1089,6 @@
"FCOMP32m",
"FCOMP64m")>;
-def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[BWWriteResGroup76], (instregex "VPABSBYrm",
- "VPABSDYrm",
- "VPABSWYrm",
- "VPADDBYrm",
- "VPADDDYrm",
- "VPADDQYrm",
- "VPADDSBYrm",
- "VPADDSWYrm",
- "VPADDUSBYrm",
- "VPADDUSWYrm",
- "VPADDWYrm",
- "VPAVGBYrm",
- "VPAVGWYrm",
- "VPCMPEQBYrm",
- "VPCMPEQDYrm",
- "VPCMPEQQYrm",
- "VPCMPEQWYrm",
- "VPCMPGTBYrm",
- "VPCMPGTDYrm",
- "VPCMPGTWYrm",
- "VPMAXSBYrm",
- "VPMAXSDYrm",
- "VPMAXSWYrm",
- "VPMAXUBYrm",
- "VPMAXUDYrm",
- "VPMAXUWYrm",
- "VPMINSBYrm",
- "VPMINSDYrm",
- "VPMINSWYrm",
- "VPMINUBYrm",
- "VPMINUDYrm",
- "VPMINUWYrm",
- "VPSIGNBYrm",
- "VPSIGNDYrm",
- "VPSIGNWYrm",
- "VPSUBBYrm",
- "VPSUBDYrm",
- "VPSUBQYrm",
- "VPSUBSBYrm",
- "VPSUBSWYrm",
- "VPSUBUSBYrm",
- "VPSUBUSWYrm",
- "VPSUBWYrm")>;
-
def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> {
let Latency = 7;
let NumMicroOps = 2;
@@ -1415,18 +1368,6 @@
"VPSRAVDYrm",
"VPSRLVDYrm")>;
-def BWWriteResGroup110 : SchedWriteRes<[BWPort5,BWPort23,BWPort15]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[BWWriteResGroup110], (instregex "VPHADDDYrm",
- "VPHADDSWYrm",
- "VPHADDWYrm",
- "VPHSUBDYrm",
- "VPHSUBSWYrm",
- "VPHSUBWYrm")>;
-
def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 8c4e2f9..085034b 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -197,7 +197,8 @@
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>;
defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>;
-defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>;
+defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 6>;
+defm : HWWriteResPair<WriteVecALUY, [HWPort15], 1, [1], 1, 7>;
defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteVecIMulY, [HWPort0], 5, [1], 1, 7>;
defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>;
@@ -608,7 +609,8 @@
defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
-defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
//=== Floating Point XMM and YMM Instructions ===//
@@ -940,105 +942,14 @@
"BLSI(32|64)rm",
"BLSMSK(32|64)rm",
"BLSR(32|64)rm",
- "MOVBE(16|32|64)rm")>;
-
-def HWWriteResGroup16_1 : SchedWriteRes<[HWPort23,HWPort15]> {
- let Latency = 7;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup16_1], (instregex "(V?)PABSBrm",
- "(V?)PABSDrm",
- "(V?)PABSWrm",
- "(V?)PADDBrm",
- "(V?)PADDDrm",
- "(V?)PADDQrm",
- "(V?)PADDSBrm",
- "(V?)PADDSWrm",
- "(V?)PADDUSBrm",
- "(V?)PADDUSWrm",
- "(V?)PADDWrm",
- "(V?)PAVGBrm",
- "(V?)PAVGWrm",
- "(V?)PCMPEQBrm",
- "(V?)PCMPEQDrm",
- "(V?)PCMPEQQrm",
- "(V?)PCMPEQWrm",
- "(V?)PCMPGTBrm",
- "(V?)PCMPGTDrm",
- "(V?)PCMPGTWrm",
- "(V?)PMAXSBrm",
- "(V?)PMAXSDrm",
- "(V?)PMAXSWrm",
- "(V?)PMAXUBrm",
- "(V?)PMAXUDrm",
- "(V?)PMAXUWrm",
- "(V?)PMINSBrm",
- "(V?)PMINSDrm",
- "(V?)PMINSWrm",
- "(V?)PMINUBrm",
- "(V?)PMINUDrm",
- "(V?)PMINUWrm",
- "(V?)PSIGNBrm",
- "(V?)PSIGNDrm",
- "(V?)PSIGNWrm",
- "(V?)PSUBBrm",
- "(V?)PSUBDrm",
- "(V?)PSUBQrm",
- "(V?)PSUBSBrm",
- "(V?)PSUBSWrm",
- "(V?)PSUBUSBrm",
- "(V?)PSUBUSWrm",
- "(V?)PSUBWrm")>;
-
-def HWWriteResGroup16_2 : SchedWriteRes<[HWPort23,HWPort15]> {
- let Latency = 8;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[HWWriteResGroup16_2], (instregex "VPABSBYrm",
- "VPABSDYrm",
- "VPABSWYrm",
- "VPADDBYrm",
- "VPADDDYrm",
- "VPADDQYrm",
- "VPADDSBYrm",
- "VPADDSWYrm",
- "VPADDUSBYrm",
- "VPADDUSWYrm",
- "VPADDWYrm",
- "VPAVGBYrm",
- "VPAVGWYrm",
- "VPCMPEQBYrm",
- "VPCMPEQDYrm",
- "VPCMPEQQYrm",
- "VPCMPEQWYrm",
- "VPCMPGTBYrm",
- "VPCMPGTDYrm",
- "VPCMPGTWYrm",
- "VPMAXSBYrm",
- "VPMAXSDYrm",
- "VPMAXSWYrm",
- "VPMAXUBYrm",
- "VPMAXUDYrm",
- "VPMAXUWYrm",
- "VPMINSBYrm",
- "VPMINSDYrm",
- "VPMINSWYrm",
- "VPMINUBYrm",
- "VPMINUDYrm",
- "VPMINUWYrm",
- "VPSIGNBYrm",
- "VPSIGNDYrm",
- "VPSIGNWYrm",
- "VPSUBBYrm",
- "VPSUBDYrm",
- "VPSUBQYrm",
- "VPSUBSBYrm",
- "VPSUBSWYrm",
- "VPSUBUSBYrm",
- "VPSUBUSWYrm",
- "VPSUBWYrm")>;
+ "MOVBE(16|32|64)rm",
+ "MMX_PABS(B|D|W)rm",
+ "MMX_P(ADD|SUB)(B|D|W|Q)irm",
+ "MMX_P(ADD|SUB)(U?)S(B|W)irm",
+ "MMX_PAVG(B|W)irm",
+ "MMX_PCMP(EQ|GT)(B|D|W)irm",
+ "MMX_P(MAX|MIN)(SW|UB)irm",
+ "MMX_PSIGN(B|D|W)rm")>;
def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort015]> {
let Latency = 7;
@@ -1514,18 +1425,6 @@
}
def: InstRW<[HWWriteResGroup64], (instregex "MMX_PH(ADD|SUB)(D|SW|W)rm")>;
-def HWWriteResGroup64_1 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[HWWriteResGroup64_1], (instregex "VPHADDDYrm",
- "VPHADDSWYrm",
- "VPHADDWYrm",
- "VPHSUBDYrm",
- "VPHSUBSWYrm",
- "VPHSUBWYrm")>;
-
def HWWriteResGroup65 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> {
let Latency = 8;
let NumMicroOps = 4;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 354a7aa..2d4985f 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -177,7 +177,8 @@
defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>;
defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>;
-defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>;
+defm : SBWriteResPair<WriteVecALU, [SBPort15], 1, [1], 1, 6>;
+defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 6>;
defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>;
@@ -222,6 +223,7 @@
defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>;
defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 6>;
+defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
////////////////////////////////////////////////////////////////////////////////
// String instructions.
@@ -406,47 +408,12 @@
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr",
- "MMX_PADDQirr",
- "MMX_PALIGNRrri",
- "MMX_PSIGN(B|D|W)rr",
- "(V?)PABSBrr",
- "(V?)PABSDrr",
- "(V?)PABSWrr",
+def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNRrri",
"(V?)PACKSSDWrr",
"(V?)PACKSSWBrr",
"(V?)PACKUSDWrr",
"(V?)PACKUSWBrr",
- "(V?)PADDBrr",
- "(V?)PADDDrr",
- "(V?)PADDQrr",
- "(V?)PADDSBrr",
- "(V?)PADDSWrr",
- "(V?)PADDUSBrr",
- "(V?)PADDUSWrr",
- "(V?)PADDWrr",
"(V?)PALIGNRrri",
- "(V?)PAVGBrr",
- "(V?)PAVGWrr",
- "(V?)PCMPEQBrr",
- "(V?)PCMPEQDrr",
- "(V?)PCMPEQQrr",
- "(V?)PCMPEQWrr",
- "(V?)PCMPGTBrr",
- "(V?)PCMPGTDrr",
- "(V?)PCMPGTWrr",
- "(V?)PMAXSBrr",
- "(V?)PMAXSDrr",
- "(V?)PMAXSWrr",
- "(V?)PMAXUBrr",
- "(V?)PMAXUDrr",
- "(V?)PMAXUWrr",
- "(V?)PMINSBrr",
- "(V?)PMINSDrr",
- "(V?)PMINSWrr",
- "(V?)PMINUBrr",
- "(V?)PMINUDrr",
- "(V?)PMINUWrr",
"(V?)PMOVSXBDrr",
"(V?)PMOVSXBQrr",
"(V?)PMOVSXBWrr",
@@ -462,19 +429,8 @@
"(V?)PSHUFDri",
"(V?)PSHUFHWri",
"(V?)PSHUFLWri",
- "(V?)PSIGNBrr",
- "(V?)PSIGNDrr",
- "(V?)PSIGNWrr",
"(V?)PSLLDQri",
"(V?)PSRLDQri",
- "(V?)PSUBBrr",
- "(V?)PSUBDrr",
- "(V?)PSUBQrr",
- "(V?)PSUBSBrr",
- "(V?)PSUBSWrr",
- "(V?)PSUBUSBrr",
- "(V?)PSUBUSWrr",
- "(V?)PSUBWrr",
"(V?)PUNPCKHBWrr",
"(V?)PUNPCKHDQrr",
"(V?)PUNPCKHQDQrr",
@@ -604,6 +560,12 @@
let ResourceCycles = [1];
}
def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr",
+ "MMX_PADD(B|D|W)irr",
+ "MMX_P(ADD|SUB)(U?)S(B|W)irr",
+ "MMX_PAVG(B|W)irr",
+ "MMX_PCMP(EQ|GT)(B|D|W)irr",
+ "MMX_P(MAX|MIN)(SW|UB)irr",
+ "MMX_PSUB(B|D|Q|W)irr",
"PUSHFS64",
"(V?)CVTDQ2PS(Y?)rr")>;
@@ -954,44 +916,11 @@
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm",
- "(V?)PABSBrm",
- "(V?)PABSDrm",
- "(V?)PABSWrm",
- "(V?)PACKSSDWrm",
+def: InstRW<[SBWriteResGroup59], (instregex "(V?)PACKSSDWrm",
"(V?)PACKSSWBrm",
"(V?)PACKUSDWrm",
"(V?)PACKUSWBrm",
- "(V?)PADDBrm",
- "(V?)PADDDrm",
- "(V?)PADDQrm",
- "(V?)PADDSBrm",
- "(V?)PADDSWrm",
- "(V?)PADDUSBrm",
- "(V?)PADDUSWrm",
- "(V?)PADDWrm",
"(V?)PALIGNRrmi",
- "(V?)PAVGBrm",
- "(V?)PAVGWrm",
- "(V?)PCMPEQBrm",
- "(V?)PCMPEQDrm",
- "(V?)PCMPEQQrm",
- "(V?)PCMPEQWrm",
- "(V?)PCMPGTBrm",
- "(V?)PCMPGTDrm",
- "(V?)PCMPGTWrm",
- "(V?)PMAXSBrm",
- "(V?)PMAXSDrm",
- "(V?)PMAXSWrm",
- "(V?)PMAXUBrm",
- "(V?)PMAXUDrm",
- "(V?)PMAXUWrm",
- "(V?)PMINSBrm",
- "(V?)PMINSDrm",
- "(V?)PMINSWrm",
- "(V?)PMINUBrm",
- "(V?)PMINUDrm",
- "(V?)PMINUWrm",
"(V?)PMOVSXBDrm",
"(V?)PMOVSXBQrm",
"(V?)PMOVSXBWrm",
@@ -1007,17 +936,6 @@
"(V?)PSHUFDmi",
"(V?)PSHUFHWmi",
"(V?)PSHUFLWmi",
- "(V?)PSIGNBrm",
- "(V?)PSIGNDrm",
- "(V?)PSIGNWrm",
- "(V?)PSUBBrm",
- "(V?)PSUBDrm",
- "(V?)PSUBQrm",
- "(V?)PSUBSBrm",
- "(V?)PSUBSWrm",
- "(V?)PSUBUSBrm",
- "(V?)PSUBUSWrm",
- "(V?)PSUBWrm",
"(V?)PUNPCKHBWrm",
"(V?)PUNPCKHDQrm",
"(V?)PUNPCKHQDQrm",
@@ -1027,6 +945,18 @@
"(V?)PUNPCKLQDQrm",
"(V?)PUNPCKLWDrm")>;
+def SBWriteResGroup59a : SchedWriteRes<[SBPort23,SBPort1]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup59a], (instregex "MMX_PADD(B|D|W)irm",
+ "MMX_P(ADD|SUB)(U?)S(B|W)irm",
+ "MMX_PAVG(B|W)irm",
+ "MMX_PCMP(EQ|GT)(B|D|W)irm",
+ "MMX_P(MAX|MIN)(SW|UB)irm",
+ "MMX_PSUB(B|D|Q|W)irm")>;
+
def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> {
let Latency = 7;
let NumMicroOps = 3;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 32fd51b..402ae1f 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -195,7 +195,8 @@
def : WriteRes<WriteVecStore, [SKLPort237, SKLPort4]>;
def : WriteRes<WriteVecMove, [SKLPort015]>;
-defm : SKLWriteResPair<WriteVecALU, [SKLPort15], 1>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts.
@@ -373,7 +374,8 @@
defm : SKLWriteResPair<WriteFHAdd, [SKLPort5,SKLPort01], 6, [2,1], 3, 6>;
defm : SKLWriteResPair<WriteFHAddY, [SKLPort5,SKLPort01], 6, [2,1], 3, 7>;
-defm : SKLWriteResPair<WritePHAdd, [SKLPort15], 1>;
+defm : SKLWriteResPair<WritePHAdd, [SKLPort5,SKLPort015], 3, [2,1], 3, 6>;
+defm : SKLWriteResPair<WritePHAddY, [SKLPort5,SKLPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@@ -429,38 +431,7 @@
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PABSB(Y?)rr",
- "(V?)PABSD(Y?)rr",
- "(V?)PABSW(Y?)rr",
- "(V?)PADDSB(Y?)rr",
- "(V?)PADDSW(Y?)rr",
- "(V?)PADDUSB(Y?)rr",
- "(V?)PADDUSW(Y?)rr",
- "(V?)PAVGB(Y?)rr",
- "(V?)PAVGW(Y?)rr",
- "(V?)PCMPEQB(Y?)rr",
- "(V?)PCMPEQD(Y?)rr",
- "(V?)PCMPEQQ(Y?)rr",
- "(V?)PCMPEQW(Y?)rr",
- "(V?)PCMPGTB(Y?)rr",
- "(V?)PCMPGTD(Y?)rr",
- "(V?)PCMPGTW(Y?)rr",
- "(V?)PMAXSB(Y?)rr",
- "(V?)PMAXSD(Y?)rr",
- "(V?)PMAXSW(Y?)rr",
- "(V?)PMAXUB(Y?)rr",
- "(V?)PMAXUD(Y?)rr",
- "(V?)PMAXUW(Y?)rr",
- "(V?)PMINSB(Y?)rr",
- "(V?)PMINSD(Y?)rr",
- "(V?)PMINSW(Y?)rr",
- "(V?)PMINUB(Y?)rr",
- "(V?)PMINUD(Y?)rr",
- "(V?)PMINUW(Y?)rr",
- "(V?)PSIGNB(Y?)rr",
- "(V?)PSIGND(Y?)rr",
- "(V?)PSIGNW(Y?)rr",
- "(V?)PSLLD(Y?)ri",
+def: InstRW<[SKLWriteResGroup5], (instregex "(V?)PSLLD(Y?)ri",
"(V?)PSLLQ(Y?)ri",
"VPSLLVD(Y?)rr",
"VPSLLVQ(Y?)rr",
@@ -472,11 +443,7 @@
"(V?)PSRLQ(Y?)ri",
"VPSRLVD(Y?)rr",
"VPSRLVQ(Y?)rr",
- "(V?)PSRLW(Y?)ri",
- "(V?)PSUBSB(Y?)rr",
- "(V?)PSUBSW(Y?)rr",
- "(V?)PSUBUSB(Y?)rr",
- "(V?)PSUBUSW(Y?)rr")>;
+ "(V?)PSRLW(Y?)ri")>;
def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> {
let Latency = 1;
@@ -828,16 +795,6 @@
}
def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
-def SKLWriteResGroup38 : SchedWriteRes<[SKLPort5,SKLPort015]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SKLWriteResGroup38], (instregex "(V?)PHADDD(Y?)rr",
- "(V?)PHADDW(Y?)rr",
- "(V?)PHSUBD(Y?)rr",
- "(V?)PHSUBW(Y?)rr")>;
-
def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -1304,38 +1261,7 @@
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PABSBrm",
- "(V?)PABSDrm",
- "(V?)PABSWrm",
- "(V?)PADDSBrm",
- "(V?)PADDSWrm",
- "(V?)PADDUSBrm",
- "(V?)PADDUSWrm",
- "(V?)PAVGBrm",
- "(V?)PAVGWrm",
- "(V?)PCMPEQBrm",
- "(V?)PCMPEQDrm",
- "(V?)PCMPEQQrm",
- "(V?)PCMPEQWrm",
- "(V?)PCMPGTBrm",
- "(V?)PCMPGTDrm",
- "(V?)PCMPGTWrm",
- "(V?)PMAXSBrm",
- "(V?)PMAXSDrm",
- "(V?)PMAXSWrm",
- "(V?)PMAXUBrm",
- "(V?)PMAXUDrm",
- "(V?)PMAXUWrm",
- "(V?)PMINSBrm",
- "(V?)PMINSDrm",
- "(V?)PMINSWrm",
- "(V?)PMINUBrm",
- "(V?)PMINUDrm",
- "(V?)PMINUWrm",
- "(V?)PSIGNBrm",
- "(V?)PSIGNDrm",
- "(V?)PSIGNWrm",
- "(V?)PSLLDrm",
+def: InstRW<[SKLWriteResGroup90], (instregex "(V?)PSLLDrm",
"(V?)PSLLQrm",
"VPSLLVDrm",
"VPSLLVQrm",
@@ -1347,11 +1273,7 @@
"(V?)PSRLQrm",
"(V?)PSRLVDrm",
"VPSRLVQrm",
- "(V?)PSRLWrm",
- "(V?)PSUBSBrm",
- "(V?)PSUBSWrm",
- "(V?)PSUBUSBrm",
- "(V?)PSUBUSWrm")>;
+ "(V?)PSRLWrm")>;
def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 7;
@@ -1514,38 +1436,7 @@
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup109], (instregex "VPABSBYrm",
- "VPABSDYrm",
- "VPABSWYrm",
- "VPADDSBYrm",
- "VPADDSWYrm",
- "VPADDUSBYrm",
- "VPADDUSWYrm",
- "VPAVGBYrm",
- "VPAVGWYrm",
- "VPCMPEQBYrm",
- "VPCMPEQDYrm",
- "VPCMPEQQYrm",
- "VPCMPEQWYrm",
- "VPCMPGTBYrm",
- "VPCMPGTDYrm",
- "VPCMPGTWYrm",
- "VPMAXSBYrm",
- "VPMAXSDYrm",
- "VPMAXSWYrm",
- "VPMAXUBYrm",
- "VPMAXUDYrm",
- "VPMAXUWYrm",
- "VPMINSBYrm",
- "VPMINSDYrm",
- "VPMINSWYrm",
- "VPMINUBYrm",
- "VPMINUDYrm",
- "VPMINUWYrm",
- "VPSIGNBYrm",
- "VPSIGNDYrm",
- "VPSIGNWYrm",
- "VPSLLDYrm",
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLDYrm",
"VPSLLQYrm",
"VPSLLVDYrm",
"VPSLLVQYrm",
@@ -1557,11 +1448,7 @@
"VPSRLQYrm",
"VPSRLVDYrm",
"VPSRLVQYrm",
- "VPSRLWYrm",
- "VPSUBSBYrm",
- "VPSUBSWYrm",
- "VPSUBUSBYrm",
- "VPSUBUSWYrm")>;
+ "VPSRLWYrm")>;
def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
let Latency = 8;
@@ -1725,16 +1612,6 @@
def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKLWriteResGroup129 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKLWriteResGroup129], (instregex "(V?)PHADDDrm",
- "(V?)PHADDWrm",
- "(V?)PHSUBDrm",
- "(V?)PHSUBWrm")>;
-
def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -1807,16 +1684,6 @@
def: InstRW<[SKLWriteResGroup140], (instregex "VPHADDSWYrm",
"VPHSUBSWYrm")>;
-def SKLWriteResGroup141 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKLWriteResGroup141], (instregex "VPHADDDYrm",
- "VPHADDWYrm",
- "VPHSUBDYrm",
- "VPHSUBWYrm")>;
-
def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index f951309..b1bd040 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -195,7 +195,8 @@
def : WriteRes<WriteVecStore, [SKXPort237, SKXPort4]>;
def : WriteRes<WriteVecMove, [SKXPort015]>;
-defm : SKXWriteResPair<WriteVecALU, [SKXPort15], 1>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM).
defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM).
defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts.
@@ -373,7 +374,8 @@
defm : SKXWriteResPair<WriteFHAdd, [SKXPort5,SKXPort015], 6, [2,1], 3, 6>;
defm : SKXWriteResPair<WriteFHAddY, [SKXPort5,SKXPort015], 6, [2,1], 3, 7>;
-defm : SKXWriteResPair<WritePHAdd, [SKXPort15], 1>;
+defm : SKXWriteResPair<WritePHAdd, [SKXPort5,SKXPort015], 3, [2,1], 3, 6>;
+defm : SKXWriteResPair<WritePHAddY, [SKXPort5,SKXPort015], 3, [2,1], 3, 7>;
// Remaining instrs.
@@ -477,122 +479,7 @@
let NumMicroOps = 1;
let ResourceCycles = [1];
}
-def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBYrr",
- "VPABSBZ128rr",
- "VPABSBZ256rr",
- "VPABSBZrr",
- "(V?)PABSBrr",
- "VPABSDYrr",
- "VPABSDZ128rr",
- "VPABSDZ256rr",
- "VPABSDZrr",
- "(V?)PABSDrr",
- "VPABSQZ128rr",
- "VPABSQZ256rr",
- "VPABSQZrr",
- "VPABSWYrr",
- "VPABSWZ128rr",
- "VPABSWZ256rr",
- "VPABSWZrr",
- "(V?)PABSWrr",
- "VPADDSBYrr",
- "VPADDSBZ128rr",
- "VPADDSBZ256rr",
- "VPADDSBZrr",
- "(V?)PADDSBrr",
- "VPADDSWYrr",
- "VPADDSWZ128rr",
- "VPADDSWZ256rr",
- "VPADDSWZrr",
- "(V?)PADDSWrr",
- "VPADDUSBYrr",
- "VPADDUSBZ128rr",
- "VPADDUSBZ256rr",
- "VPADDUSBZrr",
- "(V?)PADDUSBrr",
- "VPADDUSWYrr",
- "VPADDUSWZ128rr",
- "VPADDUSWZ256rr",
- "VPADDUSWZrr",
- "(V?)PADDUSWrr",
- "VPAVGBYrr",
- "VPAVGBZ128rr",
- "VPAVGBZ256rr",
- "VPAVGBZrr",
- "(V?)PAVGBrr",
- "VPAVGWYrr",
- "VPAVGWZ128rr",
- "VPAVGWZ256rr",
- "VPAVGWZrr",
- "(V?)PAVGWrr",
- "(V?)PCMPEQB(Y?)rr",
- "(V?)PCMPEQD(Y?)rr",
- "(V?)PCMPEQQ(Y?)rr",
- "(V?)PCMPEQW(Y?)rr",
- "(V?)PCMPGTB(Y?)rr",
- "(V?)PCMPGTD(Y?)rr",
- "(V?)PCMPGTW(Y?)rr",
- "VPMAXSBYrr",
- "VPMAXSBZ128rr",
- "VPMAXSBZ256rr",
- "VPMAXSBZrr",
- "(V?)PMAXSBrr",
- "VPMAXSDYrr",
- "VPMAXSDZ128rr",
- "VPMAXSDZ256rr",
- "VPMAXSDZrr",
- "(V?)PMAXSDrr",
- "VPMAXSWYrr",
- "VPMAXSWZ128rr",
- "VPMAXSWZ256rr",
- "VPMAXSWZrr",
- "(V?)PMAXSWrr",
- "VPMAXUBYrr",
- "VPMAXUBZ128rr",
- "VPMAXUBZ256rr",
- "VPMAXUBZrr",
- "(V?)PMAXUBrr",
- "VPMAXUDYrr",
- "VPMAXUDZ128rr",
- "VPMAXUDZ256rr",
- "VPMAXUDZrr",
- "(V?)PMAXUDrr",
- "VPMAXUWYrr",
- "VPMAXUWZ128rr",
- "VPMAXUWZ256rr",
- "VPMAXUWZrr",
- "(V?)PMAXUWrr",
- "VPMINSBYrr",
- "VPMINSBZ128rr",
- "VPMINSBZ256rr",
- "VPMINSBZrr",
- "(V?)PMINSBrr",
- "VPMINSDYrr",
- "VPMINSDZ128rr",
- "VPMINSDZ256rr",
- "VPMINSDZrr",
- "(V?)PMINSDrr",
- "VPMINSWYrr",
- "VPMINSWZ128rr",
- "VPMINSWZ256rr",
- "VPMINSWZrr",
- "(V?)PMINSWrr",
- "VPMINUBYrr",
- "VPMINUBZ128rr",
- "VPMINUBZ256rr",
- "VPMINUBZrr",
- "(V?)PMINUBrr",
- "VPMINUDYrr",
- "VPMINUDZ128rr",
- "VPMINUDZ256rr",
- "VPMINUDZrr",
- "(V?)PMINUDrr",
- "VPMINUWYrr",
- "VPMINUWZ128rr",
- "VPMINUWZ256rr",
- "VPMINUWZrr",
- "(V?)PMINUWrr",
- "VPROLDZ128ri",
+def: InstRW<[SKXWriteResGroup5], (instregex "VPROLDZ128ri",
"VPROLDZ256ri",
"VPROLDZri",
"VPROLQZ128ri",
@@ -616,9 +503,6 @@
"VPRORVQZ128rr",
"VPRORVQZ256rr",
"VPRORVQZrr",
- "(V?)PSIGNB(Y?)rr",
- "(V?)PSIGND(Y?)rr",
- "(V?)PSIGNW(Y?)rr",
"(V?)PSLLDYri",
"VPSLLDZ128ri",
"VPSLLDZ256ri",
@@ -698,27 +582,7 @@
"VPSRLWZ128ri",
"VPSRLWZ256ri",
"VPSRLWZri",
- "(V?)PSRLWri",
- "VPSUBSBYrr",
- "VPSUBSBZ128rr",
- "VPSUBSBZ256rr",
- "VPSUBSBZrr",
- "(V?)PSUBSBrr",
- "VPSUBSWYrr",
- "VPSUBSWZ128rr",
- "VPSUBSWZ256rr",
- "VPSUBSWZrr",
- "(V?)PSUBSWrr",
- "VPSUBUSBYrr",
- "VPSUBUSBZ128rr",
- "VPSUBUSBZ256rr",
- "VPSUBUSBZrr",
- "(V?)PSUBUSBrr",
- "VPSUBUSWYrr",
- "VPSUBUSWZ128rr",
- "VPSUBUSWZ256rr",
- "VPSUBUSWZrr",
- "(V?)PSUBUSWrr")>;
+ "(V?)PSRLWri")>;
def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
let Latency = 1;
@@ -1384,13 +1248,6 @@
}
def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PH(ADD|SUB)(D|W)rr")>;
-def SKXWriteResGroup40 : SchedWriteRes<[SKXPort5,SKXPort015]> {
- let Latency = 3;
- let NumMicroOps = 3;
- let ResourceCycles = [2,1];
-}
-def: InstRW<[SKXWriteResGroup40], (instregex "(V?)PH(ADD|SUB)(D|W)(Y?)rr")>;
-
def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -2214,57 +2071,7 @@
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBZ128rm(b?)",
- "(V?)PABSBrm",
- "VPABSDZ128rm(b?)",
- "(V?)PABSDrm",
- "VPABSQZ128rm(b?)",
- "VPABSWZ128rm(b?)",
- "(V?)PABSWrm",
- "VPADDSBZ128rm(b?)",
- "(V?)PADDSBrm",
- "VPADDSWZ128rm(b?)",
- "(V?)PADDSWrm",
- "VPADDUSBZ128rm(b?)",
- "(V?)PADDUSBrm",
- "VPADDUSWZ128rm(b?)",
- "(V?)PADDUSWrm",
- "VPAVGBZ128rm(b?)",
- "(V?)PAVGBrm",
- "VPAVGWZ128rm(b?)",
- "(V?)PAVGWrm",
- "(V?)PCMPEQBrm",
- "(V?)PCMPEQDrm",
- "(V?)PCMPEQQrm",
- "(V?)PCMPEQWrm",
- "(V?)PCMPGTBrm",
- "(V?)PCMPGTDrm",
- "(V?)PCMPGTWrm",
- "VPMAXSBZ128rm(b?)",
- "(V?)PMAXSBrm",
- "VPMAXSDZ128rm(b?)",
- "(V?)PMAXSDrm",
- "VPMAXSWZ128rm(b?)",
- "(V?)PMAXSWrm",
- "VPMAXUBZ128rm(b?)",
- "(V?)PMAXUBrm",
- "VPMAXUDZ128rm(b?)",
- "(V?)PMAXUDrm",
- "VPMAXUWZ128rm(b?)",
- "(V?)PMAXUWrm",
- "VPMINSBZ128rm(b?)",
- "(V?)PMINSBrm",
- "VPMINSDZ128rm(b?)",
- "(V?)PMINSDrm",
- "VPMINSWZ128rm(b?)",
- "(V?)PMINSWrm",
- "VPMINUBZ128rm(b?)",
- "(V?)PMINUBrm",
- "VPMINUDZ128rm(b?)",
- "(V?)PMINUDrm",
- "VPMINUWZ128rm(b?)",
- "(V?)PMINUWrm",
- "VPROLDZ128m(b?)i",
+def: InstRW<[SKXWriteResGroup94], (instregex "VPROLDZ128m(b?)i",
"VPROLQZ128m(b?)i",
"VPROLVDZ128rm(b?)",
"VPROLVQZ128rm(b?)",
@@ -2272,9 +2079,6 @@
"VPRORQZ128m(b?)i",
"VPRORVDZ128rm(b?)",
"VPRORVQZ128rm(b?)",
- "(V?)PSIGNBrm",
- "(V?)PSIGNDrm",
- "(V?)PSIGNWrm",
"VPSLLDZ128m(b?)i",
"VPSLLDZ128rm(b?)",
"(V?)PSLLDrm",
@@ -2314,15 +2118,7 @@
"VPSRLVWZ128rm(b?)",
"VPSRLWZ128mi(b?)",
"VPSRLWZ128rm(b?)",
- "(V?)PSRLWrm",
- "VPSUBSBZ128rm(b?)",
- "(V?)PSUBSBrm",
- "VPSUBSWZ128rm(b?)",
- "(V?)PSUBSWrm",
- "VPSUBUSBZ128rm(b?)",
- "(V?)PSUBUSBrm",
- "VPSUBUSWZ128rm(b?)",
- "(V?)PSUBUSWrm")>;
+ "(V?)PSRLWrm")>;
def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 7;
@@ -2615,79 +2411,7 @@
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup120], (instregex "VPABSBYrm",
- "VPABSBZ256rm(b?)",
- "VPABSBZrm(b?)",
- "VPABSDYrm",
- "VPABSDZ256rm(b?)",
- "VPABSDZrm(b?)",
- "VPABSQZ256rm(b?)",
- "VPABSQZrm(b?)",
- "VPABSWYrm",
- "VPABSWZ256rm(b?)",
- "VPABSWZrm(b?)",
- "VPADDSBYrm",
- "VPADDSBZ256rm(b?)",
- "VPADDSBZrm(b?)",
- "VPADDSWYrm",
- "VPADDSWZ256rm(b?)",
- "VPADDSWZrm(b?)",
- "VPADDUSBYrm",
- "VPADDUSBZ256rm(b?)",
- "VPADDUSBZrm(b?)",
- "VPADDUSWYrm",
- "VPADDUSWZ256rm(b?)",
- "VPADDUSWZrm(b?)",
- "VPAVGBYrm",
- "VPAVGBZ256rm(b?)",
- "VPAVGBZrm(b?)",
- "VPAVGWYrm",
- "VPAVGWZ256rm(b?)",
- "VPAVGWZrm(b?)",
- "VPCMPEQBYrm",
- "VPCMPEQDYrm",
- "VPCMPEQQYrm",
- "VPCMPEQWYrm",
- "VPCMPGTBYrm",
- "VPCMPGTDYrm",
- "VPCMPGTWYrm",
- "VPMAXSBYrm",
- "VPMAXSBZ256rm(b?)",
- "VPMAXSBZrm(b?)",
- "VPMAXSDYrm",
- "VPMAXSDZ256rm(b?)",
- "VPMAXSDZrm(b?)",
- "VPMAXSWYrm",
- "VPMAXSWZ256rm(b?)",
- "VPMAXSWZrm(b?)",
- "VPMAXUBYrm",
- "VPMAXUBZ256rm(b?)",
- "VPMAXUBZrm(b?)",
- "VPMAXUDYrm",
- "VPMAXUDZ256rm(b?)",
- "VPMAXUDZrm(b?)",
- "VPMAXUWYrm",
- "VPMAXUWZ256rm(b?)",
- "VPMAXUWZrm(b?)",
- "VPMINSBYrm",
- "VPMINSBZ256rm(b?)",
- "VPMINSBZrm(b?)",
- "VPMINSDYrm",
- "VPMINSDZ256rm(b?)",
- "VPMINSDZrm(b?)",
- "VPMINSWYrm",
- "VPMINSWZ256rm(b?)",
- "VPMINSWZrm(b?)",
- "VPMINUBYrm",
- "VPMINUBZ256rm(b?)",
- "VPMINUBZrm(b?)",
- "VPMINUDYrm",
- "VPMINUDZ256rm(b?)",
- "VPMINUDZrm(b?)",
- "VPMINUWYrm",
- "VPMINUWZ256rm(b?)",
- "VPMINUWZrm(b?)",
- "VPROLDZ256m(b?)i",
+def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZ256m(b?)i",
"VPROLDZm(b?)i",
"VPROLQZ256m(b?)i",
"VPROLQZm(b?)i",
@@ -2703,9 +2427,6 @@
"VPRORVDZrm(b?)",
"VPRORVQZ256rm(b?)",
"VPRORVQZrm(b?)",
- "VPSIGNBYrm",
- "VPSIGNDYrm",
- "VPSIGNWYrm",
"VPSLLDYrm",
"VPSLLDZ256m(b?)i",
"VPSLLDZ256rm(b?)",
@@ -2772,19 +2493,7 @@
"VPSRLWZ256mi(b?)",
"VPSRLWZ256rm(b?)",
"VPSRLWZmi(b?)",
- "VPSRLWZrm(b?)",
- "VPSUBSBYrm",
- "VPSUBSBZ256rm(b?)",
- "VPSUBSBZrm(b?)",
- "VPSUBSWYrm",
- "VPSUBSWZ256rm(b?)",
- "VPSUBSWZrm(b?)",
- "VPSUBUSBYrm",
- "VPSUBUSBZ256rm(b?)",
- "VPSUBUSBZrm(b?)",
- "VPSUBUSWYrm",
- "VPSUBUSWZ256rm(b?)",
- "VPSUBUSWZrm(b?)")>;
+ "VPSRLWZrm(b?)")>;
def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
let Latency = 8;
@@ -3138,16 +2847,6 @@
def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm",
"(V?)PHSUBSWrm")>;
-def SKXWriteResGroup144 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
- let Latency = 9;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKXWriteResGroup144], (instregex "(V?)PHADDDrm",
- "(V?)PHADDWrm",
- "(V?)PHSUBDrm",
- "(V?)PHSUBWrm")>;
-
def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
@@ -3315,16 +3014,6 @@
def: InstRW<[SKXWriteResGroup154], (instregex "VPHADDSWYrm",
"VPHSUBSWYrm")>;
-def SKXWriteResGroup155 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
- let Latency = 10;
- let NumMicroOps = 4;
- let ResourceCycles = [2,1,1];
-}
-def: InstRW<[SKXWriteResGroup155], (instregex "VPHADDDYrm",
- "VPHADDWYrm",
- "VPHSUBDYrm",
- "VPHSUBWYrm")>;
-
def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
let Latency = 9;
let NumMicroOps = 4;
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index b064e63..7e984a5 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -127,12 +127,14 @@
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM.
defm WritePHAdd : X86SchedWritePair;
+defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecMove : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
@@ -249,9 +251,9 @@
WriteFVarBlendY, WriteFVarBlendY>;
def SchedWriteVecALU
- : X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALU, WriteVecALU>;
+ : X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALUY, WriteVecALUY>;
def SchedWritePHAdd
- : X86SchedWriteWidths<WritePHAdd, WritePHAdd, WritePHAdd, WritePHAdd>;
+ : X86SchedWriteWidths<WritePHAdd, WritePHAdd, WritePHAddY, WritePHAddY>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogic,
WriteVecLogicY, WriteVecLogicY>;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 1e4fcf5..26bad49 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -252,6 +252,7 @@
def : WriteRes<WriteVecMove, [AtomPort01]>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
+defm : AtomWriteResPair<WriteVecALUY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
@@ -316,6 +317,7 @@
defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 4c0d6dc..c35a53e 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -404,6 +404,7 @@
def : WriteRes<WriteVecMove, [JFPU01, JVALU]>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>;
@@ -468,6 +469,7 @@
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
+defm : JWriteResFpuPair<WritePHAddY, [JFPU01, JVALU], 1>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index ab21fda..6e7a010 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -166,6 +166,7 @@
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
// FIXME: The below is closer to correct, but caused some perf regressions.
@@ -201,6 +202,7 @@
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 451a3b4..2f4a3ef 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -238,7 +238,9 @@
defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WritePHAddY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME