[X86] Add NT load/store scheduler classes
llvm-svn: 332274
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index cb995af..d8991fa 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4373,11 +4373,11 @@
}
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
- SchedWriteVecMoveLS>, PD;
+ SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
- SchedWriteFMoveLS>, PD, VEX_W;
+ SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
- SchedWriteFMoveLS>, PS;
+ SchedWriteFMoveLSNT>, PS;
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index 57a5bee..c716bbc 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -272,7 +272,7 @@
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
- Sched<[SchedWriteVecMoveLS.MMX.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
let AddedComplexity = 15 in
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 0731bdb..cc615e4 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3017,7 +3017,7 @@
let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in {
-let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3030,7 +3030,7 @@
addr:$dst)]>, VEX, VEX_WIG;
} // SchedRW
-let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3049,17 +3049,17 @@
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src),
addr:$dst)]>, VEX, VEX_WIG,
- Sched<[SchedWriteVecMoveLS.XMM.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)]>, VEX, VEX_L, VEX_WIG,
- Sched<[SchedWriteVecMoveLS.YMM.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates
-let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -3068,12 +3068,12 @@
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW
-let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in
+let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStoreNT] in {
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@@ -6409,14 +6409,14 @@
let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>;
+ Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
@@ -6935,7 +6935,7 @@
// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
-let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in {
+let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
"movntss\t{$src, $dst|$dst, $src}", []>, XS;
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 7efc3a2..3af49f6 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -147,9 +147,10 @@
defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [BWPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [BWPort237, BWPort4]>;
-def : WriteRes<WriteMove, [BWPort0156]>;
+defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1,1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@@ -171,6 +172,9 @@
defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
@@ -266,11 +270,15 @@
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [BWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [BWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
@@ -578,8 +586,6 @@
}
def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 0a64f2a..7eba246 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -107,10 +107,11 @@
// Store_addr on 237.
// Store_data on 4.
-def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
-def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 5; }
-def : WriteRes<WriteMove, [HWPort0156]>;
-def : WriteRes<WriteZero, []>;
+defm : X86WriteRes<WriteStore, [HWPort237, HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [HWPort237, HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
+def : WriteRes<WriteZero, []>;
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
@@ -161,6 +162,9 @@
defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
@@ -257,11 +261,15 @@
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [HWPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [HWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [HWPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
@@ -754,8 +762,6 @@
}
def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index be380ff..953d0c2 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -96,10 +96,11 @@
// 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort23,SBPort4]>;
-def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
-def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
-def : WriteRes<WriteMove, [SBPort015]>;
-def : WriteRes<WriteZero, []>;
+def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
+def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
+def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
+def : WriteRes<WriteMove, [SBPort015]>;
+def : WriteRes<WriteZero, []>;
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
@@ -153,6 +154,9 @@
defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
@@ -237,11 +241,15 @@
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SBPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SBPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index e501993..a986d88 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -145,9 +145,10 @@
defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [SKLPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [SKLPort237, SKLPort4]>;
-def : WriteRes<WriteMove, [SKLPort0156]>;
+defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [SKLPort237, SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [SKLPort237, SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [SKLPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@@ -166,6 +167,9 @@
defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
@@ -258,11 +262,15 @@
defm : X86WriteRes<WriteVecLoad, [SKLPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKLPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SKLPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SKLPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
@@ -580,8 +588,6 @@
}
def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 4bd95ea..01bbb03 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -145,9 +145,10 @@
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [SKXPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>;
-def : WriteRes<WriteMove, [SKXPort0156]>;
+defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@@ -166,6 +167,9 @@
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
@@ -258,11 +262,15 @@
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
@@ -605,8 +613,6 @@
def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
"KMOV(B|D|Q|W)mk",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"VMOV(H|L)(PD|PS)Z128mr(b?)",
"(V?)MOV(H|L)(PD|PS)mr",
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index a0a9a91..6bbf456 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -90,9 +90,10 @@
}
// Loads, stores, and moves, not folded with other operations.
-def WriteLoad : SchedWrite;
-def WriteStore : SchedWrite;
-def WriteMove : SchedWrite;
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteStoreNT : SchedWrite;
+def WriteMove : SchedWrite;
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
@@ -145,6 +146,9 @@
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
+def WriteFStoreNT : SchedWrite;
+def WriteFStoreNTX : SchedWrite;
+def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
@@ -229,11 +233,15 @@
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
+def WriteVecLoadNT : SchedWrite;
+def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
+def WriteVecStoreNT : SchedWrite;
+def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
@@ -352,6 +360,16 @@
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
+def WriteFMoveLSNT
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
+def WriteFMoveLSNTX
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
+def WriteFMoveLSNTY
+ : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
+def SchedWriteFMoveLSNT
+ : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
+ WriteFMoveLSNTY, WriteFMoveLSNTY>;
+
def WriteVecMoveLS
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
@@ -362,6 +380,16 @@
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
+def WriteVecMoveLSNT
+ : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
+def WriteVecMoveLSNTX
+ : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
+def WriteVecMoveLSNTY
+ : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
+def SchedWriteVecMoveLSNT
+ : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
+ WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
+
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index b69a628..32e84a1 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -144,9 +144,10 @@
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [AtomPort0]>;
-def : WriteRes<WriteStore, [AtomPort0]>;
-def : WriteRes<WriteMove, [AtomPort01]>;
+def : WriteRes<WriteLoad, [AtomPort0]>;
+def : WriteRes<WriteStore, [AtomPort0]>;
+def : WriteRes<WriteStoreNT, [AtomPort0]>;
+def : WriteRes<WriteMove, [AtomPort01]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
@@ -190,6 +191,9 @@
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
def : WriteRes<WriteFStoreY, [AtomPort0]>;
+def : WriteRes<WriteFStoreNT, [AtomPort0]>;
+def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
+def : WriteRes<WriteFStoreNTY, [AtomPort0]>;
def : WriteRes<WriteFMaskedStore, [AtomPort0]>;
def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>;
@@ -281,12 +285,16 @@
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
def : WriteRes<WriteVecLoadY, [AtomPort0]>;
+def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
+def : WriteRes<WriteVecLoadNTY, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
def : WriteRes<WriteVecStoreY, [AtomPort0]>;
+def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
+def : WriteRes<WriteVecStoreNTY, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 9659745..aa0803f 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -226,9 +226,10 @@
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
-def : WriteRes<WriteStore, [JSAGU]>;
-def : WriteRes<WriteMove, [JALU01]>;
+def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteStore, [JSAGU]>;
+def : WriteRes<WriteStoreNT, [JSAGU]>;
+def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
// FIXME: These are copy and pasted from WriteLoad/Store.
@@ -277,6 +278,9 @@
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
@@ -407,12 +411,16 @@
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
@@ -572,28 +580,6 @@
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
VCVTPS2DQYrm, VCVTTPS2DQYrm)>;
-def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 2;
-}
-def : InstRW<[JWriteVMOVNTDQSt], (instrs MMX_MOVNTQmr, MOVNTDQmr, VMOVNTDQmr)>;
-
-def JWriteVMOVNTDQYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 2;
- let ResourceCycles = [2, 2, 2];
-}
-def : InstRW<[JWriteVMOVNTDQYSt], (instrs VMOVNTDQYmr)>;
-
-def JWriteMOVNTSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 3;
-}
-def : InstRW<[JWriteMOVNTSt], (instrs MOVNTPDmr, MOVNTPSmr, MOVNTSD, MOVNTSS, VMOVNTPDmr, VMOVNTPSmr)>;
-
-def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 3;
- let ResourceCycles = [2, 2, 2];
-}
-def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTPDYmr, VMOVNTPSYmr)>;
-
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 4];
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index ab39b24..cec2768 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -79,10 +79,11 @@
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
-def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
-def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
-def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
-def : WriteRes<WriteZero, []>;
+def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
+def : WriteRes<WriteZero, []>;
// Load/store MXCSR.
// FIXME: These are probably wrong. They are copy pasted from WriteStore/Load.
@@ -141,6 +142,9 @@
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
@@ -214,11 +218,15 @@
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 0e68fab..ecf5ea6 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -143,9 +143,10 @@
// operation in codegen
def : WriteRes<WriteRMW, [ZnAGU]>;
-def : WriteRes<WriteStore, [ZnAGU]>;
-def : WriteRes<WriteMove, [ZnALU]>;
-def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
+def : WriteRes<WriteStore, [ZnAGU]>;
+def : WriteRes<WriteStoreNT, [ZnAGU]>;
+def : WriteRes<WriteMove, [ZnALU]>;
+def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
@@ -197,6 +198,9 @@
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
@@ -274,11 +278,15 @@
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
@@ -1344,12 +1352,6 @@
}
def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;
-// MOVNTSS/MOVNTSD
-def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]> {
- let Latency = 8;
-}
-def : InstRW<[ZnWriteMOVNT], (instregex "MOVNTS(S|D)")>;
-
//-- SHA instructions --//
// SHA256MSG2
def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;