[X86] Cleanup WriteFStore/WriteVecStore schedules
MOVNTPD/MOVNTPS should be WriteFStore rather than WriteVecStore.
Standardized BDW/HSW/SKL/SKX WriteFStore/WriteVecStore - fixes some missed instregex patterns. (V)MASKMOVDQU was already using the default; its cost gets increased, but it is still nowhere near the real cost of that nasty instruction.
llvm-svn: 331864
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index b52ca8b..473bea5 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3020,7 +3020,7 @@
} // ExeDomain, SchedRW
} // Predicates
-let SchedRW = [WriteVecStore] in {
+let SchedRW = [WriteFStore] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index d97efee..3fa70f1 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -164,7 +164,7 @@
defm : X86WriteRes<WriteFLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
-defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
@@ -258,7 +258,7 @@
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
-defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
@@ -566,26 +566,15 @@
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP(32|64|80)m",
- "VEXTRACTF128mr",
- "VEXTRACTI128mr",
- "(V?)MOVAPD(Y?)mr",
- "(V?)MOVAPS(Y?)mr",
- "(V?)MOVDQA(Y?)mr",
- "(V?)MOVDQU(Y?)mr",
"(V?)MOVHPDmr",
"(V?)MOVHPSmr",
"(V?)MOVLPDmr",
"(V?)MOVLPSmr",
- "(V?)MOVNTDQ(V?)mr",
- "(V?)MOVNTPD(V?)mr",
- "(V?)MOVNTPS(V?)mr",
"(V?)MOVPDI2DImr",
"(V?)MOVPQI2QImr",
"(V?)MOVPQIto64mr",
"(V?)MOVSDmr",
- "(V?)MOVSSmr",
- "(V?)MOVUPD(Y?)mr",
- "(V?)MOVUPS(Y?)mr")>;
+ "(V?)MOVSSmr")>;
def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
let Latency = 2;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 5a612d1..aef283a 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -155,7 +155,7 @@
defm : X86WriteRes<WriteFLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
-defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
@@ -250,7 +250,7 @@
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
-defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
@@ -759,26 +759,15 @@
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP(32|64|80)m",
- "VEXTRACTF128mr",
- "VEXTRACTI128mr",
- "(V?)MOVAPD(Y?)mr",
- "(V?)MOVAPS(V?)mr",
- "(V?)MOVDQA(Y?)mr",
- "(V?)MOVDQU(Y?)mr",
"(V?)MOVHPDmr",
"(V?)MOVHPSmr",
"(V?)MOVLPDmr",
"(V?)MOVLPSmr",
- "(V?)MOVNTDQ(Y?)mr",
- "(V?)MOVNTPD(Y?)mr",
- "(V?)MOVNTPS(Y?)mr",
"(V?)MOVPDI2DImr",
"(V?)MOVPQI2QImr",
"(V?)MOVPQIto64mr",
"(V?)MOVSDmr",
"(V?)MOVSSmr",
- "(V?)MOVUPD(Y?)mr",
- "(V?)MOVUPS(Y?)mr",
"VMPTRSTm")>;
def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> {
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index fa145d9..84b0055 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -160,7 +160,7 @@
defm : X86WriteRes<WriteFLoad, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
@@ -251,7 +251,7 @@
defm : X86WriteRes<WriteVecLoad, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
@@ -596,26 +596,15 @@
"MOVNTI_64mr",
"MOVNTImr",
"ST_FP(32|64|80)m",
- "VEXTRACTF128mr",
- "VEXTRACTI128mr",
- "(V?)MOVAPDYmr",
- "(V?)MOVAPS(Y?)mr",
- "(V?)MOVDQA(Y?)mr",
- "(V?)MOVDQU(Y?)mr",
"(V?)MOVHPDmr",
"(V?)MOVHPSmr",
"(V?)MOVLPDmr",
"(V?)MOVLPSmr",
- "(V?)MOVNTDQ(Y?)mr",
- "(V?)MOVNTPD(Y?)mr",
- "(V?)MOVNTPS(Y?)mr",
"(V?)MOVPDI2DImr",
"(V?)MOVPQI2QImr",
"(V?)MOVPQIto64mr",
"(V?)MOVSDmr",
"(V?)MOVSSmr",
- "(V?)MOVUPD(Y?)mr",
- "(V?)MOVUPS(Y?)mr",
"VMPTRSTm")>;
def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> {
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 0699937..76755c6 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -160,7 +160,7 @@
defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
@@ -251,7 +251,7 @@
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
@@ -701,10 +701,6 @@
"MMX_MOVD64mr",
"MMX_MOVNTQmr",
"MMX_MOVQ64mr",
- "MOVAPDmr",
- "MOVAPSmr",
- "MOVDQAmr",
- "MOVDQUmr",
"MOVHPDmr",
"MOVHPSmr",
"MOVLPDmr",
@@ -712,63 +708,14 @@
"MOVNTDQmr",
"MOVNTI_64mr",
"MOVNTImr",
- "MOVNTPDmr",
- "MOVNTPSmr",
"MOVPDI2DImr",
"MOVPQI2QImr",
"MOVPQIto64mr",
"MOVSDmr",
"MOVSSmr",
- "MOVUPDmr",
- "MOVUPSmr",
"ST_FP32m",
"ST_FP64m",
"ST_FP80m",
- "VEXTRACTF128mr",
- "VEXTRACTF32x4Z256mr(b?)",
- "VEXTRACTF32x4Zmr(b?)",
- "VEXTRACTF32x8Zmr(b?)",
- "VEXTRACTF64x2Z256mr(b?)",
- "VEXTRACTF64x2Zmr(b?)",
- "VEXTRACTF64x4Zmr(b?)",
- "VEXTRACTI128mr",
- "VEXTRACTI32x4Z256mr(b?)",
- "VEXTRACTI32x4Zmr(b?)",
- "VEXTRACTI32x8Zmr(b?)",
- "VEXTRACTI64x2Z256mr(b?)",
- "VEXTRACTI64x2Zmr(b?)",
- "VEXTRACTI64x4Zmr(b?)",
- "VMOVAPDYmr",
- "VMOVAPDZ128mr(b?)",
- "VMOVAPDZ256mr(b?)",
- "VMOVAPDZmr(b?)",
- "VMOVAPDmr",
- "VMOVAPSYmr",
- "VMOVAPSZ128mr(b?)",
- "VMOVAPSZ256mr(b?)",
- "VMOVAPSZmr(b?)",
- "VMOVAPSmr",
- "VMOVDQA32Z128mr(b?)",
- "VMOVDQA32Z256mr(b?)",
- "VMOVDQA32Zmr(b?)",
- "VMOVDQA64Z128mr(b?)",
- "VMOVDQA64Z256mr(b?)",
- "VMOVDQA64Zmr(b?)",
- "VMOVDQAYmr",
- "VMOVDQAmr",
- "VMOVDQU16Z128mr(b?)",
- "VMOVDQU16Z256mr(b?)",
- "VMOVDQU16Zmr(b?)",
- "VMOVDQU32Z128mr(b?)",
- "VMOVDQU32Z256mr(b?)",
- "VMOVDQU32Zmr(b?)",
- "VMOVDQU64Z128mr(b?)",
- "VMOVDQU64Z256mr(b?)",
- "VMOVDQU64Zmr(b?)",
- "VMOVDQU8Z128mr(b?)",
- "VMOVDQU8Z256mr(b?)",
- "VMOVDQUYmr",
- "VMOVDQUmr",
"VMOVHPDZ128mr(b?)",
"VMOVHPDmr",
"VMOVHPSZ128mr(b?)",
@@ -777,21 +724,6 @@
"VMOVLPDmr",
"VMOVLPSZ128mr(b?)",
"VMOVLPSmr",
- "VMOVNTDQYmr",
- "VMOVNTDQZ128mr(b?)",
- "VMOVNTDQZ256mr(b?)",
- "VMOVNTDQZmr(b?)",
- "VMOVNTDQmr",
- "VMOVNTPDYmr",
- "VMOVNTPDZ128mr(b?)",
- "VMOVNTPDZ256mr(b?)",
- "VMOVNTPDZmr(b?)",
- "VMOVNTPDmr",
- "VMOVNTPSYmr",
- "VMOVNTPSZ128mr(b?)",
- "VMOVNTPSZ256mr(b?)",
- "VMOVNTPSZmr(b?)",
- "VMOVNTPSmr",
"VMOVPDI2DIZmr(b?)",
"VMOVPDI2DImr",
"VMOVPQI(2QI|to64)Zmr(b?)",
@@ -801,16 +733,6 @@
"VMOVSDmr",
"VMOVSSZmr(b?)",
"VMOVSSmr",
- "VMOVUPDYmr",
- "VMOVUPDZ128mr(b?)",
- "VMOVUPDZ256mr(b?)",
- "VMOVUPDZmr(b?)",
- "VMOVUPDmr",
- "VMOVUPSYmr",
- "VMOVUPSZ128mr(b?)",
- "VMOVUPSZ256mr(b?)",
- "VMOVUPSZmr(b?)",
- "VMOVUPSmr",
"VMPTRSTm")>;
def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> {
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index ce99b58..3572cc1 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -128,9 +128,9 @@
; CHECK-NEXT: movzbl (%rax), %eax ## encoding: [0x0f,0xb6,0x00]
; CHECK-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
-; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
; CHECK-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xd8]
-; CHECK-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x4c,0x24,0xe8]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xe8]
; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]
entry:
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 459bce3..5336995 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1207,7 +1207,7 @@
# CHECK-NEXT: 1 5 0.50 * vlddqu (%rax), %xmm2
# CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %ymm2
# CHECK-NEXT: 3 7 1.00 * * * vldmxcsr (%rax)
-# CHECK-NEXT: 1 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 2 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 3 7 2.00 * vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 8 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2
# CHECK-NEXT: 4 5 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
@@ -1287,13 +1287,13 @@
# CHECK-NEXT: 1 3 1.00 vmovmskps %xmm0, %ecx
# CHECK-NEXT: 1 3 1.00 vmovmskps %ymm0, %ecx
# CHECK-NEXT: 2 1 1.00 * vmovntdq %xmm0, (%rax)
-# CHECK-NEXT: 1 1 1.00 * vmovntdq %ymm0, (%rax)
+# CHECK-NEXT: 2 1 1.00 * vmovntdq %ymm0, (%rax)
# CHECK-NEXT: 1 5 0.50 * vmovntdqa (%rax), %xmm2
# CHECK-NEXT: 1 6 0.50 * vmovntdqa (%rax), %ymm2
# CHECK-NEXT: 2 1 1.00 * vmovntpd %xmm0, (%rax)
-# CHECK-NEXT: 1 1 1.00 * vmovntpd %ymm0, (%rax)
+# CHECK-NEXT: 2 1 1.00 * vmovntpd %ymm0, (%rax)
# CHECK-NEXT: 2 1 1.00 * vmovntps %xmm0, (%rax)
-# CHECK-NEXT: 1 1 1.00 * vmovntps %ymm0, (%rax)
+# CHECK-NEXT: 2 1 1.00 * vmovntps %ymm0, (%rax)
# CHECK-NEXT: 1 1 0.33 vmovq %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 vmovq %rax, %xmm2
# CHECK-NEXT: 1 5 0.50 * vmovq (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
index a9eb62c..2342e99 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
@@ -457,7 +457,7 @@
# CHECK-NEXT: 1 14 4.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 2 19 8.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 2 2 0.50 * * * lfence
-# CHECK-NEXT: 1 1 1.00 * * * maskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 2 1 1.00 * * * maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index 07884ce..bf2b5ea 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1207,7 +1207,7 @@
# CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
# CHECK-NEXT: 3 7 1.00 * * * vldmxcsr (%rax)
-# CHECK-NEXT: 1 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 2 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 3 8 2.00 * vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 9 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2
# CHECK-NEXT: 4 5 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
@@ -1250,7 +1250,7 @@
# CHECK-NEXT: 2 1 1.00 * vmovaps %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * vmovaps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 vmovaps %ymm0, %ymm2
-# CHECK-NEXT: 1 1 1.00 * vmovaps %ymm0, (%rax)
+# CHECK-NEXT: 2 1 1.00 * vmovaps %ymm0, (%rax)
# CHECK-NEXT: 1 7 0.50 * vmovaps (%rax), %ymm2
# CHECK-NEXT: 1 1 1.00 vmovd %eax, %xmm2
# CHECK-NEXT: 1 5 0.50 * vmovd (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s
index a190193..883690b 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s
@@ -457,7 +457,7 @@
# CHECK-NEXT: 1 20 14.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 2 25 14.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 2 2 0.50 * * * lfence
-# CHECK-NEXT: 1 1 1.00 * * * maskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 2 1 1.00 * * * maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index 42685ba..9b0e41d 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1207,7 +1207,7 @@
# CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
# CHECK-NEXT: 3 7 1.00 * * * vldmxcsr (%rax)
-# CHECK-NEXT: 1 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 2 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 2 7 0.50 * vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 8 0.50 * vmaskmovpd (%rax), %ymm0, %ymm2
# CHECK-NEXT: 2 2 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
@@ -1241,7 +1241,7 @@
# CHECK-NEXT: 1 4 0.50 vminss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 0.50 * vminss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vmovapd %xmm0, %xmm2
-# CHECK-NEXT: 1 1 1.00 * vmovapd %xmm0, (%rax)
+# CHECK-NEXT: 2 1 1.00 * vmovapd %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * vmovapd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 vmovapd %ymm0, %ymm2
# CHECK-NEXT: 2 1 1.00 * vmovapd %ymm0, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
index a0a06f2..060c4e1 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
@@ -457,7 +457,7 @@
# CHECK-NEXT: 1 14 3.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 2 19 4.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 2 2 0.50 * * * lfence
-# CHECK-NEXT: 1 1 1.00 * * * maskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 2 1 1.00 * * * maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 4 0.50 maxpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.50 maxsd %xmm0, %xmm2
@@ -467,7 +467,7 @@
# CHECK-NEXT: 1 4 0.50 minsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 0.50 * minsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movapd %xmm0, %xmm2
-# CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax)
+# CHECK-NEXT: 2 1 1.00 * movapd %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movapd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movd %eax, %xmm2
# CHECK-NEXT: 1 5 0.50 * movd (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
index fea3250..c30c862 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
@@ -1207,7 +1207,7 @@
# CHECK-NEXT: 1 6 0.50 * vlddqu (%rax), %xmm2
# CHECK-NEXT: 1 7 0.50 * vlddqu (%rax), %ymm2
# CHECK-NEXT: 3 7 1.00 * * * vldmxcsr (%rax)
-# CHECK-NEXT: 1 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 2 1 1.00 * * * vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 2 7 0.50 * vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 8 0.50 * vmaskmovpd (%rax), %ymm0, %ymm2
# CHECK-NEXT: 2 2 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s
index 281cf14..dd7e8fd 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s
@@ -457,7 +457,7 @@
# CHECK-NEXT: 1 14 3.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 2 19 4.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 2 2 0.50 * * * lfence
-# CHECK-NEXT: 1 1 1.00 * * * maskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 2 1 1.00 * * * maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 4 0.33 maxpd %xmm0, %xmm2
# CHECK-NEXT: 2 10 0.50 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 4 0.33 maxsd %xmm0, %xmm2