[X86] Clean up WriteFStore/WriteVecStore schedules

MOVNTPD/MOVNTPS should be WriteFStore

Standardized BDW/HSW/SKL/SKX WriteFStore/WriteVecStore - fixes some missed instregex patterns. (V)MASKMOVDQU was already using the default; its cost gets increased but is still nowhere near the real cost of that nasty instruction...

llvm-svn: 331864
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index b52ca8b..473bea5 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3020,7 +3020,7 @@
 } // ExeDomain, SchedRW
 } // Predicates
 
-let SchedRW = [WriteVecStore] in {
+let SchedRW = [WriteFStore] in {
 def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movntps\t{$src, $dst|$dst, $src}",
                     [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index d97efee..3fa70f1 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -164,7 +164,7 @@
 defm : X86WriteRes<WriteFLoad,         [BWPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteFMaskedLoad,   [BWPort23,BWPort5], 7, [1,2], 3>;
 defm : X86WriteRes<WriteFMaskedLoadY,  [BWPort23,BWPort5], 8, [1,2], 3>;
-defm : X86WriteRes<WriteFStore,        [BWPort237,BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStore,        [BWPort237,BWPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteFMaskedStore,  [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteFMove,         [BWPort5], 1, [1], 1>;
@@ -258,7 +258,7 @@
 defm : X86WriteRes<WriteVecLoad,         [BWPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteVecMaskedLoad,   [BWPort23,BWPort5], 7, [1,2], 3>;
 defm : X86WriteRes<WriteVecMaskedLoadY,  [BWPort23,BWPort5], 8, [1,2], 3>;
-defm : X86WriteRes<WriteVecStore,        [BWPort237,BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStore,        [BWPort237,BWPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStore,  [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteVecMove,         [BWPort015], 1, [1], 1>;
@@ -566,26 +566,15 @@
                                             "MOVNTI_64mr",
                                             "MOVNTImr",
                                             "ST_FP(32|64|80)m",
-                                            "VEXTRACTF128mr",
-                                            "VEXTRACTI128mr",
-                                            "(V?)MOVAPD(Y?)mr",
-                                            "(V?)MOVAPS(Y?)mr",
-                                            "(V?)MOVDQA(Y?)mr",
-                                            "(V?)MOVDQU(Y?)mr",
                                             "(V?)MOVHPDmr",
                                             "(V?)MOVHPSmr",
                                             "(V?)MOVLPDmr",
                                             "(V?)MOVLPSmr",
-                                            "(V?)MOVNTDQ(V?)mr",
-                                            "(V?)MOVNTPD(V?)mr",
-                                            "(V?)MOVNTPS(V?)mr",
                                             "(V?)MOVPDI2DImr",
                                             "(V?)MOVPQI2QImr",
                                             "(V?)MOVPQIto64mr",
                                             "(V?)MOVSDmr",
-                                            "(V?)MOVSSmr",
-                                            "(V?)MOVUPD(Y?)mr",
-                                            "(V?)MOVUPS(Y?)mr")>;
+                                            "(V?)MOVSSmr")>;
 
 def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
   let Latency = 2;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 5a612d1..aef283a 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -155,7 +155,7 @@
 defm : X86WriteRes<WriteFLoad,         [HWPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteFMaskedLoad,   [HWPort23,HWPort5], 8, [1,2], 3>;
 defm : X86WriteRes<WriteFMaskedLoadY,  [HWPort23,HWPort5], 9, [1,2], 3>;
-defm : X86WriteRes<WriteFStore,        [HWPort237,HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStore,        [HWPort237,HWPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteFMaskedStore,  [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteFMove,         [HWPort5], 1, [1], 1>;
@@ -250,7 +250,7 @@
 defm : X86WriteRes<WriteVecLoad,         [HWPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteVecMaskedLoad,   [HWPort23,HWPort5], 8, [1,2], 3>;
 defm : X86WriteRes<WriteVecMaskedLoadY,  [HWPort23,HWPort5], 9, [1,2], 3>;
-defm : X86WriteRes<WriteVecStore,        [HWPort237,HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStore,        [HWPort237,HWPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStore,  [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
 defm : X86WriteRes<WriteVecMove,         [HWPort015], 1, [1], 1>;
@@ -759,26 +759,15 @@
                                            "MOVNTI_64mr",
                                            "MOVNTImr",
                                            "ST_FP(32|64|80)m",
-                                           "VEXTRACTF128mr",
-                                           "VEXTRACTI128mr",
-                                           "(V?)MOVAPD(Y?)mr",
-                                           "(V?)MOVAPS(V?)mr",
-                                           "(V?)MOVDQA(Y?)mr",
-                                           "(V?)MOVDQU(Y?)mr",
                                            "(V?)MOVHPDmr",
                                            "(V?)MOVHPSmr",
                                            "(V?)MOVLPDmr",
                                            "(V?)MOVLPSmr",
-                                           "(V?)MOVNTDQ(Y?)mr",
-                                           "(V?)MOVNTPD(Y?)mr",
-                                           "(V?)MOVNTPS(Y?)mr",
                                            "(V?)MOVPDI2DImr",
                                            "(V?)MOVPQI2QImr",
                                            "(V?)MOVPQIto64mr",
                                            "(V?)MOVSDmr",
                                            "(V?)MOVSSmr",
-                                           "(V?)MOVUPD(Y?)mr",
-                                           "(V?)MOVUPS(Y?)mr",
                                            "VMPTRSTm")>;
 
 def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> {
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index fa145d9..84b0055 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -160,7 +160,7 @@
 defm : X86WriteRes<WriteFLoad,         [SKLPort23], 6, [1], 1>;
 defm : X86WriteRes<WriteFMaskedLoad,   [SKLPort23,SKLPort015], 7, [1,1], 2>;
 defm : X86WriteRes<WriteFMaskedLoadY,  [SKLPort23,SKLPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteFStore,        [SKLPort237,SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStore,        [SKLPort237,SKLPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteFMaskedStore,  [SKLPort237,SKLPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteFMove,         [SKLPort015], 1, [1], 1>;
@@ -251,7 +251,7 @@
 defm : X86WriteRes<WriteVecLoad,         [SKLPort23], 6, [1], 1>;
 defm : X86WriteRes<WriteVecMaskedLoad,   [SKLPort23,SKLPort015], 7, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedLoadY,  [SKLPort23,SKLPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteVecStore,        [SKLPort237,SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStore,        [SKLPort237,SKLPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStore,  [SKLPort237,SKLPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteVecMove,         [SKLPort015], 1, [1], 1>;
@@ -596,26 +596,15 @@
                                              "MOVNTI_64mr",
                                              "MOVNTImr",
                                              "ST_FP(32|64|80)m",
-                                             "VEXTRACTF128mr",
-                                             "VEXTRACTI128mr",
-                                             "(V?)MOVAPDYmr",
-                                             "(V?)MOVAPS(Y?)mr",
-                                             "(V?)MOVDQA(Y?)mr",
-                                             "(V?)MOVDQU(Y?)mr",
                                              "(V?)MOVHPDmr",
                                              "(V?)MOVHPSmr",
                                              "(V?)MOVLPDmr",
                                              "(V?)MOVLPSmr",
-                                             "(V?)MOVNTDQ(Y?)mr",
-                                             "(V?)MOVNTPD(Y?)mr",
-                                             "(V?)MOVNTPS(Y?)mr",
                                              "(V?)MOVPDI2DImr",
                                              "(V?)MOVPQI2QImr",
                                              "(V?)MOVPQIto64mr",
                                              "(V?)MOVSDmr",
                                              "(V?)MOVSSmr",
-                                             "(V?)MOVUPD(Y?)mr",
-                                             "(V?)MOVUPS(Y?)mr",
                                              "VMPTRSTm")>;
 
 def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> {
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 0699937..76755c6 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -160,7 +160,7 @@
 defm : X86WriteRes<WriteFLoad,         [SKXPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteFMaskedLoad,   [SKXPort23,SKXPort015], 7, [1,1], 2>;
 defm : X86WriteRes<WriteFMaskedLoadY,  [SKXPort23,SKXPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteFStore,        [SKXPort237,SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStore,        [SKXPort237,SKXPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteFMaskedStore,  [SKXPort237,SKXPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteFMove,         [SKXPort015], 1, [1], 1>;
@@ -251,7 +251,7 @@
 defm : X86WriteRes<WriteVecLoad,         [SKXPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteVecMaskedLoad,   [SKXPort23,SKXPort015], 7, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedLoadY,  [SKXPort23,SKXPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteVecStore,        [SKXPort237,SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStore,        [SKXPort237,SKXPort4], 1, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStore,  [SKXPort237,SKXPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
 defm : X86WriteRes<WriteVecMove,         [SKXPort015], 1, [1], 1>;
@@ -701,10 +701,6 @@
                                              "MMX_MOVD64mr",
                                              "MMX_MOVNTQmr",
                                              "MMX_MOVQ64mr",
-                                             "MOVAPDmr",
-                                             "MOVAPSmr",
-                                             "MOVDQAmr",
-                                             "MOVDQUmr",
                                              "MOVHPDmr",
                                              "MOVHPSmr",
                                              "MOVLPDmr",
@@ -712,63 +708,14 @@
                                              "MOVNTDQmr",
                                              "MOVNTI_64mr",
                                              "MOVNTImr",
-                                             "MOVNTPDmr",
-                                             "MOVNTPSmr",
                                              "MOVPDI2DImr",
                                              "MOVPQI2QImr",
                                              "MOVPQIto64mr",
                                              "MOVSDmr",
                                              "MOVSSmr",
-                                             "MOVUPDmr",
-                                             "MOVUPSmr",
                                              "ST_FP32m",
                                              "ST_FP64m",
                                              "ST_FP80m",
-                                             "VEXTRACTF128mr",
-                                             "VEXTRACTF32x4Z256mr(b?)",
-                                             "VEXTRACTF32x4Zmr(b?)",
-                                             "VEXTRACTF32x8Zmr(b?)",
-                                             "VEXTRACTF64x2Z256mr(b?)",
-                                             "VEXTRACTF64x2Zmr(b?)",
-                                             "VEXTRACTF64x4Zmr(b?)",
-                                             "VEXTRACTI128mr",
-                                             "VEXTRACTI32x4Z256mr(b?)",
-                                             "VEXTRACTI32x4Zmr(b?)",
-                                             "VEXTRACTI32x8Zmr(b?)",
-                                             "VEXTRACTI64x2Z256mr(b?)",
-                                             "VEXTRACTI64x2Zmr(b?)",
-                                             "VEXTRACTI64x4Zmr(b?)",
-                                             "VMOVAPDYmr",
-                                             "VMOVAPDZ128mr(b?)",
-                                             "VMOVAPDZ256mr(b?)",
-                                             "VMOVAPDZmr(b?)",
-                                             "VMOVAPDmr",
-                                             "VMOVAPSYmr",
-                                             "VMOVAPSZ128mr(b?)",
-                                             "VMOVAPSZ256mr(b?)",
-                                             "VMOVAPSZmr(b?)",
-                                             "VMOVAPSmr",
-                                             "VMOVDQA32Z128mr(b?)",
-                                             "VMOVDQA32Z256mr(b?)",
-                                             "VMOVDQA32Zmr(b?)",
-                                             "VMOVDQA64Z128mr(b?)",
-                                             "VMOVDQA64Z256mr(b?)",
-                                             "VMOVDQA64Zmr(b?)",
-                                             "VMOVDQAYmr",
-                                             "VMOVDQAmr",
-                                             "VMOVDQU16Z128mr(b?)",
-                                             "VMOVDQU16Z256mr(b?)",
-                                             "VMOVDQU16Zmr(b?)",
-                                             "VMOVDQU32Z128mr(b?)",
-                                             "VMOVDQU32Z256mr(b?)",
-                                             "VMOVDQU32Zmr(b?)",
-                                             "VMOVDQU64Z128mr(b?)",
-                                             "VMOVDQU64Z256mr(b?)",
-                                             "VMOVDQU64Zmr(b?)",
-                                             "VMOVDQU8Z128mr(b?)",
-                                             "VMOVDQU8Z256mr(b?)",
-                                             "VMOVDQUYmr",
-                                             "VMOVDQUmr",
                                              "VMOVHPDZ128mr(b?)",
                                              "VMOVHPDmr",
                                              "VMOVHPSZ128mr(b?)",
@@ -777,21 +724,6 @@
                                              "VMOVLPDmr",
                                              "VMOVLPSZ128mr(b?)",
                                              "VMOVLPSmr",
-                                             "VMOVNTDQYmr",
-                                             "VMOVNTDQZ128mr(b?)",
-                                             "VMOVNTDQZ256mr(b?)",
-                                             "VMOVNTDQZmr(b?)",
-                                             "VMOVNTDQmr",
-                                             "VMOVNTPDYmr",
-                                             "VMOVNTPDZ128mr(b?)",
-                                             "VMOVNTPDZ256mr(b?)",
-                                             "VMOVNTPDZmr(b?)",
-                                             "VMOVNTPDmr",
-                                             "VMOVNTPSYmr",
-                                             "VMOVNTPSZ128mr(b?)",
-                                             "VMOVNTPSZ256mr(b?)",
-                                             "VMOVNTPSZmr(b?)",
-                                             "VMOVNTPSmr",
                                              "VMOVPDI2DIZmr(b?)",
                                              "VMOVPDI2DImr",
                                              "VMOVPQI(2QI|to64)Zmr(b?)",
@@ -801,16 +733,6 @@
                                              "VMOVSDmr",
                                              "VMOVSSZmr(b?)",
                                              "VMOVSSmr",
-                                             "VMOVUPDYmr",
-                                             "VMOVUPDZ128mr(b?)",
-                                             "VMOVUPDZ256mr(b?)",
-                                             "VMOVUPDZmr(b?)",
-                                             "VMOVUPDmr",
-                                             "VMOVUPSYmr",
-                                             "VMOVUPSZ128mr(b?)",
-                                             "VMOVUPSZ256mr(b?)",
-                                             "VMOVUPSZmr(b?)",
-                                             "VMOVUPSmr",
                                              "VMPTRSTm")>;
 
 def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> {
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index ce99b58..3572cc1 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -128,9 +128,9 @@
 ; CHECK-NEXT:    movzbl (%rax), %eax ## encoding: [0x0f,0xb6,0x00]
 ; CHECK-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; CHECK-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
-; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9]
 ; CHECK-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xd8]
-; CHECK-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x4c,0x24,0xe8]
+; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xe8]
 ; CHECK-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
 entry:
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 459bce3..5336995 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1207,7 +1207,7 @@
 # CHECK-NEXT:  1      5     0.50    *               	vlddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      6     0.50    *               	vlddqu	(%rax), %ymm2
 # CHECK-NEXT:  3      7     1.00    *      *      * 	vldmxcsr	(%rax)
-# CHECK-NEXT:  1      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  2      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  3      7     2.00    *               	vmaskmovpd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      8     2.00    *               	vmaskmovpd	(%rax), %ymm0, %ymm2
 # CHECK-NEXT:  4      5     1.00    *      *        	vmaskmovpd	%xmm0, %xmm1, (%rax)
@@ -1287,13 +1287,13 @@
 # CHECK-NEXT:  1      3     1.00                    	vmovmskps	%xmm0, %ecx
 # CHECK-NEXT:  1      3     1.00                    	vmovmskps	%ymm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *        	vmovntdq	%xmm0, (%rax)
-# CHECK-NEXT:  1      1     1.00           *        	vmovntdq	%ymm0, (%rax)
+# CHECK-NEXT:  2      1     1.00           *        	vmovntdq	%ymm0, (%rax)
 # CHECK-NEXT:  1      5     0.50    *               	vmovntdqa	(%rax), %xmm2
 # CHECK-NEXT:  1      6     0.50    *               	vmovntdqa	(%rax), %ymm2
 # CHECK-NEXT:  2      1     1.00           *        	vmovntpd	%xmm0, (%rax)
-# CHECK-NEXT:  1      1     1.00           *        	vmovntpd	%ymm0, (%rax)
+# CHECK-NEXT:  2      1     1.00           *        	vmovntpd	%ymm0, (%rax)
 # CHECK-NEXT:  2      1     1.00           *        	vmovntps	%xmm0, (%rax)
-# CHECK-NEXT:  1      1     1.00           *        	vmovntps	%ymm0, (%rax)
+# CHECK-NEXT:  2      1     1.00           *        	vmovntps	%ymm0, (%rax)
 # CHECK-NEXT:  1      1     0.33                    	vmovq	%xmm0, %xmm2
 # CHECK-NEXT:  1      1     1.00                    	vmovq	%rax, %xmm2
 # CHECK-NEXT:  1      5     0.50    *               	vmovq	(%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
index a9eb62c..2342e99 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
@@ -457,7 +457,7 @@
 # CHECK-NEXT:  1      14    4.00                    	divsd	%xmm0, %xmm2
 # CHECK-NEXT:  2      19    8.00    *               	divsd	(%rax), %xmm2
 # CHECK-NEXT:  2      2     0.50    *      *      * 	lfence
-# CHECK-NEXT:  1      1     1.00    *      *      * 	maskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  2      1     1.00    *      *      * 	maskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  1      3     1.00                    	maxpd	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *               	maxpd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                    	maxsd	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index 07884ce..bf2b5ea 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1207,7 +1207,7 @@
 # CHECK-NEXT:  1      6     0.50    *               	vlddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      7     0.50    *               	vlddqu	(%rax), %ymm2
 # CHECK-NEXT:  3      7     1.00    *      *      * 	vldmxcsr	(%rax)
-# CHECK-NEXT:  1      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  2      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  3      8     2.00    *               	vmaskmovpd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      9     2.00    *               	vmaskmovpd	(%rax), %ymm0, %ymm2
 # CHECK-NEXT:  4      5     1.00    *      *        	vmaskmovpd	%xmm0, %xmm1, (%rax)
@@ -1250,7 +1250,7 @@
 # CHECK-NEXT:  2      1     1.00           *        	vmovaps	%xmm0, (%rax)
 # CHECK-NEXT:  1      6     0.50    *               	vmovaps	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                    	vmovaps	%ymm0, %ymm2
-# CHECK-NEXT:  1      1     1.00           *        	vmovaps	%ymm0, (%rax)
+# CHECK-NEXT:  2      1     1.00           *        	vmovaps	%ymm0, (%rax)
 # CHECK-NEXT:  1      7     0.50    *               	vmovaps	(%rax), %ymm2
 # CHECK-NEXT:  1      1     1.00                    	vmovd	%eax, %xmm2
 # CHECK-NEXT:  1      5     0.50    *               	vmovd	(%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s
index a190193..883690b 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s
@@ -457,7 +457,7 @@
 # CHECK-NEXT:  1      20    14.00                   	divsd	%xmm0, %xmm2
 # CHECK-NEXT:  2      25    14.00   *               	divsd	(%rax), %xmm2
 # CHECK-NEXT:  2      2     0.50    *      *      * 	lfence
-# CHECK-NEXT:  1      1     1.00    *      *      * 	maskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  2      1     1.00    *      *      * 	maskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  1      3     1.00                    	maxpd	%xmm0, %xmm2
 # CHECK-NEXT:  2      9     1.00    *               	maxpd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                    	maxsd	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index 42685ba..9b0e41d 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1207,7 +1207,7 @@
 # CHECK-NEXT:  1      6     0.50    *               	vlddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      7     0.50    *               	vlddqu	(%rax), %ymm2
 # CHECK-NEXT:  3      7     1.00    *      *      * 	vldmxcsr	(%rax)
-# CHECK-NEXT:  1      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  2      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  2      7     0.50    *               	vmaskmovpd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  2      8     0.50    *               	vmaskmovpd	(%rax), %ymm0, %ymm2
 # CHECK-NEXT:  2      2     1.00    *      *        	vmaskmovpd	%xmm0, %xmm1, (%rax)
@@ -1241,7 +1241,7 @@
 # CHECK-NEXT:  1      4     0.50                    	vminss	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  2      9     0.50    *               	vminss	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.33                    	vmovapd	%xmm0, %xmm2
-# CHECK-NEXT:  1      1     1.00           *        	vmovapd	%xmm0, (%rax)
+# CHECK-NEXT:  2      1     1.00           *        	vmovapd	%xmm0, (%rax)
 # CHECK-NEXT:  1      6     0.50    *               	vmovapd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.33                    	vmovapd	%ymm0, %ymm2
 # CHECK-NEXT:  2      1     1.00           *        	vmovapd	%ymm0, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
index a0a06f2..060c4e1 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s
@@ -457,7 +457,7 @@
 # CHECK-NEXT:  1      14    3.00                    	divsd	%xmm0, %xmm2
 # CHECK-NEXT:  2      19    4.00    *               	divsd	(%rax), %xmm2
 # CHECK-NEXT:  2      2     0.50    *      *      * 	lfence
-# CHECK-NEXT:  1      1     1.00    *      *      * 	maskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  2      1     1.00    *      *      * 	maskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  1      4     0.50                    	maxpd	%xmm0, %xmm2
 # CHECK-NEXT:  2      10    0.50    *               	maxpd	(%rax), %xmm2
 # CHECK-NEXT:  1      4     0.50                    	maxsd	%xmm0, %xmm2
@@ -467,7 +467,7 @@
 # CHECK-NEXT:  1      4     0.50                    	minsd	%xmm0, %xmm2
 # CHECK-NEXT:  2      9     0.50    *               	minsd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     0.33                    	movapd	%xmm0, %xmm2
-# CHECK-NEXT:  1      1     1.00           *        	movapd	%xmm0, (%rax)
+# CHECK-NEXT:  2      1     1.00           *        	movapd	%xmm0, (%rax)
 # CHECK-NEXT:  1      6     0.50    *               	movapd	(%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                    	movd	%eax, %xmm2
 # CHECK-NEXT:  1      5     0.50    *               	movd	(%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
index fea3250..c30c862 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
@@ -1207,7 +1207,7 @@
 # CHECK-NEXT:  1      6     0.50    *               	vlddqu	(%rax), %xmm2
 # CHECK-NEXT:  1      7     0.50    *               	vlddqu	(%rax), %ymm2
 # CHECK-NEXT:  3      7     1.00    *      *      * 	vldmxcsr	(%rax)
-# CHECK-NEXT:  1      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  2      1     1.00    *      *      * 	vmaskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  2      7     0.50    *               	vmaskmovpd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  2      8     0.50    *               	vmaskmovpd	(%rax), %ymm0, %ymm2
 # CHECK-NEXT:  2      2     1.00    *      *        	vmaskmovpd	%xmm0, %xmm1, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s
index 281cf14..dd7e8fd 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s
@@ -457,7 +457,7 @@
 # CHECK-NEXT:  1      14    3.00                    	divsd	%xmm0, %xmm2
 # CHECK-NEXT:  2      19    4.00    *               	divsd	(%rax), %xmm2
 # CHECK-NEXT:  2      2     0.50    *      *      * 	lfence
-# CHECK-NEXT:  1      1     1.00    *      *      * 	maskmovdqu	%xmm0, %xmm1
+# CHECK-NEXT:  2      1     1.00    *      *      * 	maskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  1      4     0.33                    	maxpd	%xmm0, %xmm2
 # CHECK-NEXT:  2      10    0.50    *               	maxpd	(%rax), %xmm2
 # CHECK-NEXT:  1      4     0.33                    	maxsd	%xmm0, %xmm2