[X86] Prevent combining (v8i1 (bitconvert (i8 load)))->(v8i1 load) if we don't have DQI.

We end up using an i8 load via an isel pattern from v8i1 anyway. This just makes it more explicit. This seems to improve codgen in some cases and I'd like to kill off some of the load patterns.

llvm-svn: 321598
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
index 5f4b050..ef7cf1b 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
@@ -37186,9 +37186,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandps %xmm2, %xmm0, %xmm0
@@ -37247,9 +37245,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqps (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandps %xmm1, %xmm0, %xmm0
@@ -37309,9 +37305,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vbroadcastss (%rsi), %xmm2
 ; NoVLX-NEXT:    vcmpeqps %xmm2, %xmm0, %xmm0
@@ -37532,9 +37526,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandps %xmm2, %xmm0, %xmm0
@@ -37593,9 +37585,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqps (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandps %xmm1, %xmm0, %xmm0
@@ -37655,9 +37645,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vbroadcastss (%rsi), %xmm2
 ; NoVLX-NEXT:    vcmpeqps %xmm2, %xmm0, %xmm0
@@ -37853,10 +37841,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandps %xmm2, %xmm0, %xmm0
@@ -37869,8 +37855,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -37903,10 +37889,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqps (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandps %xmm1, %xmm0, %xmm0
@@ -37919,8 +37903,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -37954,10 +37938,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vbroadcastss (%rsi), %xmm2
 ; NoVLX-NEXT:    vcmpeqps %xmm2, %xmm0, %xmm0
@@ -37971,8 +37953,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -38154,10 +38136,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandps %xmm2, %xmm0, %xmm0
@@ -38173,10 +38153,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
@@ -38210,10 +38190,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqps (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandps %xmm1, %xmm0, %xmm0
@@ -38229,10 +38207,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
@@ -38267,10 +38245,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vbroadcastss (%rsi), %xmm2
 ; NoVLX-NEXT:    vcmpeqps %xmm2, %xmm0, %xmm0
@@ -38287,10 +38263,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
@@ -40087,9 +40063,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
@@ -40124,9 +40098,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
@@ -40162,9 +40134,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
 ; NoVLX-NEXT:    vcmpeqpd %xmm2, %xmm0, %xmm0
@@ -40319,9 +40289,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
@@ -40366,9 +40334,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
@@ -40414,9 +40380,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
 ; NoVLX-NEXT:    vcmpeqpd %xmm2, %xmm0, %xmm0
@@ -40581,9 +40545,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
@@ -40628,9 +40590,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
@@ -40676,9 +40636,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
 ; NoVLX-NEXT:    vcmpeqpd %xmm2, %xmm0, %xmm0
@@ -40860,10 +40818,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
@@ -40876,8 +40832,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -40910,10 +40866,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
@@ -40926,8 +40880,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -40961,10 +40915,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
 ; NoVLX-NEXT:    vcmpeqpd %xmm2, %xmm0, %xmm0
@@ -40978,8 +40930,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -41161,10 +41113,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
@@ -41180,10 +41130,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
@@ -41217,10 +41167,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %xmm0, %xmm0
 ; NoVLX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
@@ -41236,10 +41184,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
@@ -41274,10 +41222,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vmovddup {{.*#+}} xmm2 = mem[0,0]
 ; NoVLX-NEXT:    vcmpeqpd %xmm2, %xmm0, %xmm0
@@ -41294,10 +41240,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
@@ -41497,9 +41443,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
 ; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
@@ -41560,9 +41504,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %ymm0
 ; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
@@ -41624,9 +41566,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vbroadcastsd (%rsi), %ymm2
 ; NoVLX-NEXT:    vcmpeqpd %ymm2, %ymm0, %ymm0
@@ -41858,9 +41798,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
 ; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
@@ -41921,9 +41859,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %ymm0
 ; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
@@ -41985,9 +41921,7 @@
 ;
 ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
 ; NoVLX:       # %bb.0: # %entry
-; NoVLX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vbroadcastsd (%rsi), %ymm2
 ; NoVLX-NEXT:    vcmpeqpd %ymm2, %ymm0, %ymm0
@@ -42191,10 +42125,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
 ; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
@@ -42208,8 +42140,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -42243,10 +42175,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %ymm0
 ; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
@@ -42260,8 +42190,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -42296,10 +42226,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $64, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $32, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vbroadcastsd (%rsi), %ymm2
 ; NoVLX-NEXT:    vcmpeqpd %ymm2, %ymm0, %ymm0
@@ -42314,8 +42242,8 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
 ; NoVLX-NEXT:    vzeroupper
@@ -42504,10 +42432,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
 ; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
@@ -42524,10 +42450,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
@@ -42562,10 +42488,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vcmpeqpd (%rsi), %ymm0, %ymm0
 ; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
@@ -42582,10 +42506,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
@@ -42621,10 +42545,8 @@
 ; NoVLX-NEXT:    movq %rsp, %rbp
 ; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
 ; NoVLX-NEXT:    andq $-32, %rsp
-; NoVLX-NEXT:    subq $96, %rsp
-; NoVLX-NEXT:    movb %dil, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT:    kmovw %eax, %k1
+; NoVLX-NEXT:    subq $64, %rsp
+; NoVLX-NEXT:    kmovw %edi, %k1
 ; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; NoVLX-NEXT:    vbroadcastsd (%rsi), %ymm2
 ; NoVLX-NEXT:    vcmpeqpd %ymm2, %ymm0, %ymm0
@@ -42642,10 +42564,10 @@
 ; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
 ; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT:    kmovw %k0, (%rsp)
 ; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
 ; NoVLX-NEXT:    shlq $32, %rcx
-; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT:    movl (%rsp), %eax
 ; NoVLX-NEXT:    orq %rcx, %rax
 ; NoVLX-NEXT:    movq %rbp, %rsp
 ; NoVLX-NEXT:    popq %rbp
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 8673a0b..ba6d773 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -45,9 +45,7 @@
 ; AVX512-LABEL: ext_i2_2i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    andb $3, %dil
-; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512-NEXT:    kmovd %eax, %k1
+; AVX512-NEXT:    kmovd %edi, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -87,9 +85,7 @@
 ; AVX512-LABEL: ext_i4_4i32:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    andb $15, %dil
-; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512-NEXT:    kmovd %eax, %k1
+; AVX512-NEXT:    kmovd %edi, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -240,9 +236,7 @@
 ; AVX512-LABEL: ext_i4_4i64:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    andb $15, %dil
-; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512-NEXT:    kmovd %eax, %k1
+; AVX512-NEXT:    kmovd %edi, %k1
 ; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
 ; AVX512-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index ce38b4c..fc91fda 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -49,9 +49,7 @@
 ; AVX512F-LABEL: ext_i2_2i64:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    andb $3, %dil
-; AVX512F-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512F-NEXT:    kmovw %eax, %k1
+; AVX512F-NEXT:    kmovw %edi, %k1
 ; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -60,9 +58,7 @@
 ; AVX512VLBW-LABEL: ext_i2_2i64:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    andb $3, %dil
-; AVX512VLBW-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512VLBW-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512VLBW-NEXT:    kmovd %eax, %k1
+; AVX512VLBW-NEXT:    kmovd %edi, %k1
 ; AVX512VLBW-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
 ; AVX512VLBW-NEXT:    retq
   %1 = bitcast i2 %a0 to <2 x i1>
@@ -104,9 +100,7 @@
 ; AVX512F-LABEL: ext_i4_4i32:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    andb $15, %dil
-; AVX512F-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512F-NEXT:    kmovw %eax, %k1
+; AVX512F-NEXT:    kmovw %edi, %k1
 ; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -115,9 +109,7 @@
 ; AVX512VLBW-LABEL: ext_i4_4i32:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    andb $15, %dil
-; AVX512VLBW-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512VLBW-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512VLBW-NEXT:    kmovd %eax, %k1
+; AVX512VLBW-NEXT:    kmovd %edi, %k1
 ; AVX512VLBW-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
 ; AVX512VLBW-NEXT:    retq
   %1 = bitcast i4 %a0 to <4 x i1>
@@ -300,9 +292,7 @@
 ; AVX512F-LABEL: ext_i4_4i64:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    andb $15, %dil
-; AVX512F-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512F-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512F-NEXT:    kmovw %eax, %k1
+; AVX512F-NEXT:    kmovw %edi, %k1
 ; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    # kill: def %ymm0 killed %ymm0 killed %zmm0
 ; AVX512F-NEXT:    retq
@@ -310,9 +300,7 @@
 ; AVX512VLBW-LABEL: ext_i4_4i64:
 ; AVX512VLBW:       # %bb.0:
 ; AVX512VLBW-NEXT:    andb $15, %dil
-; AVX512VLBW-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512VLBW-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512VLBW-NEXT:    kmovd %eax, %k1
+; AVX512VLBW-NEXT:    kmovd %edi, %k1
 ; AVX512VLBW-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
 ; AVX512VLBW-NEXT:    retq
   %1 = bitcast i4 %a0 to <4 x i1>
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
index a105c0b..8af95df 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -43,9 +43,7 @@
 ;
 ; AVX512-LABEL: bitcast_i2_2i1:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512-NEXT:    kmovd %eax, %k1
+; AVX512-NEXT:    kmovd %edi, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq
@@ -86,9 +84,7 @@
 ;
 ; AVX512-LABEL: bitcast_i4_4i1:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; AVX512-NEXT:    kmovd %eax, %k1
+; AVX512-NEXT:    kmovd %edi, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512-NEXT:    retq