[X86] Stop promoting vector and/or/xor/andn to vXi64.
These promotions add extra bitcasts to the SelectionDAG that can pessimize computeKnownBits/computeNumSignBits, and they also seem to interfere with broadcast formation.
This patch removes the promotions and adds isel patterns instead.
The resulting increase in isel table size is larger than I would like, but hopefully we can find some canonicalizations or other tricks to start pruning out patterns going forward.
Differential Revision: https://reviews.llvm.org/D53268
llvm-svn: 345408
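
For illustration, here is a minimal reduced case in the spirit of the andd512fold/test_and_epi32 tests updated below (the function name and the reduction are mine, not part of the patch): a plain v16i32 AND used to be promoted to v8i64 and selected as VPANDQ; with this change it is selected directly at the dword type as VPANDD.

; Hypothetical reduced example; compile with e.g. llc -mtriple=x86_64-- -mattr=+avx512f
define <16 x i32> @and_v16i32(<16 x i32> %a, <16 x i32> %b) {
  ; before: vpandq %zmm1, %zmm0, %zmm0   (after promotion to v8i64)
  ; after:  vpandd %zmm1, %zmm0, %zmm0   (selected at the native v16i32 type)
  %r = and <16 x i32> %a, %b
  ret <16 x i32> %r
}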
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index d836e9e..6bc6921 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -601,17 +601,17 @@
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; AVX512F-LABEL: andd512fold:
; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpandq (%rdi), %zmm0, %zmm0
+; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: andd512fold:
; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vpandq (%rdi), %zmm0, %zmm0
+; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: andd512fold:
; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: vpandq (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: andd512fold:
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index aa89ee7..d888ea4 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -3614,8 +3614,8 @@
; CHECK-LABEL: test_mm512_fnmsub_round_ps:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm3 = [-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0]
-; CHECK-NEXT: vpxorq %zmm3, %zmm0, %zmm4
-; CHECK-NEXT: vpxorq %zmm3, %zmm2, %zmm0
+; CHECK-NEXT: vpxord %zmm3, %zmm0, %zmm4
+; CHECK-NEXT: vpxord %zmm3, %zmm2, %zmm0
; CHECK-NEXT: vfmadd231ps {rn-sae}, %zmm4, %zmm1, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
@@ -3837,8 +3837,8 @@
; CHECK-LABEL: test_mm512_fnmsub_ps:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm3 = [-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0,-0]
-; CHECK-NEXT: vpxorq %zmm3, %zmm0, %zmm4
-; CHECK-NEXT: vpxorq %zmm3, %zmm2, %zmm0
+; CHECK-NEXT: vpxord %zmm3, %zmm0, %zmm4
+; CHECK-NEXT: vpxord %zmm3, %zmm2, %zmm0
; CHECK-NEXT: vfmadd231ps {{.*#+}} zmm0 = (zmm1 * zmm4) + zmm0
; CHECK-NEXT: ret{{[l|q]}}
entry:
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index a70ff9b..26e8636 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -1658,7 +1658,7 @@
define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_xor_epi32:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
+; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
@@ -1687,7 +1687,7 @@
define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_or_epi32:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
+; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
@@ -1716,7 +1716,7 @@
define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_and_epi32:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
+; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index bb1e855..65d9d67 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -7,7 +7,7 @@
; ALL-LABEL: vpandd:
; ALL: ## %bb.0: ## %entry
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
@@ -21,7 +21,7 @@
; ALL-LABEL: vpandnd:
; ALL: ## %bb.0: ## %entry
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT: vpandnq %zmm0, %zmm1, %zmm0
+; ALL-NEXT: vpandnd %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
@@ -37,7 +37,7 @@
; ALL-LABEL: vpord:
; ALL: ## %bb.0: ## %entry
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT: vporq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpord %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
@@ -51,7 +51,7 @@
; ALL-LABEL: vpxord:
; ALL: ## %bb.0: ## %entry
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpxord %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
@@ -132,7 +132,7 @@
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; KNL-LABEL: andd512fold:
; KNL: ## %bb.0: ## %entry
-; KNL-NEXT: vpandq (%rdi), %zmm0, %zmm0
+; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: andd512fold:
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 1449f5c..13ffb9f 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -3177,7 +3177,7 @@
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0
; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testw %ax, %ax
@@ -3196,7 +3196,7 @@
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
-; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0
; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testw %ax, %ax
@@ -3215,7 +3215,7 @@
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: pushq %rax
; AVX512BW-NEXT: .cfi_def_cfa_offset 16
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: testw %ax, %ax
@@ -3234,7 +3234,7 @@
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: pushq %rax
; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: testw %ax, %ax
@@ -3253,7 +3253,7 @@
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
-; X86-NEXT: vporq %zmm1, %zmm0, %zmm0
+; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT: kmovd %k0, %eax
; X86-NEXT: testw %ax, %ax
@@ -3287,7 +3287,7 @@
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
; CHECK-NEXT: kortestw %k0, %k0
; CHECK-NEXT: jb LBB65_2
@@ -3303,7 +3303,7 @@
; X86: ## %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
-; X86-NEXT: vporq %zmm1, %zmm0, %zmm0
+; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT: kortestw %k0, %k0
; X86-NEXT: jb LBB65_2
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index e0237ff..7ea2aef 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -5029,13 +5029,13 @@
; GENERIC-LABEL: vpandd:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpandd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpandd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpandd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5049,13 +5049,13 @@
; GENERIC-LABEL: vpandnd:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpandnd %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpandnd:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpandnd %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5071,13 +5071,13 @@
; GENERIC-LABEL: vpord:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpord %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpord:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
@@ -5091,13 +5091,13 @@
; GENERIC-LABEL: vpxord:
; GENERIC: # %bb.0: # %entry
; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpxord %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vpxord:
; SKX: # %bb.0: # %entry
; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
+; SKX-NEXT: vpxord %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
entry:
; Force the execution domain with an add.
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index 008a3b4..90e533c 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -11,7 +11,7 @@
; X86-NEXT: # %bb.1:
; X86-NEXT: vmovdqa64 %zmm0, %zmm1
; X86-NEXT: .LBB0_2:
-; X86-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; X86-NEXT: vpxord %zmm1, %zmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: select00:
@@ -22,7 +22,7 @@
; X64-NEXT: # %bb.1:
; X64-NEXT: vmovdqa64 %zmm0, %zmm1
; X64-NEXT: .LBB0_2:
-; X64-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; X64-NEXT: vpxord %zmm1, %zmm0, %zmm0
; X64-NEXT: retq
%cmpres = icmp eq i32 %a, 255
%selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 2964c90..b1a63ff 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -657,7 +657,7 @@
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-1.7939930131212661E-307,-1.7939930131212661E-307,-1.7939930131212661E-307,-1.7939930131212661E-307]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index 139fabd..c524021 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -845,7 +845,7 @@
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-1.7939930131212661E-307,-1.7939930131212661E-307,-1.7939930131212661E-307,-1.7939930131212661E-307]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
index c43b19a..3f78d0c 100644
--- a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
+++ b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll
@@ -708,7 +708,6 @@
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
; AVX2-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX2-NEXT: orq %rcx, %rax
diff --git a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
index bb79efc..19d3b47 100644
--- a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
+++ b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
@@ -359,7 +359,8 @@
; AVX-LABEL: f64i8_i32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37]
+; AVX-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -367,7 +368,7 @@
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
@@ -391,7 +392,8 @@
; AVX-64-LABEL: f64i8_i32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37]
+; AVX-64-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -399,7 +401,7 @@
; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
@@ -425,12 +427,13 @@
}
+; FIXME: the load should be folded into the MOVDDUP with AVX1. PR39454
define <64 x i8> @f64xi8_i64(<64 x i8> %a) {
; AVX-LABEL: f64xi8_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = [7.9499288951273625E-275,7.9499288951273625E-275]
-; AVX-NEXT: # xmm3 = mem[0,0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -438,7 +441,7 @@
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
@@ -462,8 +465,8 @@
; AVX-64-LABEL: f64xi8_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = [7.9499288951273625E-275,7.9499288951273625E-275]
-; AVX-64-NEXT: # xmm3 = mem[0,0]
+; AVX-64-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -471,7 +474,7 @@
; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
@@ -496,7 +499,6 @@
ret <64 x i8> %res2
}
-
define <64 x i8> @f64xi8_i128(<64 x i8> %a) {
; AVX-LABEL: f64xi8_i128:
; AVX: # %bb.0:
@@ -509,7 +511,7 @@
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
@@ -543,7 +545,7 @@
; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
@@ -844,7 +846,8 @@
; AVX-LABEL: f32xi16_i32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vbroadcastss {{.*#+}} xmm3 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
+; AVX-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -852,7 +855,7 @@
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
@@ -876,7 +879,8 @@
; AVX-64-LABEL: f32xi16_i32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm3 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
+; AVX-64-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -884,7 +888,7 @@
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
@@ -910,12 +914,13 @@
}
+; FIXME: the load should be folded into the MOVDDUP with AVX1. PR39454
define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
; AVX-LABEL: f32xi16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = [4.1720559249406128E-309,4.1720559249406128E-309]
-; AVX-NEXT: # xmm3 = mem[0,0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -923,7 +928,7 @@
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
@@ -947,8 +952,8 @@
; AVX-64-LABEL: f32xi16_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = [4.1720559249406128E-309,4.1720559249406128E-309]
-; AVX-64-NEXT: # xmm3 = mem[0,0]
+; AVX-64-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -956,7 +961,7 @@
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
@@ -994,7 +999,7 @@
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
@@ -1028,7 +1033,7 @@
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
@@ -1252,12 +1257,13 @@
}
+; FIXME: the load should be folded into the MOVDDUP with AVX1. PR39454
define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
; AVX-LABEL: f16xi32_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vmovddup {{.*#+}} xmm3 = [2.1219957909652723E-314,2.1219957909652723E-314]
-; AVX-NEXT: # xmm3 = mem[0,0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -1265,7 +1271,7 @@
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
@@ -1283,14 +1289,14 @@
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f16xi32_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = [2.1219957909652723E-314,2.1219957909652723E-314]
-; AVX-64-NEXT: # xmm3 = mem[0,0]
+; AVX-64-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
@@ -1298,7 +1304,7 @@
; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
@@ -1316,7 +1322,7 @@
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296]
; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: retq
%res1 = add <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %a
%res2 = and <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %res1
@@ -1336,7 +1342,7 @@
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
+; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
@@ -1356,7 +1362,7 @@
; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f16xi32_i128:
@@ -1370,7 +1376,7 @@
; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm2
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
@@ -1390,7 +1396,7 @@
; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: retq
%res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
%res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 4b33b04..452676e 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -2338,8 +2338,7 @@
;
; SKX-LABEL: allones_v4i32_and1:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; SKX-NEXT: vptestmd %xmm1, %xmm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
@@ -2382,8 +2381,7 @@
;
; SKX-LABEL: allzeros_v4i32_and1:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
-; SKX-NEXT: vptestmd %xmm1, %xmm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: sete %al
@@ -2444,8 +2442,7 @@
;
; SKX-LABEL: allones_v8i32_and1:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
-; SKX-NEXT: vptestmd %ymm1, %ymm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
@@ -2506,8 +2503,7 @@
;
; SKX-LABEL: allzeros_v8i32_and1:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
-; SKX-NEXT: vptestmd %ymm1, %ymm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
@@ -2584,8 +2580,7 @@
;
; KNL-LABEL: allones_v16i32_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0
+; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: setb %al
; KNL-NEXT: vzeroupper
@@ -2593,8 +2588,7 @@
;
; SKX-LABEL: allones_v16i32_and1:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SKX-NEXT: vptestmd %zmm1, %zmm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
@@ -2671,8 +2665,7 @@
;
; KNL-LABEL: allzeros_v16i32_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0
+; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -2680,8 +2673,7 @@
;
; SKX-LABEL: allzeros_v16i32_and1:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SKX-NEXT: vptestmd %zmm1, %zmm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
@@ -4010,8 +4002,7 @@
;
; SKX-LABEL: allones_v4i32_and4:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
-; SKX-NEXT: vptestmd %xmm1, %xmm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
@@ -4054,8 +4045,7 @@
;
; SKX-LABEL: allzeros_v4i32_and4:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4,4,4,4]
-; SKX-NEXT: vptestmd %xmm1, %xmm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: sete %al
@@ -4116,8 +4106,7 @@
;
; SKX-LABEL: allones_v8i32_and4:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
-; SKX-NEXT: vptestmd %ymm1, %ymm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
@@ -4178,8 +4167,7 @@
;
; SKX-LABEL: allzeros_v8i32_and4:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
-; SKX-NEXT: vptestmd %ymm1, %ymm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
@@ -4256,8 +4244,7 @@
;
; KNL-LABEL: allones_v16i32_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0
+; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: setb %al
; KNL-NEXT: vzeroupper
@@ -4265,8 +4252,7 @@
;
; SKX-LABEL: allones_v16i32_and4:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; SKX-NEXT: vptestmd %zmm1, %zmm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: setb %al
; SKX-NEXT: vzeroupper
@@ -4343,8 +4329,7 @@
;
; KNL-LABEL: allzeros_v16i32_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vptestmd %zmm1, %zmm0, %k0
+; KNL-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; KNL-NEXT: kortestw %k0, %k0
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
@@ -4352,8 +4337,7 @@
;
; SKX-LABEL: allzeros_v16i32_and4:
; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; SKX-NEXT: vptestmd %zmm1, %zmm0, %k0
+; SKX-NEXT: vptestmd {{.*}}(%rip){1to16}, %zmm0, %k0
; SKX-NEXT: kortestw %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index a6bdfe9..6f4a381 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -531,18 +531,16 @@
; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
; SSE41-NEXT: pxor %xmm5, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSE41-NEXT: pshufb %xmm6, %xmm0
-; SSE41-NEXT: movdqa %xmm3, %xmm7
-; SSE41-NEXT: pmaxud %xmm2, %xmm7
-; SSE41-NEXT: pcmpeqd %xmm3, %xmm7
-; SSE41-NEXT: pxor %xmm5, %xmm7
-; SSE41-NEXT: pshufb %xmm6, %xmm7
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm7[0]
+; SSE41-NEXT: movdqa %xmm3, %xmm6
+; SSE41-NEXT: pmaxud %xmm2, %xmm6
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
+; SSE41-NEXT: pxor %xmm5, %xmm6
+; SSE41-NEXT: packssdw %xmm6, %xmm0
; SSE41-NEXT: psubd %xmm2, %xmm3
; SSE41-NEXT: psubd %xmm1, %xmm4
-; SSE41-NEXT: pshufb %xmm6, %xmm4
-; SSE41-NEXT: pshufb %xmm6, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: pshufb %xmm1, %xmm4
+; SSE41-NEXT: pshufb %xmm1, %xmm3
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0]
; SSE41-NEXT: pandn %xmm4, %xmm0
; SSE41-NEXT: retq
@@ -916,18 +914,16 @@
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
; SSE41-NEXT: pxor %xmm5, %xmm4
-; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSE41-NEXT: pshufb %xmm6, %xmm4
-; SSE41-NEXT: movdqa %xmm3, %xmm7
-; SSE41-NEXT: pminud %xmm2, %xmm7
-; SSE41-NEXT: pcmpeqd %xmm3, %xmm7
-; SSE41-NEXT: pxor %xmm5, %xmm7
-; SSE41-NEXT: pshufb %xmm6, %xmm7
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
+; SSE41-NEXT: movdqa %xmm3, %xmm6
+; SSE41-NEXT: pminud %xmm2, %xmm6
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
+; SSE41-NEXT: pxor %xmm5, %xmm6
+; SSE41-NEXT: packssdw %xmm6, %xmm4
; SSE41-NEXT: psubd %xmm2, %xmm3
; SSE41-NEXT: psubd %xmm1, %xmm0
-; SSE41-NEXT: pshufb %xmm6, %xmm0
-; SSE41-NEXT: pshufb %xmm6, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: pshufb %xmm1, %xmm0
+; SSE41-NEXT: pshufb %xmm1, %xmm3
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: retq
@@ -1052,18 +1048,16 @@
; SSE41-NEXT: pcmpeqd %xmm1, %xmm4
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
; SSE41-NEXT: pxor %xmm5, %xmm4
-; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSE41-NEXT: pshufb %xmm6, %xmm4
-; SSE41-NEXT: movdqa %xmm2, %xmm7
-; SSE41-NEXT: pmaxud %xmm3, %xmm7
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm7
-; SSE41-NEXT: pxor %xmm5, %xmm7
-; SSE41-NEXT: pshufb %xmm6, %xmm7
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
+; SSE41-NEXT: movdqa %xmm2, %xmm6
+; SSE41-NEXT: pmaxud %xmm3, %xmm6
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm6
+; SSE41-NEXT: pxor %xmm5, %xmm6
+; SSE41-NEXT: packssdw %xmm6, %xmm4
; SSE41-NEXT: psubd %xmm2, %xmm3
; SSE41-NEXT: psubd %xmm1, %xmm0
-; SSE41-NEXT: pshufb %xmm6, %xmm0
-; SSE41-NEXT: pshufb %xmm6, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: pshufb %xmm1, %xmm0
+; SSE41-NEXT: pshufb %xmm1, %xmm3
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE41-NEXT: pand %xmm4, %xmm0
; SSE41-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll
index ec160c9..f0989e8 100644
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -746,15 +746,16 @@
; SSE2-LABEL: unsigned_sat_variable_v4i32_using_min:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: pxor %xmm0, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
; SSE2-NEXT: pxor %xmm1, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: pandn %xmm2, %xmm4
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm4, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
index f109d69..7cb0d3f 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -132,9 +132,9 @@
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
+; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm2
+; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm2
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm2
; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
@@ -142,9 +142,9 @@
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
+; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-XOP-NEXT: vpxor (%rdi), %xmm1, %xmm2
+; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm2
; CHECK-XOP-NEXT: vpandn %xmm2, %xmm0, %xmm0
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll
index 6fb0033..b08b15c 100644
--- a/llvm/test/CodeGen/X86/vec-copysign-avx512.ll
+++ b/llvm/test/CodeGen/X86/vec-copysign-avx512.ll
@@ -43,7 +43,7 @@
; AVX512VL: ## %bb.0:
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512VLDQ-LABEL: v16f32:
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index fa4c8ab..b249eed 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -2046,27 +2046,27 @@
; AVX512F-NEXT: vpsrld $24, %zmm0, %zmm1
; AVX512F-NEXT: vpsrld $8, %zmm0, %zmm2
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; AVX512F-NEXT: vporq %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT: vpord %zmm1, %zmm2, %zmm1
; AVX512F-NEXT: vpslld $24, %zmm0, %zmm2
; AVX512F-NEXT: vpslld $8, %zmm0, %zmm0
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0
+; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
; AVX512F-NEXT: vpslld $4, %zmm1, %zmm1
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $4, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
; AVX512F-NEXT: vpslld $2, %zmm1, %zmm1
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $2, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
; AVX512F-NEXT: vpslld $1, %zmm1, %zmm1
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0
-; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v16i32:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
index 10db0ae..71a9ba1 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -172,15 +172,15 @@
; AVX512BW-LABEL: testv16i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
@@ -201,15 +201,15 @@
; AVX512DQ-LABEL: testv16i32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -257,15 +257,15 @@
; AVX512BW-LABEL: testv16i32u:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
@@ -286,15 +286,15 @@
; AVX512DQ-LABEL: testv16i32u:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1
-; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll
index 89ae951..560bceb 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll
@@ -309,13 +309,13 @@
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -372,15 +372,15 @@
;
; AVX512-LABEL: test_v32i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll b/llvm/test/CodeGen/X86/vector-reduce-or.ll
index 04ec6cf..1693940 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or.ll
@@ -309,13 +309,13 @@
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -372,15 +372,15 @@
;
; AVX512-LABEL: test_v32i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
index cb69ee8..d1bf3e9 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll
@@ -309,13 +309,13 @@
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -372,15 +372,15 @@
;
; AVX512-LABEL: test_v32i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index eb51c10..30de8d7 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -876,7 +876,7 @@
; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vprold $4, %zmm0, %zmm0
-; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
%shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
@@ -980,10 +980,8 @@
; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
@@ -991,10 +989,8 @@
; AVX512VLBW-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll
index e552f5f..7dc8503 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-math.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -3505,7 +3505,7 @@
;
; AVX512-LABEL: trunc_and_v16i32_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -4309,7 +4309,7 @@
;
; AVX512-LABEL: trunc_xor_v16i32_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -5113,7 +5113,7 @@
;
; AVX512-LABEL: trunc_or_v16i32_v16i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-512.ll b/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
index 1de0346..501d7e9 100644
--- a/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
@@ -128,7 +128,7 @@
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; AVX512CD-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; AVX512CD-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: vpbroadcastd {{.*#+}} zmm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0
@@ -138,7 +138,7 @@
; AVX512CDBW: # %bb.0:
; AVX512CDBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; AVX512CDBW-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; AVX512CDBW-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CDBW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0
@@ -148,7 +148,7 @@
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
@@ -169,7 +169,7 @@
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; AVX512VPOPCNTDQ-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
@@ -177,7 +177,7 @@
; BITALG: # %bb.0:
; BITALG-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; BITALG-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; BITALG-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; BITALG-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; BITALG-NEXT: vpopcntb %zmm0, %zmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
@@ -195,7 +195,7 @@
; AVX512CD: # %bb.0:
; AVX512CD-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; AVX512CD-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; AVX512CD-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: vpbroadcastd {{.*#+}} zmm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0
@@ -205,7 +205,7 @@
; AVX512CDBW: # %bb.0:
; AVX512CDBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; AVX512CDBW-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; AVX512CDBW-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CDBW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0
@@ -215,7 +215,7 @@
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
@@ -236,7 +236,7 @@
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; AVX512VPOPCNTDQ-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
@@ -244,7 +244,7 @@
; BITALG: # %bb.0:
; BITALG-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; BITALG-NEXT: vpaddd %zmm1, %zmm0, %zmm1
-; BITALG-NEXT: vpandnq %zmm1, %zmm0, %zmm0
+; BITALG-NEXT: vpandnd %zmm1, %zmm0, %zmm0
; BITALG-NEXT: vpopcntb %zmm0, %zmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]