[AVX512] Don't use the 32-bit element versions of AND/OR/XOR/ANDN during isel unless we're matching a masked op or broadcast
Selecting the 32-bit element versions of these logical ops without a select (masking) or broadcast requires matching a bitconvert on the inputs to the AND node. But that's a fragile thing to rely on: it's entirely possible that one of the inputs has a bitcast and the other doesn't.
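For example, in a hypothetical case like the following (function and value names are illustrative only), only one operand of the AND goes through a bitconvert, so a pattern that expects bitconverts on both inputs would not match:

  define <16 x i32> @mixed_bitcast_and(<8 x i64> %a, <16 x i32> %b) {
    ; %a is bitconverted from v8i64, but %b feeds the AND directly
    %a32 = bitcast <8 x i64> %a to <16 x i32>
    %r = and <16 x i32> %a32, %b
    ret <16 x i32> %r
  }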
Since there's no functional difference, just remove the extra patterns and save some isel table size.
Differential Revision: https://reviews.llvm.org/D36854
llvm-svn: 312138
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index debfb29..1bcd3c6 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -607,17 +607,17 @@
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; AVX512F-LABEL: andd512fold:
; AVX512F: # BB#0: # %entry
-; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512F-NEXT: vpandq (%rdi), %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: andd512fold:
; AVX512VL: # BB#0: # %entry
-; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512VL-NEXT: vpandq (%rdi), %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: andd512fold:
; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: andd512fold:
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index afb463d..0e4a88b 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -959,7 +959,7 @@
define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_xor_epi32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
@@ -981,7 +981,7 @@
define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_or_epi32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
@@ -1003,7 +1003,7 @@
define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_and_epi32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index 6e08753..c96c63d 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -7,7 +7,7 @@
; ALL-LABEL: vpandd:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
@@ -21,7 +21,7 @@
; ALL-LABEL: vpandnd:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT: vpandnd %zmm0, %zmm1, %zmm0
+; ALL-NEXT: vpandnq %zmm0, %zmm1, %zmm0
; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
@@ -37,7 +37,7 @@
; ALL-LABEL: vpord:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT: vpord %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vporq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
@@ -51,7 +51,7 @@
; ALL-LABEL: vpxord:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
-; ALL-NEXT: vpxord %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
@@ -132,7 +132,7 @@
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; KNL-LABEL: andd512fold:
; KNL: ## BB#0: ## %entry
-; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; KNL-NEXT: vpandq (%rdi), %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: andd512fold:
diff --git a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
index 14bdb38..1194f96 100644
--- a/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
+++ b/llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
@@ -1335,7 +1335,7 @@
; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f16xi32_i128:
@@ -1369,7 +1369,7 @@
; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: retq
%res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
%res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll
index af91d35..4859112 100644
--- a/llvm/test/CodeGen/X86/vector-bitreverse.ll
+++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll
@@ -2051,17 +2051,17 @@
; AVX512F-NEXT: vpslld $4, %zmm1, %zmm1
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $4, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
; AVX512F-NEXT: vpslld $2, %zmm1, %zmm1
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $2, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
; AVX512F-NEXT: vpslld $1, %zmm1, %zmm1
; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0
-; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_bitreverse_v16i32:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
index 2a200d0..a7ef5980 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -176,7 +176,7 @@
; AVX512BW-LABEL: testv16i32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1
-; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
@@ -206,7 +206,7 @@
; AVX512DQ-LABEL: testv16i32:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
@@ -263,7 +263,7 @@
; AVX512BW-LABEL: testv16i32u:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1
-; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1
; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1
@@ -293,7 +293,7 @@
; AVX512DQ-LABEL: testv16i32u:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1
-; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1
; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 9403ea1..bf02f94 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -696,7 +696,7 @@
; AVX512-LABEL: splatconstant_rotate_mask_v16i32:
; AVX512: # BB#0:
; AVX512-NEXT: vprold $4, %zmm0, %zmm0
-; AVX512-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: retq
%shl = shl <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
%lshr = lshr <16 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll
index f22bc95..bbeb905 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-math.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -3116,7 +3116,7 @@
;
; AVX512-LABEL: trunc_and_v16i32_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -3830,7 +3830,7 @@
;
; AVX512-LABEL: trunc_xor_v16i32_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -4544,7 +4544,7 @@
;
; AVX512-LABEL: trunc_or_v16i32_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-512.ll b/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
index a604e41..4d38588 100644
--- a/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
@@ -139,7 +139,7 @@
; AVX512CD: # BB#0:
; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
-; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1
@@ -175,7 +175,7 @@
; AVX512CDBW: # BB#0:
; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
-; AVX512CDBW-NEXT: vpandd %zmm2, %zmm0, %zmm0
+; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; AVX512CDBW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -197,7 +197,7 @@
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -219,7 +219,7 @@
; AVX512VPOPCNTDQ: # BB#0:
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1
-; AVX512VPOPCNTDQ-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
@@ -233,7 +233,7 @@
; AVX512CD: # BB#0:
; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
-; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0
@@ -243,7 +243,7 @@
; AVX512CDBW: # BB#0:
; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm1
-; AVX512CDBW-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CDBW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0
@@ -253,7 +253,7 @@
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
-; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -275,7 +275,7 @@
; AVX512VPOPCNTDQ: # BB#0:
; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1
-; AVX512VPOPCNTDQ-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0