X86 Tests: Update more isel tests with FastVariableShuffle feature
Summary:
Added the FastVariableShuffle feature to cases that resembled processors
for which this feature is on.
For AVX2 there are processors with and without this feature enabled.
For AVX512 only KNL does not enable this feature, so cases which only have
+avx512f (i.e. resemble KNL) were left without FastVariableShuffle enabled.
Reviewers: RKSimon, craig.topper
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D41851
llvm-svn: 322090
diff --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
index b49e2ce..00fad6f 100644
--- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -O2 | FileCheck %s --check-prefix=AVX512
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl -O2 | FileCheck %s --check-prefix=AVX512NOTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512NOTDQ
define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1:
@@ -331,8 +331,8 @@
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $24, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm2
-; AVX512-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
-; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
+; AVX512-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512-NEXT: vpmovd2m %ymm2, %k1
; AVX512-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovaps %ymm1, (%rsi)
@@ -345,8 +345,8 @@
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
-; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
+; AVX512NOTDQ-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
@@ -541,8 +541,8 @@
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $56, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm2
-; AVX512-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
-; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
+; AVX512-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512-NEXT: vpmovd2m %ymm2, %k1
; AVX512-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovaps %ymm1, (%rsi)
@@ -555,8 +555,8 @@
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
-; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
+; AVX512NOTDQ-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
@@ -1134,8 +1134,8 @@
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $24, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm0
-; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
-; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
+; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
@@ -1147,8 +1147,8 @@
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
-; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
+; AVX512NOTDQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
@@ -1369,8 +1369,8 @@
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $56, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm0
-; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
-; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
+; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
@@ -1382,8 +1382,8 @@
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
-; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
+; AVX512NOTDQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)