X86 Tests: Update more isel tests with FastVariableShuffle feature

Summary:
Added the FastVariableShuffle feature to cases that resemble processors
on which this feature is enabled.
For AVX2 there are processors both with and without this feature enabled.
For AVX512, all covered processors except KNL enable this feature, so
cases that use only +avx512f were left without FastVariableShuffle
enabled.
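For reference, the shuffle pattern these tests exercise reduces to
broadcasting element 7 of a v8i32. A minimal sketch (the function name
and RUN invocation below are illustrative, not taken from this commit):

  ; llc -mtriple=x86_64-unknown-unknown -mattr=+avx2
  ; Without +fast-variable-shuffle this is expected to lower to two
  ; fixed shuffles (vpshufd + vpermq, as in the old CHECK lines below);
  ; with +fast-variable-shuffle it becomes a vpbroadcastd of the
  ; index 7 plus a single variable vpermd.
  define <8 x i32> @broadcast_elt7(<8 x i32> %v) {
    %r = shufflevector <8 x i32> %v, <8 x i32> undef,
                       <8 x i32> <i32 7, i32 7, i32 7, i32 7,
                                  i32 7, i32 7, i32 7, i32 7>
    ret <8 x i32> %r
  }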

Reviewers: RKSimon, craig.topper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D41851

llvm-svn: 322090
diff --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
index b49e2ce..00fad6f 100644
--- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -O2 | FileCheck %s --check-prefix=AVX512
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl -O2 | FileCheck %s --check-prefix=AVX512NOTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512NOTDQ
 
 define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
 ; AVX512-LABEL: load_v8i1_broadcast_4_v2i1:
@@ -331,8 +331,8 @@
 ; AVX512-NEXT:    kmovd (%rdi), %k0
 ; AVX512-NEXT:    kshiftrd $24, %k0, %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm2
-; AVX512-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
-; AVX512-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
+; AVX512-NEXT:    vpermd %ymm2, %ymm3, %ymm2
 ; AVX512-NEXT:    vpmovd2m %ymm2, %k1
 ; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
@@ -345,8 +345,8 @@
 ; AVX512NOTDQ-NEXT:    kshiftrd $24, %k0, %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
-; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
+; AVX512NOTDQ-NEXT:    vpermd %ymm2, %ymm3, %ymm2
 ; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
@@ -541,8 +541,8 @@
 ; AVX512-NEXT:    kmovq (%rdi), %k0
 ; AVX512-NEXT:    kshiftrq $56, %k0, %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm2
-; AVX512-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
-; AVX512-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
+; AVX512-NEXT:    vpermd %ymm2, %ymm3, %ymm2
 ; AVX512-NEXT:    vpmovd2m %ymm2, %k1
 ; AVX512-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512-NEXT:    vmovaps %ymm1, (%rsi)
@@ -555,8 +555,8 @@
 ; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
-; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
+; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
+; AVX512NOTDQ-NEXT:    vpermd %ymm2, %ymm3, %ymm2
 ; AVX512NOTDQ-NEXT:    vptestmd %ymm2, %ymm2, %k1
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm0, %ymm1 {%k1}
 ; AVX512NOTDQ-NEXT:    vmovaps %ymm1, (%rsi)
@@ -1134,8 +1134,8 @@
 ; AVX512-NEXT:    kmovd (%rdi), %k0
 ; AVX512-NEXT:    kshiftrd $24, %k0, %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm0
-; AVX512-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
-; AVX512-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
+; AVX512-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT:    vpmovd2m %ymm0, %k0
 ; AVX512-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-NEXT:    vzeroupper
@@ -1147,8 +1147,8 @@
 ; AVX512NOTDQ-NEXT:    kshiftrd $24, %k0, %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
-; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
+; AVX512NOTDQ-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX512NOTDQ-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
 ; AVX512NOTDQ-NEXT:    movb %al, (%rsi)
@@ -1369,8 +1369,8 @@
 ; AVX512-NEXT:    kmovq (%rdi), %k0
 ; AVX512-NEXT:    kshiftrq $56, %k0, %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm0
-; AVX512-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
-; AVX512-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
+; AVX512-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX512-NEXT:    vpmovd2m %ymm0, %k0
 ; AVX512-NEXT:    kmovb %k0, (%rsi)
 ; AVX512-NEXT:    vzeroupper
@@ -1382,8 +1382,8 @@
 ; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; AVX512NOTDQ-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
-; AVX512NOTDQ-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
+; AVX512NOTDQ-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX512NOTDQ-NEXT:    vptestmd %ymm0, %ymm0, %k0
 ; AVX512NOTDQ-NEXT:    kmovd %k0, %eax
 ; AVX512NOTDQ-NEXT:    movb %al, (%rsi)