[X86] Use vector widening to support sign extend from i1 when the dest type is not 512-bits and vlx is not enabled.

Previously we used a wider element type and truncated. But its more efficient to keep the element type and drop unused elements.

If BWI isn't supported and we have a i16 or i8 type, we'll extend it to be i32 and still use a truncate.

llvm-svn: 319740
diff --git a/llvm/test/CodeGen/X86/avg-mask.ll b/llvm/test/CodeGen/X86/avg-mask.ll
index 4eacbdd..578d7aa 100644
--- a/llvm/test/CodeGen/X86/avg-mask.ll
+++ b/llvm/test/CodeGen/X86/avg-mask.ll
@@ -252,8 +252,8 @@
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
+; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
 ; AVX512F-NEXT:    vpblendvb %xmm1, %xmm0, %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
@@ -280,8 +280,8 @@
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    kmovw %edi, %k1
-; AVX512F-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
+; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
 ; AVX512F-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq