[X86] Use __builtin_convertvector to replace some of the avx512 truncate builtins.

As long as the destination type is a 256 or 128 bit vector with the same number of elements we can use __builtin_convertvector to directly generate trunc IR instruction which will be handled natively by the backend.

Differential Revision: https://reviews.llvm.org/D46742

llvm-svn: 332266
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e891b6c..24a4e9f 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -7601,24 +7601,23 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_cvtepi32_epi8 (__m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-              (__v16qi) _mm_undefined_si128 (),
-              (__mmask16) -1);
+  return (__m128i)__builtin_convertvector((__v16si)__A, __v16qi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-              (__v16qi) __O, __M);
+  return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M,
+                                             (__v16qi)_mm512_cvtepi32_epi8(__A),
+                                             (__v16qi)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
-              (__v16qi) _mm_setzero_si128 (),
-              __M);
+  return (__m128i)__builtin_ia32_selectb_128((__mmask8)__M,
+                                             (__v16qi)_mm512_cvtepi32_epi8(__A),
+                                             (__v16qi)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7630,24 +7629,23 @@
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_cvtepi32_epi16 (__m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-              (__v16hi) _mm256_undefined_si256 (),
-              (__mmask16) -1);
+  return (__m256i)__builtin_convertvector((__v16si)__A, __v16hi);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-              (__v16hi) __O, __M);
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
+                                             (__v16hi)_mm512_cvtepi32_epi16(__A),
+                                             (__v16hi)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
-              (__v16hi) _mm256_setzero_si256 (),
-              __M);
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
+                                             (__v16hi)_mm512_cvtepi32_epi16(__A),
+                                             (__v16hi)_mm256_setzero_si256());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7688,24 +7686,23 @@
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_cvtepi64_epi32 (__m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-              (__v8si) _mm256_undefined_si256 (),
-              (__mmask8) -1);
+  return (__m256i)__builtin_convertvector((__v8di) __A, __v8si);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-              (__v8si) __O, __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                             (__v8si)_mm512_cvtepi64_epi32(__A),
+                                             (__v8si)__O);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
 {
-  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
-              (__v8si) _mm256_setzero_si256 (),
-              __M);
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+                                             (__v8si)_mm512_cvtepi64_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7717,24 +7714,23 @@
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_cvtepi64_epi16 (__m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-              (__v8hi) _mm_undefined_si128 (),
-              (__mmask8) -1);
+  return (__m128i)__builtin_convertvector((__v8di)__A, __v8hi);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-              (__v8hi) __O, __M);
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
+                                             (__v8hi)_mm512_cvtepi64_epi16(__A),
+                                             (__v8hi)__O);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
-              (__v8hi) _mm_setzero_si128 (),
-              __M);
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
+                                             (__v8hi)_mm512_cvtepi64_epi16(__A),
+                                             (__v8hi)_mm_setzero_si128());
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS