X86: add more GATHER intrinsics in Clang

Corrected the index type of _mm256_mask_i32gather_pd
  from 256-bit (__m256i) to 128-bit (__m128i)
Corrected the src, dst, and mask types of _mm256_mask_i64gather_ps
  from 256-bit (__m256) to 128-bit (__m128)

Added support for the following intrinsics:
  _mm_mask_i32gather_epi64, _mm256_mask_i32gather_epi64,
  _mm_mask_i64gather_epi64, _mm256_mask_i64gather_epi64,
  _mm_mask_i32gather_epi32, _mm256_mask_i32gather_epi32,
  _mm_mask_i64gather_epi32, _mm256_mask_i64gather_epi32


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159403 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index c6953b3..86cc80a 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -10,6 +10,11 @@
   return _mm256_mpsadbw_epu8(x, y, 3);
 }
 
+__m256i test_mm256_sad_epu8(__m256i x, __m256i y) {
+  // CHECK: @llvm.x86.avx2.psad.bw
+  return _mm256_sad_epu8(x, y);
+}
+
 __m256i test_mm256_abs_epi8(__m256i a) {
   // CHECK: @llvm.x86.avx2.pabs.b
   return _mm256_abs_epi8(a);
@@ -787,7 +792,7 @@
   return _mm_mask_i32gather_pd(a, b, c, d, 2);
 }
 
-__m256d test_mm256_mask_i32gather_pd(__m256d a, double const *b, __m256i c,
+__m256d test_mm256_mask_i32gather_pd(__m256d a, double const *b, __m128i c,
                                       __m256d d) {
   // CHECK: @llvm.x86.avx2.gather.d.pd.256
   return _mm256_mask_i32gather_pd(a, b, c, d, 2);
@@ -818,8 +823,50 @@
   // CHECK: @llvm.x86.avx2.gather.q.ps
   return _mm_mask_i64gather_ps(a, b, c, d, 2);
 }
-__m256 test_mm256_mask_i64gather_ps(__m256 a, float const *b, __m256i c,
-                                     __m256 d) {
+__m128 test_mm256_mask_i64gather_ps(__m128 a, float const *b, __m256i c,
+                                    __m128 d) {
   // CHECK: @llvm.x86.avx2.gather.q.ps.256
   return _mm256_mask_i64gather_ps(a, b, c, d, 2);
 }
+
+__m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c,
+                                     __m128i d) {
+  // CHECK: @llvm.x86.avx2.gather.d.d
+  return _mm_mask_i32gather_epi32(a, b, c, d, 2);
+}
+__m256i test_mm256_mask_i32gather_epi32(__m256i a, int const *b, __m256i c,
+                                        __m256i d) {
+  // CHECK: @llvm.x86.avx2.gather.d.d.256
+  return _mm256_mask_i32gather_epi32(a, b, c, d, 2);
+}
+__m128i test_mm_mask_i64gather_epi32(__m128i a, int const *b, __m128i c,
+                                     __m128i d) {
+  // CHECK: @llvm.x86.avx2.gather.q.d
+  return _mm_mask_i64gather_epi32(a, b, c, d, 2);
+}
+__m128i test_mm256_mask_i64gather_epi32(__m128i a, int const *b, __m256i c,
+                                        __m128i d) {
+  // CHECK: @llvm.x86.avx2.gather.q.d.256
+  return _mm256_mask_i64gather_epi32(a, b, c, d, 2);
+}
+
+__m128i test_mm_mask_i32gather_epi64(__m128i a, int const *b, __m128i c,
+                                     __m128i d) {
+  // CHECK: @llvm.x86.avx2.gather.d.q
+  return _mm_mask_i32gather_epi64(a, b, c, d, 2);
+}
+__m256i test_mm256_mask_i32gather_epi64(__m256i a, int const *b, __m128i c,
+                                        __m256i d) {
+  // CHECK: @llvm.x86.avx2.gather.d.q.256
+  return _mm256_mask_i32gather_epi64(a, b, c, d, 2);
+}
+__m128i test_mm_mask_i64gather_epi64(__m128i a, int const *b, __m128i c,
+                                     __m128i d) {
+  // CHECK: @llvm.x86.avx2.gather.q.q
+  return _mm_mask_i64gather_epi64(a, b, c, d, 2);
+}
+__m256i test_mm256_mask_i64gather_epi64(__m256i a, int const *b, __m256i c,
+                                        __m256i d) {
+  // CHECK: @llvm.x86.avx2.gather.q.q.256
+  return _mm256_mask_i64gather_epi64(a, b, c, d, 2);
+}