X86: add more GATHER intrinsics in Clang
Corrected the type of the index argument of _mm256_mask_i32gather_pd
from 256-bit to 128-bit.
Corrected the types of the src/dst/mask arguments of _mm256_mask_i64gather_ps
from 256-bit to 128-bit.
Support the following intrinsics:
_mm_mask_i32gather_epi64, _mm256_mask_i32gather_epi64,
_mm_mask_i64gather_epi64, _mm256_mask_i64gather_epi64,
_mm_mask_i32gather_epi32, _mm256_mask_i32gather_epi32,
_mm_mask_i64gather_epi32, _mm256_mask_i64gather_epi32
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159403 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c
index c6953b3..86cc80a 100644
--- a/test/CodeGen/avx2-builtins.c
+++ b/test/CodeGen/avx2-builtins.c
@@ -10,6 +10,11 @@
return _mm256_mpsadbw_epu8(x, y, 3);
}
+__m256i test_mm256_sad_epu8(__m256i x, __m256i y) {
+ // CHECK: @llvm.x86.avx2.psad.bw
+ return _mm256_sad_epu8(x, y);
+}
+
__m256i test_mm256_abs_epi8(__m256i a) {
// CHECK: @llvm.x86.avx2.pabs.b
return _mm256_abs_epi8(a);
@@ -787,7 +792,7 @@
return _mm_mask_i32gather_pd(a, b, c, d, 2);
}
-__m256d test_mm256_mask_i32gather_pd(__m256d a, double const *b, __m256i c,
+__m256d test_mm256_mask_i32gather_pd(__m256d a, double const *b, __m128i c,
__m256d d) {
// CHECK: @llvm.x86.avx2.gather.d.pd.256
return _mm256_mask_i32gather_pd(a, b, c, d, 2);
@@ -818,8 +823,50 @@
// CHECK: @llvm.x86.avx2.gather.q.ps
return _mm_mask_i64gather_ps(a, b, c, d, 2);
}
-__m256 test_mm256_mask_i64gather_ps(__m256 a, float const *b, __m256i c,
- __m256 d) {
+__m128 test_mm256_mask_i64gather_ps(__m128 a, float const *b, __m256i c,
+ __m128 d) {
// CHECK: @llvm.x86.avx2.gather.q.ps.256
return _mm256_mask_i64gather_ps(a, b, c, d, 2);
}
+
+__m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c,
+ __m128i d) {
+ // CHECK: @llvm.x86.avx2.gather.d.d
+ return _mm_mask_i32gather_epi32(a, b, c, d, 2);
+}
+__m256i test_mm256_mask_i32gather_epi32(__m256i a, int const *b, __m256i c,
+ __m256i d) {
+ // CHECK: @llvm.x86.avx2.gather.d.d.256
+ return _mm256_mask_i32gather_epi32(a, b, c, d, 2);
+}
+__m128i test_mm_mask_i64gather_epi32(__m128i a, int const *b, __m128i c,
+ __m128i d) {
+ // CHECK: @llvm.x86.avx2.gather.q.d
+ return _mm_mask_i64gather_epi32(a, b, c, d, 2);
+}
+__m128i test_mm256_mask_i64gather_epi32(__m128i a, int const *b, __m256i c,
+ __m128i d) {
+ // CHECK: @llvm.x86.avx2.gather.q.d.256
+ return _mm256_mask_i64gather_epi32(a, b, c, d, 2);
+}
+
+__m128i test_mm_mask_i32gather_epi64(__m128i a, int const *b, __m128i c,
+ __m128i d) {
+ // CHECK: @llvm.x86.avx2.gather.d.q
+ return _mm_mask_i32gather_epi64(a, b, c, d, 2);
+}
+__m256i test_mm256_mask_i32gather_epi64(__m256i a, int const *b, __m128i c,
+ __m256i d) {
+ // CHECK: @llvm.x86.avx2.gather.d.q.256
+ return _mm256_mask_i32gather_epi64(a, b, c, d, 2);
+}
+__m128i test_mm_mask_i64gather_epi64(__m128i a, int const *b, __m128i c,
+ __m128i d) {
+ // CHECK: @llvm.x86.avx2.gather.q.q
+ return _mm_mask_i64gather_epi64(a, b, c, d, 2);
+}
+__m256i test_mm256_mask_i64gather_epi64(__m256i a, int const *b, __m256i c,
+ __m256i d) {
+ // CHECK: @llvm.x86.avx2.gather.q.q.256
+ return _mm256_mask_i64gather_epi64(a, b, c, d, 2);
+}