[AVX512] Add intrinsics for masked aligned FP loads and stores
Part of <rdar://problem/17688758>
llvm-svn: 226298
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9c80710..4b19590 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -928,6 +928,24 @@
(__mmask8) __U);
}
+/* Aligned load from __P under mask __U; the pass-through operand is zero. */
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
+{
+  const __v16sf *__src = (const __v16sf *)__P;
+  __v16sf __zero = (__v16sf) _mm512_setzero_ps ();
+  return (__m512) __builtin_ia32_loadaps512_mask (__src, __zero, (__mmask16) __U);
+}
+
+/* Aligned load from __P under mask __U; the pass-through operand is zero. */
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+{
+  const __v8df *__src = (const __v8df *)__P;
+  __v8df __zero = (__v8df) _mm512_setzero_pd ();
+  return (__m512d) __builtin_ia32_loadapd512_mask (__src, __zero, (__mmask8) __U);
+}
+
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
_mm512_loadu_pd(double const *__p)
{
@@ -946,6 +964,24 @@
return ((struct __loadu_ps*)__p)->__v;
}
+/* Unmasked aligned load: calls the masked-load builtin with every mask bit set.
+   __p is untyped (void const *), as in _mm512_maskz_load_ps, not double const *. */
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_load_ps(void const *__p)
+{
+  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
+      (__v16sf) _mm512_setzero_ps (), (__mmask16) -1);
+}
+
+/* Unmasked aligned load: calls the masked-load builtin with every mask bit set.
+   __p is untyped (void const *), as in _mm512_maskz_load_pd, not float const *. */
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_load_pd(void const *__p)
+{
+  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
+      (__v8df) _mm512_setzero_pd (), (__mmask8) -1);
+}
+
/* SIMD store ops */
static __inline void __attribute__ ((__always_inline__, __nodebug__))
@@ -988,9 +1024,9 @@
}
static __inline void __attribute__ ((__always_inline__, __nodebug__))
-_mm512_store_ps(void *__P, __m512 __A)
+_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) /* masked aligned store of __A to __P; __U is the mask */
{
- *(__m512*)__P = __A;
+ __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}
static __inline void __attribute__ ((__always_inline__, __nodebug__))
@@ -999,6 +1035,19 @@
*(__m512d*)__P = __A;
}
+/* Masked aligned store: hands __A, destination __P and mask __U to the builtin. */
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
+{
+  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, (__mmask16) __U);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_store_ps(void *__P, __m512 __A)
+{
+  ((__m512 *)__P)[0] = __A; /* plain assignment through __P viewed as __m512 * */
+}
+
/* Mask ops */
static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))