Now that LLVM CodeGen can handle the generic vector-shuffle forms a bit better,
get rid of a few more clang-specific x86 vector builtins.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@73015 91177308-0d34-0410-b5e6-96231b3b80d8
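For context, the generic replacements below rely on __builtin_shufflevector:
indices 0-3 select lanes from the first vector operand and 4-7 from the second,
so _mm_loadh_pi produces { a[0], a[1], b[0], b[1] } and _mm_loadl_pi produces
{ b[0], b[1], a[2], a[3] }. A minimal standalone sketch of the pattern
(hypothetical test driver, not part of this commit; the typedefs stand in for
the ones clang's headers define):

    /* Sketch of the generic shuffle-based half-register loads; compiles
       with clang. Not the actual header code, just an illustration. */
    #include <stdio.h>

    typedef float __m128 __attribute__((__vector_size__(16)));
    typedef long long __m64 __attribute__((__vector_size__(8)));

    static __m128 loadh_pi_generic(__m128 a, const __m64 *p)
    {
      __m128 b;
      b[0] = *(const float *)p;        /* low float at p */
      b[1] = *((const float *)p + 1);  /* high float at p */
      /* Keep a's low half, take the loaded pair as the high half. */
      return __builtin_shufflevector(a, b, 0, 1, 4, 5);
    }

    static __m128 loadl_pi_generic(__m128 a, const __m64 *p)
    {
      __m128 b;
      b[0] = *(const float *)p;
      b[1] = *((const float *)p + 1);
      /* Take the loaded pair as the low half, keep a's high half. */
      return __builtin_shufflevector(a, b, 4, 5, 2, 3);
    }

    int main(void)
    {
      __m128 a = { 1, 2, 3, 4 };
      float pair[2] = { 9, 10 };
      __m128 h = loadh_pi_generic(a, (const __m64 *)pair);
      __m128 l = loadl_pi_generic(a, (const __m64 *)pair);
      printf("loadh: %g %g %g %g\n", h[0], h[1], h[2], h[3]); /* 1 2 9 10 */
      printf("loadl: %g %g %g %g\n", l[0], l[1], l[2], l[3]); /* 9 10 3 4 */
      return 0;
    }

The index order is the only difference between the two: listing 4, 5 first
places the newly loaded floats in the low lanes (the MOVLPS behavior), while
listing them last places the loaded floats in the high lanes (MOVHPS).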
diff --git a/lib/Headers/xmmintrin.h b/lib/Headers/xmmintrin.h
index 7291f88..2903049 100644
--- a/lib/Headers/xmmintrin.h
+++ b/lib/Headers/xmmintrin.h
@@ -464,20 +464,19 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_loadh_pi(__m128 a, __m64 const *p)
{
- return __builtin_ia32_loadhps(a, (__v2si *)p);
+ __m128 b;
+ b[0] = *(float*)p;
+ b[1] = *((float*)p+1);
+ return __builtin_shufflevector(a, b, 0, 1, 4, 5);
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_loadl_pi(__m128 a, __m64 const *p)
{
-#if 0
- // FIXME: This should work, but gives really crappy code at the moment
__m128 b;
b[0] = *(float*)p;
b[1] = *((float*)p+1);
- return __builtin_shufflevector(a, b, 0, 1, 4, 5);
-#endif
- return __builtin_ia32_loadlps(a, (__v2si *)p);
+ return __builtin_shufflevector(a, b, 4, 5, 2, 3);
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
@@ -899,8 +898,6 @@
(row3) = _mm_movelh_ps(tmp3, tmp1); \
} while (0)
-#include <emmintrin.h>
-
#endif /* __SSE__ */
#endif /* __XMMINTRIN_H */