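Switch the packed-integer comparison intrinsics in mmintrin.h and
emmintrin.h (_mm_cmpeq_* and _mm_cmpgt_*) from x86 builtins to the
vector operators == and >. The _mm_cmplt_epi* intrinsics in emmintrin.h
now call the matching _mm_cmpgt_epi* with the operands swapped. Also
change the element type of __v8qi and __v16qi from char to signed char.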



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@76753 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index 72710be..9db6951 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -35,7 +35,7 @@
 
 typedef int __v4si __attribute__((__vector_size__(16)));
 typedef short __v8hi __attribute__((__vector_size__(16)));
-typedef char __v16qi __attribute__((__vector_size__(16)));
+typedef signed char __v16qi __attribute__((__vector_size__(16)));
 
 static inline __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_add_sd(__m128d a, __m128d b)
@@ -886,55 +886,55 @@
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_epi8(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpeqb128((__v16qi)a, (__v16qi)b);
+  return (__m128i)((__v16qi)a == (__v16qi)b);
 }
 
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_epi16(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpeqw128((__v8hi)a, (__v8hi)b);
+  return (__m128i)((__v8hi)a == (__v8hi)b);
 }
 
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_epi32(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpeqd128((__v4si)a, (__v4si)b);
+  return (__m128i)((__v4si)a == (__v4si)b);
 }
 
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_epi8(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)a, (__v16qi)b);
+  return (__m128i)((__v16qi)a > (__v16qi)b);
 }
 
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_epi16(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)a, (__v8hi)b);
+  return (__m128i)((__v8hi)a > (__v8hi)b);
 }
 
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_epi32(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)a, (__v4si)b);
+  return (__m128i)((__v4si)a > (__v4si)b);
 }
 
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_epi8(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpgtb128((__v16qi)b, (__v16qi)a);
+  return _mm_cmpgt_epi8(b,a);
 }
 
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_epi16(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpgtw128((__v8hi)b, (__v8hi)a);
+  return _mm_cmpgt_epi16(b,a);
 }
 
 static inline __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_epi32(__m128i a, __m128i b)
 {
-  return (__m128i)__builtin_ia32_pcmpgtd128((__v4si)b, (__v4si)a);
+  return _mm_cmpgt_epi32(b,a);
 }
 
 #ifdef __x86_64__
diff --git a/lib/Headers/mmintrin.h b/lib/Headers/mmintrin.h
index 8ea3c47..e3cbe48 100644
--- a/lib/Headers/mmintrin.h
+++ b/lib/Headers/mmintrin.h
@@ -32,7 +32,7 @@
 
 typedef int __v2si __attribute__((__vector_size__(8)));
 typedef short __v4hi __attribute__((__vector_size__(8)));
-typedef char __v8qi __attribute__((__vector_size__(8)));
+typedef signed char __v8qi __attribute__((__vector_size__(8)));
 
 static inline void __attribute__((__always_inline__, __nodebug__))
 _mm_empty(void)
@@ -348,37 +348,37 @@
 static inline __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
+    return (__m64)((__v8qi)__m1 == (__v8qi)__m2);
 }
 
 static inline __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)((__v4hi)__m1 == (__v4hi)__m2);
 }
 
 static inline __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
+    return (__m64)((__v2si)__m1 == (__v2si)__m2);
 }
 
 static inline __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
+    return (__m64)((__v8qi)__m1 > (__v8qi)__m2);
 }
 
 static inline __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
+    return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
 }
 
 static inline __m64 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
 {
-    return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
+    return (__m64)((__v2si)__m1 > (__v2si)__m2);
 }
 
 static inline __m64 __attribute__((__always_inline__, __nodebug__))