Merge "Update LLVM-related Mac prebuilts for rebase to r222494."
diff --git a/renderscript/clang-include/Intrin.h b/renderscript/clang-include/Intrin.h
index 13e105e..016c480 100644
--- a/renderscript/clang-include/Intrin.h
+++ b/renderscript/clang-include/Intrin.h
@@ -160,9 +160,6 @@
 void __writemsr(unsigned long, unsigned __int64);
 static __inline__
 void *_AddressOfReturnAddress(void);
-unsigned int _andn_u32(unsigned int, unsigned int);
-unsigned int _bextr_u32(unsigned int, unsigned int, unsigned int);
-unsigned int _bextri_u32(unsigned int, unsigned int);
 static __inline__
 unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
 static __inline__
@@ -175,20 +172,9 @@
 unsigned char _bittestandreset(long *, long);
 static __inline__
 unsigned char _bittestandset(long *, long);
-unsigned int _blcfill_u32(unsigned int);
-unsigned int _blci_u32(unsigned int);
-unsigned int _blcic_u32(unsigned int);
-unsigned int _blcmsk_u32(unsigned int);
-unsigned int _blcs_u32(unsigned int);
-unsigned int _blsfill_u32(unsigned int);
-unsigned int _blsi_u32(unsigned int);
-unsigned int _blsic_u32(unsigned int);
-unsigned int _blsmsk_u32(unsigned int);
-unsigned int _blsr_u32(unsigned int);
 unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
 unsigned long __cdecl _byteswap_ulong(unsigned long);
 unsigned short __cdecl _byteswap_ushort(unsigned short);
-unsigned _bzhi_u32(unsigned int, unsigned int);
 void __cdecl _disable(void);
 void __cdecl _enable(void);
 void __cdecl _fxrstor(void const *);
@@ -266,7 +252,6 @@
 static __inline__
 unsigned long __cdecl _lrotr(unsigned long, int);
 static __inline__
-unsigned int _lzcnt_u32(unsigned int);
 static __inline__
 void _ReadBarrier(void);
 static __inline__
@@ -274,8 +259,6 @@
 static __inline__
 void *_ReturnAddress(void);
 unsigned int _rorx_u32(unsigned int, const unsigned int);
-int __cdecl _rdrand16_step(unsigned short *);
-int __cdecl _rdrand32_step(unsigned int *);
 static __inline__
 unsigned int __cdecl _rotl(unsigned int _Value, int _Shift);
 static __inline__
@@ -301,12 +284,8 @@
 void _Store_HLERelease(long volatile *, long);
 void _Store64_HLERelease(__int64 volatile *, __int64);
 void _StorePointer_HLERelease(void *volatile *, void *);
-unsigned int _t1mskc_u32(unsigned int);
-unsigned int _tzcnt_u32(unsigned int);
-unsigned int _tzmsk_u32(unsigned int);
 static __inline__
 void _WriteBarrier(void);
-void _xabort(const unsigned int imm);
 unsigned __int32 xbegin(void);
 void _xend(void);
 static __inline__
@@ -315,7 +294,6 @@
 void __cdecl _xsave(void *, unsigned __int64);
 void __cdecl _xsaveopt(void *, unsigned __int64);
 void __cdecl _xsetbv(unsigned int, unsigned __int64);
-unsigned char _xtest(void);
 
 /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
 #ifdef __x86_64__
@@ -364,9 +342,6 @@
 void __writegsdword(unsigned long, unsigned long);
 void __writegsqword(unsigned long, unsigned __int64);
 void __writegsword(unsigned long, unsigned short);
-unsigned __int64 _andn_u64(unsigned __int64, unsigned __int64);
-unsigned __int64 _bextr_u64(unsigned __int64, unsigned int, unsigned int);
-unsigned __int64 _bextri_u64(unsigned __int64, unsigned int);
 static __inline__
 unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
 static __inline__
@@ -379,18 +354,7 @@
 unsigned char _bittestandreset64(__int64 *, __int64);
 static __inline__
 unsigned char _bittestandset64(__int64 *, __int64);
-unsigned __int64 _blcfill_u64(unsigned __int64);
-unsigned __int64 _blci_u64(unsigned __int64);
-unsigned __int64 _blcic_u64(unsigned __int64);
-unsigned __int64 _blcmsk_u64(unsigned __int64);
-unsigned __int64 _blcs_u64(unsigned __int64);
-unsigned __int64 _blsfill_u64(unsigned __int64);
-unsigned __int64 _blsi_u64(unsigned __int64);
-unsigned __int64 _blsic_u64(unsigned __int64);
-unsigned __int64 _blsmsk_u64(unsigned __int64);
-unsigned __int64 _blsr_u64(unsigned __int64);
 unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64);
-unsigned __int64 _bzhi_u64(unsigned __int64, unsigned int);
 void __cdecl _fxrstor64(void const *);
 void __cdecl _fxsave64(void *);
 long _InterlockedAnd_np(long volatile *_Value, long _Mask);
@@ -444,13 +408,8 @@
 __int64 _InterlockedXor64_np(__int64 volatile *_Value, __int64 _Mask);
 char _InterlockedXor8_np(char volatile *_Value, char _Mask);
 static __inline__
-unsigned __int64 _lzcnt_u64(unsigned __int64);
 __int64 _mul128(__int64 _Multiplier, __int64 _Multiplicand,
                 __int64 *_HighProduct);
-unsigned int __cdecl _readfsbase_u32(void);
-unsigned __int64 __cdecl _readfsbase_u64(void);
-unsigned int __cdecl _readgsbase_u32(void);
-unsigned __int64 __cdecl _readgsbase_u64(void);
 unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int);
 __int64 _sarx_i64(__int64, unsigned int);
 #if __STDC_HOSTED__
@@ -458,15 +417,9 @@
 #endif
 unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);
 unsigned __int64 shrx_u64(unsigned __int64, unsigned int);
-unsigned __int64 _tzcnt_u64(unsigned __int64);
-unsigned __int64 _tzmsk_u64(unsigned __int64);
 unsigned __int64 _umul128(unsigned __int64 _Multiplier,
                           unsigned __int64 _Multiplicand,
                           unsigned __int64 *_HighProduct);
-void __cdecl _writefsbase_u32(unsigned int);
-void _cdecl _writefsbase_u64(unsigned __int64);
-void __cdecl _writegsbase_u32(unsigned int);
-void __cdecl _writegsbase_u64(unsigned __int64);
 void __cdecl _xrstor64(void const *, unsigned __int64);
 void __cdecl _xsave64(void *, unsigned __int64);
 void __cdecl _xsaveopt64(void *, unsigned __int64);
@@ -545,12 +498,6 @@
   *_Index = 31 - __builtin_clzl(_Mask);
   return 1;
 }
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
-_lzcnt_u32(unsigned int a) {
-  if (!a)
-    return 32;
-  return __builtin_clzl(a);
-}
 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
 __popcnt16(unsigned short value) {
   return __builtin_popcount((int)value);
@@ -608,13 +555,6 @@
   *_Index = 63 - __builtin_clzll(_Mask);
   return 1;
 }
-static
-__inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
-_lzcnt_u64(unsigned __int64 a) {
-  if (!a)
-    return 64;
-  return __builtin_clzll(a);
-}
 static __inline__
 unsigned __int64 __attribute__((__always_inline__, __nodebug__))
  __popcnt64(unsigned __int64 value) {
@@ -861,10 +801,6 @@
 __readfsbyte(unsigned long __offset) {
   return *__ptr_to_addr_space(257, unsigned char, __offset);
 }
-static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
-__readfsdword(unsigned long __offset) {
-  return *__ptr_to_addr_space(257, unsigned long, __offset);
-}
 static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
 __readfsqword(unsigned long __offset) {
   return *__ptr_to_addr_space(257, unsigned __int64, __offset);
diff --git a/renderscript/clang-include/__stddef_max_align_t.h b/renderscript/clang-include/__stddef_max_align_t.h
new file mode 100644
index 0000000..a06f412
--- /dev/null
+++ b/renderscript/clang-include/__stddef_max_align_t.h
@@ -0,0 +1,40 @@
+/*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---===
+ *
+ * Copyright (c) 2014 Chandler Carruth
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_MAX_ALIGN_T_DEFINED
+#define __CLANG_MAX_ALIGN_T_DEFINED
+
+#ifndef _MSC_VER
+typedef struct {
+  long long __clang_max_align_nonce1
+      __attribute__((__aligned__(__alignof__(long long))));
+  long double __clang_max_align_nonce2
+      __attribute__((__aligned__(__alignof__(long double))));
+} max_align_t;
+#else
+typedef double max_align_t;
+#endif
+
+#endif
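
A quick illustration (not part of the patch) of what this new header guarantees: max_align_t is aligned at least as strictly as the widest fundamental scalar types, which is what malloc-style allocators rely on. The assertions below are a hypothetical consumer of the definition, not code from this tree.

    #include <stddef.h>     /* max_align_t */
    #include <stdalign.h>   /* alignof */

    /* max_align_t must be at least as strictly aligned as the widest scalars. */
    _Static_assert(alignof(max_align_t) >= alignof(long long), "long long");
    _Static_assert(alignof(max_align_t) >= alignof(long double), "long double");
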
diff --git a/renderscript/clang-include/adxintrin.h b/renderscript/clang-include/adxintrin.h
new file mode 100644
index 0000000..9db8bcb
--- /dev/null
+++ b/renderscript/clang-include/adxintrin.h
@@ -0,0 +1,83 @@
+/*===---- adxintrin.h - ADX intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <adxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __ADXINTRIN_H
+#define __ADXINTRIN_H
+
+/* Intrinsics that are available only if __ADX__ defined */
+#ifdef __ADX__
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+_addcarryx_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
+               unsigned int *__p)
+{
+  return __builtin_ia32_addcarryx_u32(__cf, __x, __y, __p);
+}
+
+#ifdef __x86_64__
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+_addcarryx_u64(unsigned char __cf, unsigned long long __x,
+               unsigned long long __y, unsigned long long  *__p)
+{
+  return __builtin_ia32_addcarryx_u64(__cf, __x, __y, __p);
+}
+#endif
+#endif
+
+/* Intrinsics that are also available if __ADX__ undefined */
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+_addcarry_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
+              unsigned int *__p)
+{
+  return __builtin_ia32_addcarry_u32(__cf, __x, __y, __p);
+}
+
+#ifdef __x86_64__
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+_addcarry_u64(unsigned char __cf, unsigned long long __x,
+              unsigned long long __y, unsigned long long  *__p)
+{
+  return __builtin_ia32_addcarry_u64(__cf, __x, __y, __p);
+}
+#endif
+
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+_subborrow_u32(unsigned char __cf, unsigned int __x, unsigned int __y,
+              unsigned int *__p)
+{
+  return __builtin_ia32_subborrow_u32(__cf, __x, __y, __p);
+}
+
+#ifdef __x86_64__
+static __inline unsigned char __attribute__((__always_inline__, __nodebug__))
+_subborrow_u64(unsigned char __cf, unsigned long long __x,
+               unsigned long long __y, unsigned long long  *__p)
+{
+  return __builtin_ia32_subborrow_u64(__cf, __x, __y, __p);
+}
+#endif
+
+#endif /* __ADXINTRIN_H */
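
For context (illustrative only, not part of the patch): _addcarry_u32 returns the carry-out byte, so multi-word additions can be chained limb by limb. A minimal sketch, assuming an x86 target where these intrinsics are reachable through <immintrin.h>; the helper name and limb layout are invented for the example.

    #include <immintrin.h>
    #include <stdint.h>

    /* Add two 64-bit values held as 32-bit limbs, chaining the carry. */
    static uint64_t add64_by_limbs(uint32_t a_lo, uint32_t a_hi,
                                   uint32_t b_lo, uint32_t b_hi) {
      unsigned int lo, hi;
      unsigned char c = _addcarry_u32(0, a_lo, b_lo, &lo);
      (void)_addcarry_u32(c, a_hi, b_hi, &hi);
      return ((uint64_t)hi << 32) | lo;
    }
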
diff --git a/renderscript/clang-include/altivec.h b/renderscript/clang-include/altivec.h
index 7a4a774..eded7b2 100644
--- a/renderscript/clang-include/altivec.h
+++ b/renderscript/clang-include/altivec.h
@@ -1623,6 +1623,21 @@
   return __builtin_altivec_vctuxs(__a, __b);
 }
 
+/* vec_div */
+#ifdef __VSX__
+static vector float __ATTRS_o_ai
+vec_div(vector float __a, vector float __b)
+{
+  return __builtin_vsx_xvdivsp(__a, __b);
+}
+
+static vector double __ATTRS_o_ai
+vec_div(vector double __a, vector double __b)
+{
+  return __builtin_vsx_xvdivdp(__a, __b);
+}
+#endif
+
 /* vec_dss */
 
 static void __attribute__((__always_inline__))
@@ -2253,91 +2268,273 @@
 
 /* vec_lvsl */
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsl(int __a, const signed char *__b)
+{
+  vector unsigned char mask = 
+    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsl(int __a, const signed char *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsl(int __a, const unsigned char *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsl(int __a, const unsigned char *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsl(int __a, const short *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsl(int __a, const short *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsl(int __a, const unsigned short *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsl(int __a, const unsigned short *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsl(int __a, const int *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsl(int __a, const int *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsl(int __a, const unsigned int *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsl(int __a, const unsigned int *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsl(int __a, const float *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsl(int __a, const float *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsl(__a, __b);
 }
+#endif
 
 /* vec_lvsr */
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsr(int __a, const signed char *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsr(int __a, const signed char *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsr(int __a, const unsigned char *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsr(int __a, const unsigned char *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsr(int __a, const short *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsr(int __a, const short *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsr(int __a, const unsigned short *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsr(int __a, const unsigned short *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsr(int __a, const int *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsr(int __a, const int *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsr(int __a, const unsigned int *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsr(int __a, const unsigned int *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
+#endif
 
+#ifdef __LITTLE_ENDIAN__
+static vector unsigned char __ATTRS_o_ai
+__attribute__((deprecated("use assignment for unaligned little endian \
+loads/stores")))
+vec_lvsr(int __a, const float *__b)
+{
+  vector unsigned char mask =
+    (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
+  vector unsigned char reverse = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
+  return vec_perm(mask, mask, reverse);
+}
+#else
 static vector unsigned char __ATTRS_o_ai
 vec_lvsr(int __a, const float *__b)
 {
   return (vector unsigned char)__builtin_altivec_lvsr(__a, __b);
 }
+#endif
 
 /* vec_madd */
 
@@ -2485,9 +2682,21 @@
 static vector float __ATTRS_o_ai
 vec_max(vector float __a, vector float __b)
 {
+#ifdef __VSX__
+  return __builtin_vsx_xvmaxsp(__a, __b);
+#else
   return __builtin_altivec_vmaxfp(__a, __b);
+#endif
 }
 
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_max(vector double __a, vector double __b)
+{
+  return __builtin_vsx_xvmaxdp(__a, __b);
+}
+#endif
+
 /* vec_vmaxsb */
 
 static vector signed char __ATTRS_o_ai
@@ -2613,7 +2822,11 @@
 static vector float __attribute__((__always_inline__))
 vec_vmaxfp(vector float __a, vector float __b)
 {
+#ifdef __VSX__
+  return __builtin_vsx_xvmaxsp(__a, __b);
+#else
   return __builtin_altivec_vmaxfp(__a, __b);
+#endif
 }
 
 /* vec_mergeh */
@@ -3117,9 +3330,21 @@
 static vector float __ATTRS_o_ai
 vec_min(vector float __a, vector float __b)
 {
+#ifdef __VSX__
+  return __builtin_vsx_xvminsp(__a, __b);
+#else
   return __builtin_altivec_vminfp(__a, __b);
+#endif
 }
 
+#ifdef __VSX__
+static vector double __ATTRS_o_ai
+vec_min(vector double __a, vector double __b)
+{
+  return __builtin_vsx_xvmindp(__a, __b);
+}
+#endif
+
 /* vec_vminsb */
 
 static vector signed char __ATTRS_o_ai
@@ -3245,7 +3470,11 @@
 static vector float __attribute__((__always_inline__))
 vec_vminfp(vector float __a, vector float __b)
 {
+#ifdef __VSX__
+  return __builtin_vsx_xvminsp(__a, __b);
+#else
   return __builtin_altivec_vminfp(__a, __b);
+#endif
 }
 
 /* vec_mladd */
@@ -4673,6 +4902,52 @@
 #endif
 }
 
+#ifdef __VSX__
+vector long long __ATTRS_o_ai
+vec_perm(vector long long __a, vector long long __b, vector unsigned char __c)
+{
+#ifdef __LITTLE_ENDIAN__
+  vector unsigned char __d = {255,255,255,255,255,255,255,255,
+                              255,255,255,255,255,255,255,255};
+  __d = vec_xor(__c, __d);
+  return (vector long long)__builtin_altivec_vperm_4si(__b, __a, __d);
+#else
+  return (vector long long)__builtin_altivec_vperm_4si(__a, __b, __c);
+#endif
+}
+
+vector unsigned long long __ATTRS_o_ai
+vec_perm(vector unsigned long long __a, vector unsigned long long __b,
+         vector unsigned char __c)
+{
+#ifdef __LITTLE_ENDIAN__
+  vector unsigned char __d = {255,255,255,255,255,255,255,255,
+                              255,255,255,255,255,255,255,255};
+  __d = vec_xor(__c, __d);
+  return (vector unsigned long long)
+           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+#else
+  return (vector unsigned long long)
+           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+#endif
+}
+
+vector double __ATTRS_o_ai
+vec_perm(vector double __a, vector double __b, vector unsigned char __c)
+{
+#ifdef __LITTLE_ENDIAN__
+  vector unsigned char __d = {255,255,255,255,255,255,255,255,
+                              255,255,255,255,255,255,255,255};
+  __d = vec_xor(__c, __d);
+  return (vector double)
+           __builtin_altivec_vperm_4si((vector int)__b, (vector int)__a, __d);
+#else
+  return (vector double)
+           __builtin_altivec_vperm_4si((vector int)__a, (vector int)__b, __c);
+#endif
+}
+#endif
+
 /* vec_vperm */
 
 static vector signed char __ATTRS_o_ai
@@ -4745,6 +5020,27 @@
   return vec_perm(__a, __b, __c);
 }
 
+#ifdef __VSX__
+static vector long long __ATTRS_o_ai
+vec_vperm(vector long long __a, vector long long __b, vector unsigned char __c)
+{
+  return vec_perm(__a, __b, __c);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vperm(vector unsigned long long __a, vector unsigned long long __b,
+          vector unsigned char __c)
+{
+  return vec_perm(__a, __b, __c);
+}
+
+static vector double __ATTRS_o_ai
+vec_vperm(vector double __a, vector double __b, vector unsigned char __c)
+{
+  return vec_perm(__a, __b, __c);
+}
+#endif
+
 /* vec_re */
 
 static vector float __attribute__((__always_inline__))
@@ -5224,113 +5520,65 @@
 static vector signed char __ATTRS_o_ai
 vec_sld(vector signed char __a, vector signed char __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned char __ATTRS_o_ai
 vec_sld(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector short __ATTRS_o_ai
 vec_sld(vector short __a, vector short __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned short __ATTRS_o_ai
 vec_sld(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector pixel __ATTRS_o_ai
 vec_sld(vector pixel __a, vector pixel __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector int __ATTRS_o_ai
 vec_sld(vector int __a, vector int __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned int __ATTRS_o_ai
 vec_sld(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector float __ATTRS_o_ai
 vec_sld(vector float __a, vector float __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 /* vec_vsldoi */
@@ -5338,113 +5586,65 @@
 static vector signed char __ATTRS_o_ai
 vec_vsldoi(vector signed char __a, vector signed char __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned char __ATTRS_o_ai
 vec_vsldoi(vector unsigned char __a, vector unsigned char __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector short __ATTRS_o_ai
 vec_vsldoi(vector short __a, vector short __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned short __ATTRS_o_ai
 vec_vsldoi(vector unsigned short __a, vector unsigned short __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector pixel __ATTRS_o_ai
 vec_vsldoi(vector pixel __a, vector pixel __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector int __ATTRS_o_ai
 vec_vsldoi(vector int __a, vector int __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector unsigned int __ATTRS_o_ai
 vec_vsldoi(vector unsigned int __a, vector unsigned int __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 static vector float __ATTRS_o_ai
 vec_vsldoi(vector float __a, vector float __b, unsigned char __c)
 {
-#ifdef __LITTLE_ENDIAN__
-  return vec_perm(__a, __b, (vector unsigned char)
-    (__c,   __c-1, __c-2, __c-3, __c-4, __c-5, __c-6, __c-7,
-     __c-8, __c-9, __c-10, __c-11, __c-12, __c-13, __c-14, __c-15));
-#else
   return vec_perm(__a, __b, (vector unsigned char)
     (__c,   __c+1, __c+2,  __c+3,  __c+4,  __c+5,  __c+6,  __c+7,
      __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15));
-#endif
 }
 
 /* vec_sll */
@@ -8504,13 +8704,9 @@
 vec_sums(vector signed int __a, vector signed int __b)
 {
 #ifdef __LITTLE_ENDIAN__
-  __b = (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-	     (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+  __b = (vector signed int)vec_splat(__b, 3);
   __b = __builtin_altivec_vsumsws(__a, __b);
-  return (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-	     (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+  return (vector signed int)(0, 0, 0, __b[0]);
 #else
   return __builtin_altivec_vsumsws(__a, __b);
 #endif
@@ -8522,13 +8718,9 @@
 vec_vsumsws(vector signed int __a, vector signed int __b)
 {
 #ifdef __LITTLE_ENDIAN__
-  __b = (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-	     (12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11));
+  __b = (vector signed int)vec_splat(__b, 3);
   __b = __builtin_altivec_vsumsws(__a, __b);
-  return (vector signed int)
-    vec_perm(__b, __b, (vector unsigned char)
-	     (4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3));
+  return (vector signed int)(0, 0, 0, __b[0]);
 #else
   return __builtin_altivec_vsumsws(__a, __b);
 #endif
@@ -8765,6 +8957,91 @@
 #endif
 }
 
+/* vec_vsx_ld */
+
+#ifdef __VSX__
+
+static vector signed int __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector signed int *__b)
+{
+  return (vector signed int)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static vector unsigned int __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector unsigned int *__b)
+{
+  return (vector unsigned int)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static vector float __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector float *__b)
+{
+  return (vector float)__builtin_vsx_lxvw4x(__a, __b);
+}
+
+static vector signed long long __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector signed long long *__b)
+{
+  return (vector signed long long)__builtin_vsx_lxvd2x(__a, __b);
+}
+
+static vector unsigned long long __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector unsigned long long *__b)
+{
+  return (vector unsigned long long)__builtin_vsx_lxvd2x(__a, __b);
+}
+
+static vector double __ATTRS_o_ai
+vec_vsx_ld(int __a, const vector double *__b)
+{
+  return (vector double)__builtin_vsx_lxvd2x(__a, __b);
+}
+
+#endif
+
+/* vec_vsx_st */
+
+#ifdef __VSX__
+
+static void __ATTRS_o_ai
+vec_vsx_st(vector signed int __a, int __b, vector signed int *__c)
+{
+  __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static void __ATTRS_o_ai
+vec_vsx_st(vector unsigned int __a, int __b, vector unsigned int *__c)
+{
+  __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static void __ATTRS_o_ai
+vec_vsx_st(vector float __a, int __b, vector float *__c)
+{
+  __builtin_vsx_stxvw4x((vector int)__a, __b, __c);
+}
+
+static void __ATTRS_o_ai
+vec_vsx_st(vector signed long long __a, int __b, vector signed long long *__c)
+{
+  __builtin_vsx_stxvd2x((vector double)__a, __b, __c);
+}
+
+static void __ATTRS_o_ai
+vec_vsx_st(vector unsigned long long __a, int __b,
+           vector unsigned long long *__c)
+{
+  __builtin_vsx_stxvd2x((vector double)__a, __b, __c);
+}
+
+static void __ATTRS_o_ai
+vec_vsx_st(vector double __a, int __b, vector double *__c)
+{
+  __builtin_vsx_stxvd2x((vector double)__a, __b, __c);
+}
+
+#endif
+
 /* vec_xor */
 
 #define __builtin_altivec_vxor vec_xor
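
A rough usage note (not part of the patch): the vec_vsx_ld/vec_vsx_st overloads added above give direct VSX loads and stores, which is the kind of access the deprecated little-endian vec_lvsl/vec_lvsr realignment idiom steers users away from. A minimal sketch, assuming a VSX-enabled build (e.g. -maltivec -mvsx); the function name is illustrative.

    #include <altivec.h>

    /* Copy one 16-byte vector with the VSX load/store built-ins. */
    static void copy_vec(const vector float *src, vector float *dst) {
      vector float v = vec_vsx_ld(0, src);
      vec_vsx_st(v, 0, dst);
    }
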
diff --git a/renderscript/clang-include/arm_acle.h b/renderscript/clang-include/arm_acle.h
index d706745..814df2c 100644
--- a/renderscript/clang-include/arm_acle.h
+++ b/renderscript/clang-include/arm_acle.h
@@ -34,8 +34,106 @@
 extern "C" {
 #endif
 
-/* Miscellaneous data-processing intrinsics */
+/* 8 SYNCHRONIZATION, BARRIER AND HINT INTRINSICS */
+/* 8.3 Memory barriers */
+#if !defined(_MSC_VER)
+#define __dmb(i) __builtin_arm_dmb(i)
+#define __dsb(i) __builtin_arm_dsb(i)
+#define __isb(i) __builtin_arm_isb(i)
+#endif
 
+/* 8.4 Hints */
+
+#if !defined(_MSC_VER)
+static __inline__ void __attribute__((always_inline, nodebug)) __wfi(void) {
+  __builtin_arm_wfi();
+}
+
+static __inline__ void __attribute__((always_inline, nodebug)) __wfe(void) {
+  __builtin_arm_wfe();
+}
+
+static __inline__ void __attribute__((always_inline, nodebug)) __sev(void) {
+  __builtin_arm_sev();
+}
+
+static __inline__ void __attribute__((always_inline, nodebug)) __sevl(void) {
+  __builtin_arm_sevl();
+}
+
+static __inline__ void __attribute__((always_inline, nodebug)) __yield(void) {
+  __builtin_arm_yield();
+}
+#endif
+
+#if __ARM_32BIT_STATE
+#define __dbg(t) __builtin_arm_dbg(t)
+#endif
+
+/* 8.5 Swap */
+static __inline__ uint32_t __attribute__((always_inline, nodebug))
+  __swp(uint32_t x, volatile uint32_t *p) {
+  uint32_t v;
+  do v = __builtin_arm_ldrex(p); while (__builtin_arm_strex(x, p));
+  return v;
+}
+
+/* 8.6 Memory prefetch intrinsics */
+/* 8.6.1 Data prefetch */
+#define __pld(addr) __pldx(0, 0, 0, addr)
+
+#if __ARM_32BIT_STATE
+#define __pldx(access_kind, cache_level, retention_policy, addr) \
+  __builtin_arm_prefetch(addr, access_kind, 1)
+#else
+#define __pldx(access_kind, cache_level, retention_policy, addr) \
+  __builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
+#endif
+
+/* 8.6.2 Instruction prefetch */
+#define __pli(addr) __plix(0, 0, addr)
+
+#if __ARM_32BIT_STATE
+#define __plix(cache_level, retention_policy, addr) \
+  __builtin_arm_prefetch(addr, 0, 0)
+#else
+#define __plix(cache_level, retention_policy, addr) \
+  __builtin_arm_prefetch(addr, 0, cache_level, retention_policy, 0)
+#endif
+
+/* 8.7 NOP */
+static __inline__ void __attribute__((always_inline, nodebug)) __nop(void) {
+  __builtin_arm_nop();
+}
+
+/* 9 DATA-PROCESSING INTRINSICS */
+/* 9.2 Miscellaneous data-processing intrinsics */
+/* ROR */
+static __inline__ uint32_t __attribute__((always_inline, nodebug))
+  __ror(uint32_t x, uint32_t y) {
+  y %= 32;
+  if (y == 0)  return x;
+  return (x >> y) | (x << (32 - y));
+}
+
+static __inline__ uint64_t __attribute__((always_inline, nodebug))
+  __rorll(uint64_t x, uint32_t y) {
+  y %= 64;
+  if (y == 0)  return x;
+  return (x >> y) | (x << (64 - y));
+}
+
+static __inline__ unsigned long __attribute__((always_inline, nodebug))
+  __rorl(unsigned long x, uint32_t y) {
+#if __SIZEOF_LONG__ == 4
+  return __ror(x, y);
+#else
+  return __rorll(x, y);
+#endif
+}
+
+
+/* CLZ */
 static __inline__ uint32_t __attribute__((always_inline, nodebug))
   __clz(uint32_t t) {
   return __builtin_clz(t);
@@ -48,13 +146,10 @@
 
 static __inline__ uint64_t __attribute__((always_inline, nodebug))
   __clzll(uint64_t t) {
-#if __SIZEOF_LONG_LONG__ == 8
   return __builtin_clzll(t);
-#else
-  return __builtin_clzl(t);
-#endif
 }
 
+/* REV */
 static __inline__ uint32_t __attribute__((always_inline, nodebug))
   __rev(uint32_t t) {
   return __builtin_bswap32(t);
@@ -74,17 +169,67 @@
   return __builtin_bswap64(t);
 }
 
+/* REV16 */
+static __inline__ uint32_t __attribute__((always_inline, nodebug))
+  __rev16(uint32_t t) {
+  return __ror(__rev(t), 16);
+}
+
+static __inline__ unsigned long __attribute__((always_inline, nodebug))
+  __rev16l(unsigned long t) {
+    return __rorl(__revl(t), sizeof(long) / 2);
+}
+
+static __inline__ uint64_t __attribute__((always_inline, nodebug))
+  __rev16ll(uint64_t t) {
+  return __rorll(__revll(t), 32);
+}
+
+/* REVSH */
+static __inline__ int16_t __attribute__((always_inline, nodebug))
+  __revsh(int16_t t) {
+  return __builtin_bswap16(t);
+}
+
+/* RBIT */
+static __inline__ uint32_t __attribute__((always_inline, nodebug))
+  __rbit(uint32_t t) {
+  return __builtin_arm_rbit(t);
+}
+
+static __inline__ uint64_t __attribute__((always_inline, nodebug))
+  __rbitll(uint64_t t) {
+#if __ARM_32BIT_STATE
+  return (((uint64_t) __builtin_arm_rbit(t)) << 32) |
+    __builtin_arm_rbit(t >> 32);
+#else
+  return __builtin_arm_rbit64(t);
+#endif
+}
+
+static __inline__ unsigned long __attribute__((always_inline, nodebug))
+  __rbitl(unsigned long t) {
+#if __SIZEOF_LONG__ == 4
+  return __rbit(t);
+#else
+  return __rbitll(t);
+#endif
+}
 
 /*
- * Saturating intrinsics
+ * 9.4 Saturating intrinsics
  *
 * FIXME: Change guard to their corresponding __ARM_FEATURE flag when Q flag
  * intrinsics are implemented and the flag is enabled.
  */
+/* 9.4.1 Width-specified saturation intrinsics */
 #if __ARM_32BIT_STATE
 #define __ssat(x, y) __builtin_arm_ssat(x, y)
 #define __usat(x, y) __builtin_arm_usat(x, y)
+#endif
 
+/* 9.4.2 Saturating addition and subtraction intrinsics */
+#if __ARM_32BIT_STATE
 static __inline__ int32_t __attribute__((always_inline, nodebug))
   __qadd(int32_t t, int32_t v) {
   return __builtin_arm_qadd(t, v);
@@ -101,7 +246,7 @@
 }
 #endif
 
-/* CRC32 intrinsics */
+/* 9.7 CRC32 intrinsics */
 #if __ARM_FEATURE_CRC32
 static __inline__ uint32_t __attribute__((always_inline, nodebug))
   __crc32b(uint32_t a, uint8_t b) {
diff --git a/renderscript/clang-include/avx512bwintrin.h b/renderscript/clang-include/avx512bwintrin.h
new file mode 100644
index 0000000..bc4d4ac
--- /dev/null
+++ b/renderscript/clang-include/avx512bwintrin.h
@@ -0,0 +1,60 @@
+/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __AVX512BWINTRIN_H
+#define __AVX512BWINTRIN_H
+
+typedef unsigned int __mmask32;
+typedef unsigned long long __mmask64;
+typedef char __v64qi __attribute__ ((vector_size (64)));
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+
+
+/* Integer compare */
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
+                                                   (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
+                                                   __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
+                                                   (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
+                                                   __u);
+}
+
+#endif
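
A small usage sketch (not part of the patch) of the byte-compare form above: the result is a 64-bit mask with one bit per byte lane, so counting matches is a popcount. Assumes an AVX512BW-capable target reached through <immintrin.h>; the helper name is invented for the example.

    #include <immintrin.h>

    /* Number of byte positions at which two 64-byte vectors agree. */
    static inline int count_equal_bytes(__m512i a, __m512i b) {
      __mmask64 m = _mm512_cmpeq_epi8_mask(a, b);
      return __builtin_popcountll(m);
    }
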
diff --git a/renderscript/clang-include/avx512erintrin.h b/renderscript/clang-include/avx512erintrin.h
new file mode 100644
index 0000000..1a5ea15
--- /dev/null
+++ b/renderscript/clang-include/avx512erintrin.h
@@ -0,0 +1,112 @@
+/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512ERINTRIN_H
+#define __AVX512ERINTRIN_H
+
+
+// rsqrt28
+static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_rsqrt28_round_pd (__m512d __A, int __R)
+{
+  return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A,
+                                                 (__v8df)_mm512_setzero_pd(),
+                                                 (__mmask8)-1,
+                                                 __R);
+}
+static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_rsqrt28_round_ps(__m512 __A, int __R)
+{
+  return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A,
+                                                (__v16sf)_mm512_setzero_ps(),
+                                                (__mmask16)-1,
+                                                __R);
+}
+
+static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R)
+{
+  return (__m128) __builtin_ia32_rsqrt28ss_mask ((__v4sf) __A,
+             (__v4sf) __B,
+             (__v4sf)
+             _mm_setzero_ps (),
+             (__mmask8) -1,
+             __R);
+}
+
+static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
+{
+  return (__m128d) __builtin_ia32_rsqrt28sd_mask ((__v2df) __A,
+              (__v2df) __B,
+              (__v2df)
+              _mm_setzero_pd (),
+              (__mmask8) -1,
+             __R);
+}
+
+
+// rcp28
+static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_rcp28_round_pd (__m512d __A, int __R)
+{
+  return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A,
+                                               (__v8df)_mm512_setzero_pd(),
+                                               (__mmask8)-1,
+                                               __R);
+}
+
+static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_rcp28_round_ps (__m512 __A, int __R)
+{
+  return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A,
+                                              (__v16sf)_mm512_setzero_ps (),
+                                              (__mmask16)-1,
+                                              __R);
+}
+
+static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
+{
+  return (__m128) __builtin_ia32_rcp28ss_mask ((__v4sf) __A,
+             (__v4sf) __B,
+             (__v4sf)
+             _mm_setzero_ps (),
+             (__mmask8) -1,
+             __R);
+}
+static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
+{
+  return (__m128d) __builtin_ia32_rcp28sd_mask ((__v2df) __A,
+              (__v2df) __B,
+              (__v2df)
+              _mm_setzero_pd (),
+              (__mmask8) -1,
+             __R);
+}
+
+#endif // __AVX512ERINTRIN_H
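
Illustrative only, not part of the patch: the *_round_* forms take an explicit rounding argument, and _MM_FROUND_CUR_DIRECTION (defined in avx512fintrin.h below) keeps the current rounding mode. The wrapper below is a hypothetical caller and needs an AVX512ER target (e.g. -mavx512er).

    #include <immintrin.h>

    /* High-precision (relative error below 2^-28) reciprocal square root
     * of 16 packed floats. */
    static inline __m512 approx_rsqrt(__m512 x) {
      return _mm512_rsqrt28_round_ps(x, _MM_FROUND_CUR_DIRECTION);
    }
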
diff --git a/renderscript/clang-include/avx512fintrin.h b/renderscript/clang-include/avx512fintrin.h
new file mode 100644
index 0000000..9591dcf
--- /dev/null
+++ b/renderscript/clang-include/avx512fintrin.h
@@ -0,0 +1,1003 @@
+/*===---- avx512fintrin.h - AVX512F intrinsics ------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512FINTRIN_H
+#define __AVX512FINTRIN_H
+
+typedef double __v8df __attribute__((__vector_size__(64)));
+typedef float __v16sf __attribute__((__vector_size__(64)));
+typedef long long __v8di __attribute__((__vector_size__(64)));
+typedef int __v16si __attribute__((__vector_size__(64)));
+
+typedef float __m512 __attribute__((__vector_size__(64)));
+typedef double __m512d __attribute__((__vector_size__(64)));
+typedef long long __m512i __attribute__((__vector_size__(64)));
+
+typedef unsigned char __mmask8;
+typedef unsigned short __mmask16;
+
+/* Rounding mode macros.  */
+#define _MM_FROUND_TO_NEAREST_INT   0x00
+#define _MM_FROUND_TO_NEG_INF       0x01
+#define _MM_FROUND_TO_POS_INF       0x02
+#define _MM_FROUND_TO_ZERO          0x03
+#define _MM_FROUND_CUR_DIRECTION    0x04
+
+/* Create vectors with repeated elements */
+
+static  __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_setzero_si512(void)
+{
+  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
+{
+  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
+                 (__v16si)
+                 _mm512_setzero_si512 (),
+                 __M);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
+                 (__v8di)
+                 _mm512_setzero_si512 (),
+                 __M);
+#else
+  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
+                 (__v8di)
+                 _mm512_setzero_si512 (),
+                 __M);
+#endif
+}
+
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_setzero_ps(void)
+{
+  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+}
+static  __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_setzero_pd(void)
+{
+  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
+}
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_set1_ps(float __w)
+{
+  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
+                   __w, __w, __w, __w, __w, __w, __w, __w  };
+}
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_set1_pd(double __w)
+{
+  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
+}
+
+static __inline __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_set1_epi32(int __s)
+{
+  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
+                             __s, __s, __s, __s, __s, __s, __s, __s };
+}
+
+static __inline __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_set1_epi64(long long __d)
+{
+  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
+}
+
+static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_broadcastss_ps(__m128 __X)
+{
+  float __f = __X[0];
+  return (__v16sf){ __f, __f, __f, __f,
+                    __f, __f, __f, __f,
+                    __f, __f, __f, __f,
+                    __f, __f, __f, __f };
+}
+
+static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_broadcastsd_pd(__m128d __X)
+{
+  double __d = __X[0];
+  return (__v8df){ __d, __d, __d, __d,
+                   __d, __d, __d, __d };
+}
+
+/* Cast between vector types */
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_castpd256_pd512(__m256d __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
+}
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_castps256_ps512(__m256 __a)
+{
+  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
+                                          -1, -1, -1, -1, -1, -1, -1, -1);
+}
+
+static __inline __m128d __attribute__((__always_inline__, __nodebug__))
+_mm512_castpd512_pd128(__m512d __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1);
+}
+
+static __inline __m128 __attribute__((__always_inline__, __nodebug__))
+_mm512_castps512_ps128(__m512 __a)
+{
+  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
+}
+
+/* Arithmetic */
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_add_pd(__m512d __a, __m512d __b)
+{
+  return __a + __b;
+}
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_add_ps(__m512 __a, __m512 __b)
+{
+  return __a + __b;
+}
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_mul_pd(__m512d __a, __m512d __b)
+{
+  return __a * __b;
+}
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_mul_ps(__m512 __a, __m512 __b)
+{
+  return __a * __b;
+}
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_sub_pd(__m512d __a, __m512d __b)
+{
+  return __a - __b;
+}
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_sub_ps(__m512 __a, __m512 __b)
+{
+  return __a - __b;
+}
+
+static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_max_pd(__m512d __A, __m512d __B)
+{
+  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
+             (__v8df) __B,
+             (__v8df)
+             _mm512_setzero_pd (),
+             (__mmask8) -1,
+             _MM_FROUND_CUR_DIRECTION);
+}
+
+static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_max_ps(__m512 __A, __m512 __B)
+{
+  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
+            (__v16sf) __B,
+            (__v16sf)
+            _mm512_setzero_ps (),
+            (__mmask16) -1,
+            _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m512i
+__attribute__ ((__always_inline__, __nodebug__))
+_mm512_max_epi32(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si)
+              _mm512_setzero_si512 (),
+              (__mmask16) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_max_epu32(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si)
+              _mm512_setzero_si512 (),
+              (__mmask16) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_max_epi64(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_max_epu64(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              (__mmask8) -1);
+}
+
+static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_min_pd(__m512d __A, __m512d __B)
+{
+  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
+             (__v8df) __B,
+             (__v8df)
+             _mm512_setzero_pd (),
+             (__mmask8) -1,
+             _MM_FROUND_CUR_DIRECTION);
+}
+
+static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_min_ps(__m512 __A, __m512 __B)
+{
+  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
+            (__v16sf) __B,
+            (__v16sf)
+            _mm512_setzero_ps (),
+            (__mmask16) -1,
+            _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m512i
+__attribute__ ((__always_inline__, __nodebug__))
+_mm512_min_epi32(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si)
+              _mm512_setzero_si512 (),
+              (__mmask16) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_min_epu32(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
+              (__v16si) __B,
+              (__v16si)
+              _mm512_setzero_si512 (),
+              (__mmask16) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_min_epi64(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_min_epu64(__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
+              (__v8di) __B,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mul_epi32(__m512i __X, __m512i __Y)
+{
+  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
+              (__v16si) __Y,
+              (__v8di)
+              _mm512_setzero_si512 (),
+              (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mul_epu32(__m512i __X, __m512i __Y)
+{
+  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
+               (__v16si) __Y,
+               (__v8di)
+               _mm512_setzero_si512 (),
+               (__mmask8) -1);
+}
+
+static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_sqrt_pd(__m512d a)
+{
+  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
+                                                (__v8df) _mm512_setzero_pd (),
+                                                (__mmask8) -1,
+                                                _MM_FROUND_CUR_DIRECTION);
+}
+
+static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_sqrt_ps(__m512 a)
+{
+  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
+                                               (__v16sf) _mm512_setzero_ps (),
+                                               (__mmask16) -1,
+                                               _MM_FROUND_CUR_DIRECTION);
+}
+
+static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_rsqrt14_pd(__m512d __A)
+{
+  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
+                 (__v8df)
+                 _mm512_setzero_pd (),
+                 (__mmask8) -1);
+}
+
+static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_rsqrt14_ps(__m512 __A)
+{
+  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
+                (__v16sf)
+                _mm512_setzero_ps (),
+                (__mmask16) -1);
+}
+
+static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_rsqrt14_ss(__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
+             (__v4sf) __B,
+             (__v4sf)
+             _mm_setzero_ps (),
+             (__mmask8) -1);
+}
+
+static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_rsqrt14_sd(__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
+              (__v2df) __B,
+              (__v2df)
+              _mm_setzero_pd (),
+              (__mmask8) -1);
+}
+
+static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_rcp14_pd(__m512d __A)
+{
+  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
+               (__v8df)
+               _mm512_setzero_pd (),
+               (__mmask8) -1);
+}
+
+static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_rcp14_ps(__m512 __A)
+{
+  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
+              (__v16sf)
+              _mm512_setzero_ps (),
+              (__mmask16) -1);
+}
+static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
+_mm_rcp14_ss(__m128 __A, __m128 __B)
+{
+  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
+                 (__v4sf) __B,
+                 (__v4sf)
+                 _mm_setzero_ps (),
+                 (__mmask8) -1);
+}
+
+static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
+_mm_rcp14_sd(__m128d __A, __m128d __B)
+{
+  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
+            (__v2df) __B,
+            (__v2df)
+            _mm_setzero_pd (),
+            (__mmask8) -1);
+}
+
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_floor_ps(__m512 __A)
+{
+  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+                                                  _MM_FROUND_FLOOR,
+                                                  (__v16sf) __A, -1,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_floor_pd(__m512d __A)
+{
+  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+                                                   _MM_FROUND_FLOOR,
+                                                   (__v8df) __A, -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_ceil_ps(__m512 __A)
+{
+  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
+                                                  _MM_FROUND_CEIL,
+                                                  (__v16sf) __A, -1,
+                                                  _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_ceil_pd(__m512d __A)
+{
+  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
+                                                   _MM_FROUND_CEIL,
+                                                   (__v8df) __A, -1,
+                                                   _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
+_mm512_abs_epi64(__m512i __A)
+{
+  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
+             (__v8di)
+             _mm512_setzero_si512 (),
+             (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
+_mm512_abs_epi32(__m512i __A)
+{
+  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
+             (__v16si)
+             _mm512_setzero_si512 (),
+             (__mmask16) -1);
+}
+
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_roundscale_ps(__m512 __A, const int __imm)
+{
+  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
+              (__v16sf) __A, -1,
+              _MM_FROUND_CUR_DIRECTION);
+}
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_roundscale_pd(__m512d __A, const int __imm)
+{
+  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
+               (__v8df) __A, -1,
+               _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d)
+    __builtin_ia32_vfmaddpd512_mask(__A,
+                                    __B,
+                                    __C,
+                                    (__mmask8) -1,
+                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d)
+    __builtin_ia32_vfmsubpd512_mask(__A,
+                                    __B,
+                                    __C,
+                                    (__mmask8) -1,
+                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
+{
+  return (__m512d)
+    __builtin_ia32_vfnmaddpd512_mask(__A,
+                                     __B,
+                                     __C,
+                                     (__mmask8) -1,
+                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512)
+    __builtin_ia32_vfmaddps512_mask(__A,
+                                    __B,
+                                    __C,
+                                    (__mmask16) -1,
+                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512)
+    __builtin_ia32_vfmsubps512_mask(__A,
+                                    __B,
+                                    __C,
+                                    (__mmask16) -1,
+                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
+{
+  return (__m512)
+    __builtin_ia32_vfnmaddps512_mask(__A,
+                                     __B,
+                                     __C,
+                                     (__mmask16) -1,
+                                     _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Vector permutations */
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
+                                                       /* idx */ ,
+                                                       (__v16si) __A,
+                                                       (__v16si) __B,
+                                                       (__mmask16) -1);
+}
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
+                                                       /* idx */ ,
+                                                       (__v8di) __A,
+                                                       (__v8di) __B,
+                                                       (__mmask8) -1);
+}
+
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
+{
+  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
+                                                        /* idx */ ,
+                                                        (__v8df) __A,
+                                                        (__v8df) __B,
+                                                        (__mmask8) -1);
+}
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
+{
+  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
+                                                       /* idx */ ,
+                                                       (__v16sf) __A,
+                                                       (__v16sf) __B,
+                                                       (__mmask16) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
+{
+  return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
+                                                 (__v8di)__B,
+                                                 __I,
+                                                 (__v8di)_mm512_setzero_si512(),
+                                                 (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
+{
+  return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
+                                                (__v16si)__B,
+                                                __I,
+                                                (__v16si)_mm512_setzero_si512(),
+                                                (__mmask16) -1);
+}
+
+/* Vector Blend */
+
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
+{
+  return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
+                 (__v8df) __W,
+                 (__mmask8) __U);
+}
+
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
+{
+  return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
+                (__v16sf) __W,
+                (__mmask16) __U);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
+{
+  return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
+                (__v8di) __W,
+                (__mmask8) __U);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
+{
+  return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
+                (__v16si) __W,
+                (__mmask16) __U);
+}
+
+/* Compare */
+
+static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
+{
+  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
+               (__v16sf) b, p, (__mmask16) -1,
+               _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__)) 
+_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
+{
+  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
+              (__v8df) __Y, __P,
+              (__mmask8) -1,
+              _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Conversion */
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvttps_epu32(__m512 __A)
+{
+  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
+                  (__v16si)
+                  _mm512_setzero_si512 (),
+                  (__mmask16) -1,
+                  _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
+_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
+{
+  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
+               (__v16sf)
+               _mm512_setzero_ps (),
+               (__mmask16) -1,
+               __R);
+}
+
+static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
+_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
+{
+  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
+               (__v16sf)
+               _mm512_setzero_ps (),
+               (__mmask16) -1,
+               __R);
+}
+
+static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
+_mm512_cvtepi32_pd(__m256i __A)
+{
+  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
+                (__v8df)
+                _mm512_setzero_pd (),
+                (__mmask8) -1);
+}
+
+static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
+_mm512_cvtepu32_pd(__m256i __A)
+{
+  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
+                (__v8df)
+                _mm512_setzero_pd (),
+                (__mmask8) -1);
+}
+static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
+_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
+{
+  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
+               (__v8sf)
+               _mm256_setzero_ps (),
+               (__mmask8) -1,
+               __R);
+}
+
+static  __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvtps_ph(__m512 __A, const int __I)
+{
+  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
+                 __I,
+                 (__v16hi)
+                 _mm256_setzero_si256 (),
+                 -1);
+}
+
+static  __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvtph_ps(__m256i __A)
+{
+  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
+                (__v16sf)
+                _mm512_setzero_ps (),
+                (__mmask16) -1,
+                _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m512i __attribute__((__always_inline__, __nodebug__))
+_mm512_cvttps_epi32(__m512 a)
+{
+  return (__m512i)
+    __builtin_ia32_cvttps2dq512_mask((__v16sf) a,
+                                     (__v16si) _mm512_setzero_si512 (),
+                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m256i __attribute__((__always_inline__, __nodebug__))
+_mm512_cvttpd_epi32(__m512d a)
+{
+  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
+                                                   (__v8si)_mm256_setzero_si256(),
+                                                   (__mmask8) -1,
+                                                    _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
+{
+  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
+                 (__v8si)
+                 _mm256_setzero_si256 (),
+                 (__mmask8) -1,
+                 __R);
+}
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
+{
+  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
+                 (__v16si)
+                 _mm512_setzero_si512 (),
+                 (__mmask16) -1,
+                 __R);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
+{
+  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
+                (__v16si)
+                _mm512_setzero_si512 (),
+                (__mmask16) -1,
+                __R);
+}
+static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
+{
+  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
+                (__v8si)
+                _mm256_setzero_si256 (),
+                (__mmask8) -1,
+                __R);
+}
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
+{
+  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
+                (__v16si)
+                _mm512_setzero_si512 (),
+                (__mmask16) -1,
+                __R);
+}
+static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
+{
+  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
+                (__v8si)
+                _mm256_setzero_si256 (),
+                (__mmask8) -1,
+                __R);
+}
+
+/* Bit Test */
+
+static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_test_epi32_mask(__m512i __A, __m512i __B)
+{
+  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
+            (__v16si) __B,
+            (__mmask16) -1);
+}
+
+static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_test_epi64_mask(__m512i __A, __m512i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
+                 (__v8di) __B,
+                 (__mmask8) -1);
+}
+
+/* SIMD load ops */
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
+{
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P,
+                                                     (__v16si)
+                                                     _mm512_setzero_si512 (),
+                                                     (__mmask16) __U);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
+{
+  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P,
+                                                     (__v8di)
+                                                     _mm512_setzero_si512 (),
+                                                     (__mmask8) __U);
+}
+
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
+{
+  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P,
+                                                  (__v16sf)
+                                                  _mm512_setzero_ps (),
+                                                  (__mmask16) __U);
+}
+
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
+{
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P,
+                                                   (__v8df)
+                                                   _mm512_setzero_pd (),
+                                                   (__mmask8) __U);
+}
+
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_loadu_pd(double const *__p)
+{
+  struct __loadu_pd {
+    __m512d __v;
+  } __attribute__((packed, may_alias));
+  return ((struct __loadu_pd*)__p)->__v;
+}
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_loadu_ps(float const *__p)
+{
+  struct __loadu_ps {
+    __m512 __v;
+  } __attribute__((packed, may_alias));
+  return ((struct __loadu_ps*)__p)->__v;
+}
+
+/* SIMD store ops */
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
+{
+  __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A,
+                                     (__mmask8) __U);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
+{
+  __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A,
+                                     (__mmask16) __U);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
+{
+  __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_storeu_pd(void *__P, __m512d __A)
+{
+  __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
+{
+  __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A,
+                                   (__mmask16) __U);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_storeu_ps(void *__P, __m512 __A)
+{
+  __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_store_ps(void *__P, __m512 __A)
+{
+  *(__m512*)__P = __A;
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_store_pd(void *__P, __m512d __A)
+{
+  *(__m512d*)__P = __A;
+}
+
+/* Mask ops */
+
+static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_knot(__mmask16 __M)
+{
+  return __builtin_ia32_knothi(__M);
+}
+
+/* Integer compare */
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
+                                                   (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
+                                                   __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
+                                                  __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
+                                                  (__mmask8)-1);
+}
+
+#endif // __AVX512FINTRIN_H
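
A brief usage sketch of the AVX-512F intrinsics added above (illustrative only, not part of the patch; assumes a compiler and CPU with AVX-512F, e.g. clang -mavx512f). It clamps sixteen floats to an upper bound with an unmasked compare followed by a masked blend:

    /* Illustrative sketch, not part of the patch. */
    #include <immintrin.h>
    #include <stdio.h>

    /* Clamp 16 floats to an upper bound using a compare mask and a blend. */
    static void clamp16(const float *in, float bound, float *out)
    {
      __m512 v     = _mm512_loadu_ps(in);
      __m512 limit = _mm512_set1_ps(bound);
      /* lanes where v < limit keep v; the remaining lanes take limit */
      __mmask16 lt = _mm512_cmp_ps_mask(v, limit, _CMP_LT_OS);
      __m512 r     = _mm512_mask_blend_ps(lt, limit, v);
      _mm512_storeu_ps(out, r);
    }

    int main(void)
    {
      float in[16], out[16];
      for (int i = 0; i < 16; ++i) in[i] = (float)i;
      clamp16(in, 10.0f, out);
      for (int i = 0; i < 16; ++i) printf("%g ", out[i]);
      printf("\n");
      return 0;
    }

The __mmask16 produced by _mm512_cmp_ps_mask selects per lane between the two blend inputs, which is the same pattern most of the masked intrinsics above follow.
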
diff --git a/renderscript/clang-include/avx512vlbwintrin.h b/renderscript/clang-include/avx512vlbwintrin.h
new file mode 100644
index 0000000..11333f8
--- /dev/null
+++ b/renderscript/clang-include/avx512vlbwintrin.h
@@ -0,0 +1,83 @@
+/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ----------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLBWINTRIN_H
+#define __AVX512VLBWINTRIN_H
+
+/* Integer compare */
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epi8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b,
+                                                   (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_pcmpeqb128_mask((__v16qi)__a, (__v16qi)__b,
+                                                   __u);
+}
+
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b,
+                                                   (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_pcmpeqb256_mask((__v32qi)__a, (__v32qi)__b,
+                                                   __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
+                                                  (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
+                                                  __u);
+}
+
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b,
+                                                   (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_pcmpeqw256_mask((__v16hi)__a, (__v16hi)__b,
+                                                   __u);
+}
+
+#endif /* __AVX512VLBWINTRIN_H */
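
A small sketch of the byte-granularity compare masks above (illustrative only, not part of the patch; assumes -mavx512vl -mavx512bw). It counts equal bytes in two 32-byte buffers by popcounting the __mmask32 result:

    /* Illustrative sketch, not part of the patch. */
    #include <immintrin.h>

    /* Count how many of 32 bytes are equal, one mask bit per byte. */
    static int equal_bytes32(const void *a, const void *b)
    {
      __m256i va = _mm256_loadu_si256((const __m256i *)a);
      __m256i vb = _mm256_loadu_si256((const __m256i *)b);
      __mmask32 eq = _mm256_cmpeq_epi8_mask(va, vb);
      return __builtin_popcount(eq);
    }
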
diff --git a/renderscript/clang-include/avx512vlintrin.h b/renderscript/clang-include/avx512vlintrin.h
new file mode 100644
index 0000000..8a374b1
--- /dev/null
+++ b/renderscript/clang-include/avx512vlintrin.h
@@ -0,0 +1,83 @@
+/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512VLINTRIN_H
+#define __AVX512VLINTRIN_H
+
+/* Integer compare */
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
+                                                  (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
+                                                  __u);
+}
+
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
+                                                  (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
+                                                  __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
+                                                  (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
+                                                  __u);
+}
+
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
+                                                  (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
+                                                  __u);
+}
+
+#endif /* __AVX512VLINTRIN_H */
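
Similarly, a sketch using the 256-bit dword compare mask defined above (illustrative only, not part of the patch; assumes -mavx512vl):

    /* Illustrative sketch, not part of the patch. */
    #include <immintrin.h>

    /* Return the lane index (0..7) of the first 32-bit element equal to key,
     * or -1 if no lane matches. */
    static int find_epi32(const int *p, int key)
    {
      __m256i v   = _mm256_loadu_si256((const __m256i *)p);
      __mmask8 eq = _mm256_cmpeq_epi32_mask(v, _mm256_set1_epi32(key));
      return eq ? __builtin_ctz(eq) : -1;
    }
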
diff --git a/renderscript/clang-include/bmiintrin.h b/renderscript/clang-include/bmiintrin.h
index 43c4a5e..0e5fd55 100644
--- a/renderscript/clang-include/bmiintrin.h
+++ b/renderscript/clang-include/bmiintrin.h
@@ -43,7 +43,7 @@
 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
 __tzcnt_u16(unsigned short __X)
 {
-  return __builtin_ctzs(__X);
+  return __X ? __builtin_ctzs(__X) : 16;
 }
 
 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
@@ -87,7 +87,7 @@
 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
 __tzcnt_u32(unsigned int __X)
 {
-  return __builtin_ctz(__X);
+  return __X ? __builtin_ctz(__X) : 32;
 }
 
 #ifdef __x86_64__
@@ -140,7 +140,7 @@
 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
 __tzcnt_u64(unsigned long long __X)
 {
-  return __builtin_ctzll(__X);
+  return __X ? __builtin_ctzll(__X) : 64;
 }
 
 #endif /* __x86_64__ */
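
The change above gives __tzcnt_u16/u32/u64 a defined result for a zero input (the operand width), matching the TZCNT instruction rather than the undefined __builtin_ctz behavior. A quick check (illustrative only, not part of the patch; assumes -mbmi):

    /* Illustrative sketch, not part of the patch. */
    #include <x86intrin.h>
    #include <assert.h>

    int main(void)
    {
      assert(__tzcnt_u32(0x8u) == 3);  /* lowest set bit is bit 3 */
      assert(__tzcnt_u32(0u) == 32);   /* zero input now yields the width */
      return 0;
    }
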
diff --git a/renderscript/clang-include/cpuid.h b/renderscript/clang-include/cpuid.h
index f9254e9..5da02e0 100644
--- a/renderscript/clang-include/cpuid.h
+++ b/renderscript/clang-include/cpuid.h
@@ -25,6 +25,60 @@
 #error this header is for x86 only
 #endif
 
+/* Responses to an identification request with %eax 0 */
+/* AMD:     "AuthenticAMD" */
+#define signature_AMD_ebx 0x68747541
+#define signature_AMD_edx 0x69746e65
+#define signature_AMD_ecx 0x444d4163
+/* CENTAUR: "CentaurHauls" */
+#define signature_CENTAUR_ebx 0x746e6543
+#define signature_CENTAUR_edx 0x48727561
+#define signature_CENTAUR_ecx 0x736c7561
+/* CYRIX:   "CyrixInstead" */
+#define signature_CYRIX_ebx 0x69727943
+#define signature_CYRIX_edx 0x736e4978
+#define signature_CYRIX_ecx 0x64616574
+/* INTEL:   "GenuineIntel" */
+#define signature_INTEL_ebx 0x756e6547
+#define signature_INTEL_edx 0x49656e69
+#define signature_INTEL_ecx 0x6c65746e
+/* TM1:     "TransmetaCPU" */
+#define signature_TM1_ebx 0x6e617254
+#define signature_TM1_edx 0x74656d73
+#define signature_TM1_ecx 0x55504361
+/* TM2:     "GenuineTMx86" */
+#define signature_TM2_ebx 0x756e6547
+#define signature_TM2_edx 0x54656e69
+#define signature_TM2_ecx 0x3638784d
+/* NSC:     "Geode by NSC" */
+#define signature_NSC_ebx 0x646f6547
+#define signature_NSC_edx 0x43534e20
+#define signature_NSC_ecx 0x79622065
+/* NEXGEN:  "NexGenDriven" */
+#define signature_NEXGEN_ebx 0x4778654e
+#define signature_NEXGEN_edx 0x72446e65
+#define signature_NEXGEN_ecx 0x6e657669
+/* RISE:    "RiseRiseRise" */
+#define signature_RISE_ebx 0x65736952
+#define signature_RISE_edx 0x65736952
+#define signature_RISE_ecx 0x65736952
+/* SIS:     "SiS SiS SiS " */
+#define signature_SIS_ebx 0x20536953
+#define signature_SIS_edx 0x20536953
+#define signature_SIS_ecx 0x20536953
+/* UMC:     "UMC UMC UMC " */
+#define signature_UMC_ebx 0x20434d55
+#define signature_UMC_edx 0x20434d55
+#define signature_UMC_ecx 0x20434d55
+/* VIA:     "VIA VIA VIA " */
+#define signature_VIA_ebx 0x20414956
+#define signature_VIA_edx 0x20414956
+#define signature_VIA_ecx 0x20414956
+/* VORTEX:  "Vortex86 SoC" */
+#define signature_VORTEX_ebx 0x74726f56
+#define signature_VORTEX_edx 0x36387865
+#define signature_VORTEX_ecx 0x436f5320
+
 /* Features in %ecx for level 1 */
 #define bit_SSE3        0x00000001
 #define bit_PCLMULQDQ   0x00000002
@@ -53,7 +107,7 @@
 #define bit_XSAVE       0x04000000
 #define bit_OSXSAVE     0x08000000
 #define bit_AVX         0x10000000
-#define bit_RDRAND      0x40000000
+#define bit_RDRND       0x40000000
 
 /* Features in %edx for level 1 */
 #define bit_FPU         0x00000001
@@ -92,31 +146,29 @@
 #define bit_SMEP        0x00000080
 #define bit_ENH_MOVSB   0x00000200
 
-/* PIC on i386 uses %ebx, so preserve it. */
 #if __i386__
 #define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
-    __asm("  pushl  %%ebx\n" \
-          "  cpuid\n" \
-          "  mov    %%ebx,%1\n" \
-          "  popl   %%ebx" \
-        : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
-        : "0"(__level))
-
-#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
-    __asm("  pushl  %%ebx\n" \
-          "  cpuid\n" \
-          "  mov    %%ebx,%1\n" \
-          "  popl   %%ebx" \
-        : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
-        : "0"(__level), "2"(__count))
-#else
-#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
     __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
                   : "0"(__level))
 
 #define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
     __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
                   : "0"(__level), "2"(__count))
+#else
+/* x86-64 uses %rbx as the base register, so preserve it. */
+#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
+    __asm("  xchgq  %%rbx,%q1\n" \
+          "  cpuid\n" \
+          "  xchgq  %%rbx,%q1" \
+        : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
+        : "0"(__level))
+
+#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
+    __asm("  xchgq  %%rbx,%q1\n" \
+          "  cpuid\n" \
+          "  xchgq  %%rbx,%q1" \
+        : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \
+        : "0"(__level), "2"(__count))
 #endif
 
 static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,
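
The new signature_* macros pair naturally with __get_cpuid for vendor detection; a host-side sketch (illustrative only, not part of the patch):

    /* Illustrative sketch, not part of the patch. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
      unsigned int eax, ebx, ecx, edx;
      if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
        return 1;
      if (ebx == signature_INTEL_ebx && edx == signature_INTEL_edx &&
          ecx == signature_INTEL_ecx)
        puts("GenuineIntel");
      else if (ebx == signature_AMD_ebx && edx == signature_AMD_edx &&
               ecx == signature_AMD_ecx)
        puts("AuthenticAMD");
      else
        puts("other vendor");
      return 0;
    }
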
diff --git a/renderscript/clang-include/float.h b/renderscript/clang-include/float.h
index 02ef6bf..238cf76 100644
--- a/renderscript/clang-include/float.h
+++ b/renderscript/clang-include/float.h
@@ -28,7 +28,7 @@
  * additional definitions provided for Windows.
  * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx
  */
-#if (defined(__MINGW32__) || defined(_MSC_VER)) && \
+#if (defined(__MINGW32__) || defined(_MSC_VER)) && __STDC_HOSTED__ && \
     __has_include_next(<float.h>)
 #  include_next <float.h>
 
diff --git a/renderscript/clang-include/immintrin.h b/renderscript/clang-include/immintrin.h
index df4bea8..2400fea 100644
--- a/renderscript/clang-include/immintrin.h
+++ b/renderscript/clang-include/immintrin.h
@@ -76,6 +76,26 @@
 #include <fmaintrin.h>
 #endif
 
+#ifdef __AVX512F__
+#include <avx512fintrin.h>
+#endif
+
+#ifdef __AVX512VL__
+#include <avx512vlintrin.h>
+#endif
+
+#ifdef __AVX512BW__
+#include <avx512bwintrin.h>
+#endif
+
+#if defined (__AVX512VL__) && defined (__AVX512BW__)
+#include <avx512vlbwintrin.h>
+#endif
+
+#ifdef __AVX512ER__
+#include <avx512erintrin.h>
+#endif
+
 #ifdef __RDRND__
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _rdrand16_step(unsigned short *__p)
@@ -98,6 +118,58 @@
 #endif
 #endif /* __RDRND__ */
 
+#ifdef __FSGSBASE__
+#ifdef __x86_64__
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+_readfsbase_u32(void)
+{
+  return __builtin_ia32_rdfsbase32();
+}
+
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+_readfsbase_u64(void)
+{
+  return __builtin_ia32_rdfsbase64();
+}
+
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+_readgsbase_u32(void)
+{
+  return __builtin_ia32_rdgsbase32();
+}
+
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+_readgsbase_u64(void)
+{
+  return __builtin_ia32_rdgsbase64();
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_writefsbase_u32(unsigned int __V)
+{
+  return __builtin_ia32_wrfsbase32(__V);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_writefsbase_u64(unsigned long long __V)
+{
+  return __builtin_ia32_wrfsbase64(__V);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_writegsbase_u32(unsigned int __V)
+{
+  return __builtin_ia32_wrgsbase32(__V);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_writegsbase_u64(unsigned long long __V)
+{
+  return __builtin_ia32_wrgsbase64(__V);
+}
+#endif
+#endif /* __FSGSBASE__ */
+
 #ifdef __RTM__
 #include <rtmintrin.h>
 #endif
@@ -115,4 +187,8 @@
 #include <shaintrin.h>
 #endif
 
+/* Some intrinsics inside adxintrin.h are available only if __ADX__ is
+ * defined, whereas others are available even when it is not. */
+#include <adxintrin.h>
+
 #endif /* __IMMINTRIN_H */
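
With the AVX-512 headers now chained in and _rdrand*_step defined here, callers use the step functions in a retry loop because the RDRAND instruction can transiently fail. A sketch (illustrative only, not part of the patch; assumes -mrdrnd):

    /* Illustrative sketch, not part of the patch. */
    #include <immintrin.h>
    #include <stdio.h>

    int main(void)
    {
      unsigned int r;
      /* Retry until the hardware reports success (return value 1). */
      while (!_rdrand32_step(&r))
        ;
      printf("random: %u\n", r);
      return 0;
    }
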
diff --git a/renderscript/clang-include/lzcntintrin.h b/renderscript/clang-include/lzcntintrin.h
index 62ab5ca..35d6659 100644
--- a/renderscript/clang-include/lzcntintrin.h
+++ b/renderscript/clang-include/lzcntintrin.h
@@ -35,20 +35,32 @@
 static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
 __lzcnt16(unsigned short __X)
 {
-  return __builtin_clzs(__X);
+  return __X ? __builtin_clzs(__X) : 16;
 }
 
 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
 __lzcnt32(unsigned int __X)
 {
-  return __builtin_clz(__X);
+  return __X ? __builtin_clz(__X) : 32;
+}
+
+static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
+_lzcnt_u32(unsigned int __X)
+{
+  return __X ? __builtin_clz(__X) : 32;
 }
 
 #ifdef __x86_64__
 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
 __lzcnt64(unsigned long long __X)
 {
-  return __builtin_clzll(__X);
+  return __X ? __builtin_clzll(__X) : 64;
+}
+
+static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
+_lzcnt_u64(unsigned long long __X)
+{
+  return __X ? __builtin_clzll(__X) : 64;
 }
 #endif
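
As with TZCNT, the new _lzcnt_u32/_lzcnt_u64 wrappers and the zero-input fix provide the LZCNT semantics of returning the operand width for zero. A quick check (illustrative only, not part of the patch; assumes -mlzcnt and inclusion via <x86intrin.h>):

    /* Illustrative sketch, not part of the patch. */
    #include <x86intrin.h>
    #include <assert.h>

    int main(void)
    {
      assert(_lzcnt_u32(1u) == 31);  /* only bit 0 set: 31 leading zeros */
      assert(_lzcnt_u32(0u) == 32);  /* zero input yields the operand width */
      return 0;
    }
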
 
diff --git a/renderscript/clang-include/stdatomic.h b/renderscript/clang-include/stdatomic.h
new file mode 100644
index 0000000..e3c3476
--- /dev/null
+++ b/renderscript/clang-include/stdatomic.h
@@ -0,0 +1,190 @@
+/*===---- stdatomic.h - Standard header for atomic types and operations -----===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __CLANG_STDATOMIC_H
+#define __CLANG_STDATOMIC_H
+
+/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for
+ * example, already has a Clang-compatible stdatomic.h header.
+ */
+#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>)
+# include_next <stdatomic.h>
+#else
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* 7.17.1 Introduction */
+
+#define ATOMIC_BOOL_LOCK_FREE       __GCC_ATOMIC_BOOL_LOCK_FREE
+#define ATOMIC_CHAR_LOCK_FREE       __GCC_ATOMIC_CHAR_LOCK_FREE
+#define ATOMIC_CHAR16_T_LOCK_FREE   __GCC_ATOMIC_CHAR16_T_LOCK_FREE
+#define ATOMIC_CHAR32_T_LOCK_FREE   __GCC_ATOMIC_CHAR32_T_LOCK_FREE
+#define ATOMIC_WCHAR_T_LOCK_FREE    __GCC_ATOMIC_WCHAR_T_LOCK_FREE
+#define ATOMIC_SHORT_LOCK_FREE      __GCC_ATOMIC_SHORT_LOCK_FREE
+#define ATOMIC_INT_LOCK_FREE        __GCC_ATOMIC_INT_LOCK_FREE
+#define ATOMIC_LONG_LOCK_FREE       __GCC_ATOMIC_LONG_LOCK_FREE
+#define ATOMIC_LLONG_LOCK_FREE      __GCC_ATOMIC_LLONG_LOCK_FREE
+#define ATOMIC_POINTER_LOCK_FREE    __GCC_ATOMIC_POINTER_LOCK_FREE
+
+/* 7.17.2 Initialization */
+
+#define ATOMIC_VAR_INIT(value) (value)
+#define atomic_init __c11_atomic_init
+
+/* 7.17.3 Order and consistency */
+
+typedef enum memory_order {
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_consume = __ATOMIC_CONSUME,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+#define kill_dependency(y) (y)
+
+/* 7.17.4 Fences */
+
+// These should be provided by the libc implementation.
+void atomic_thread_fence(memory_order);
+void atomic_signal_fence(memory_order);
+
+#define atomic_thread_fence(order) __c11_atomic_thread_fence(order)
+#define atomic_signal_fence(order) __c11_atomic_signal_fence(order)
+
+/* 7.17.5 Lock-free property */
+
+#define atomic_is_lock_free(obj) __c11_atomic_is_lock_free(sizeof(*(obj)))
+
+/* 7.17.6 Atomic integer types */
+
+#ifdef __cplusplus
+typedef _Atomic(bool)               atomic_bool;
+#else
+typedef _Atomic(_Bool)              atomic_bool;
+#endif
+typedef _Atomic(char)               atomic_char;
+typedef _Atomic(signed char)        atomic_schar;
+typedef _Atomic(unsigned char)      atomic_uchar;
+typedef _Atomic(short)              atomic_short;
+typedef _Atomic(unsigned short)     atomic_ushort;
+typedef _Atomic(int)                atomic_int;
+typedef _Atomic(unsigned int)       atomic_uint;
+typedef _Atomic(long)               atomic_long;
+typedef _Atomic(unsigned long)      atomic_ulong;
+typedef _Atomic(long long)          atomic_llong;
+typedef _Atomic(unsigned long long) atomic_ullong;
+typedef _Atomic(uint_least16_t)     atomic_char16_t;
+typedef _Atomic(uint_least32_t)     atomic_char32_t;
+typedef _Atomic(wchar_t)            atomic_wchar_t;
+typedef _Atomic(int_least8_t)       atomic_int_least8_t;
+typedef _Atomic(uint_least8_t)      atomic_uint_least8_t;
+typedef _Atomic(int_least16_t)      atomic_int_least16_t;
+typedef _Atomic(uint_least16_t)     atomic_uint_least16_t;
+typedef _Atomic(int_least32_t)      atomic_int_least32_t;
+typedef _Atomic(uint_least32_t)     atomic_uint_least32_t;
+typedef _Atomic(int_least64_t)      atomic_int_least64_t;
+typedef _Atomic(uint_least64_t)     atomic_uint_least64_t;
+typedef _Atomic(int_fast8_t)        atomic_int_fast8_t;
+typedef _Atomic(uint_fast8_t)       atomic_uint_fast8_t;
+typedef _Atomic(int_fast16_t)       atomic_int_fast16_t;
+typedef _Atomic(uint_fast16_t)      atomic_uint_fast16_t;
+typedef _Atomic(int_fast32_t)       atomic_int_fast32_t;
+typedef _Atomic(uint_fast32_t)      atomic_uint_fast32_t;
+typedef _Atomic(int_fast64_t)       atomic_int_fast64_t;
+typedef _Atomic(uint_fast64_t)      atomic_uint_fast64_t;
+typedef _Atomic(intptr_t)           atomic_intptr_t;
+typedef _Atomic(uintptr_t)          atomic_uintptr_t;
+typedef _Atomic(size_t)             atomic_size_t;
+typedef _Atomic(ptrdiff_t)          atomic_ptrdiff_t;
+typedef _Atomic(intmax_t)           atomic_intmax_t;
+typedef _Atomic(uintmax_t)          atomic_uintmax_t;
+
+/* 7.17.7 Operations on atomic types */
+
+#define atomic_store(object, desired) __c11_atomic_store(object, desired, __ATOMIC_SEQ_CST)
+#define atomic_store_explicit __c11_atomic_store
+
+#define atomic_load(object) __c11_atomic_load(object, __ATOMIC_SEQ_CST)
+#define atomic_load_explicit __c11_atomic_load
+
+#define atomic_exchange(object, desired) __c11_atomic_exchange(object, desired, __ATOMIC_SEQ_CST)
+#define atomic_exchange_explicit __c11_atomic_exchange
+
+#define atomic_compare_exchange_strong(object, expected, desired) __c11_atomic_compare_exchange_strong(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+#define atomic_compare_exchange_strong_explicit __c11_atomic_compare_exchange_strong
+
+#define atomic_compare_exchange_weak(object, expected, desired) __c11_atomic_compare_exchange_weak(object, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+#define atomic_compare_exchange_weak_explicit __c11_atomic_compare_exchange_weak
+
+#define atomic_fetch_add(object, operand) __c11_atomic_fetch_add(object, operand, __ATOMIC_SEQ_CST)
+#define atomic_fetch_add_explicit __c11_atomic_fetch_add
+
+#define atomic_fetch_sub(object, operand) __c11_atomic_fetch_sub(object, operand, __ATOMIC_SEQ_CST)
+#define atomic_fetch_sub_explicit __c11_atomic_fetch_sub
+
+#define atomic_fetch_or(object, operand) __c11_atomic_fetch_or(object, operand, __ATOMIC_SEQ_CST)
+#define atomic_fetch_or_explicit __c11_atomic_fetch_or
+
+#define atomic_fetch_xor(object, operand) __c11_atomic_fetch_xor(object, operand, __ATOMIC_SEQ_CST)
+#define atomic_fetch_xor_explicit __c11_atomic_fetch_xor
+
+#define atomic_fetch_and(object, operand) __c11_atomic_fetch_and(object, operand, __ATOMIC_SEQ_CST)
+#define atomic_fetch_and_explicit __c11_atomic_fetch_and
+
+/* 7.17.8 Atomic flag type and operations */
+
+typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;
+
+#define ATOMIC_FLAG_INIT { 0 }
+
+// These should be provided by the libc implementation.
+#ifdef __cplusplus
+bool atomic_flag_test_and_set(volatile atomic_flag *);
+bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);
+#else
+_Bool atomic_flag_test_and_set(volatile atomic_flag *);
+_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);
+#endif
+void atomic_flag_clear(volatile atomic_flag *);
+void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order);
+
+#define atomic_flag_test_and_set(object) __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST)
+#define atomic_flag_test_and_set_explicit(object, order) __c11_atomic_exchange(&(object)->_Value, 1, order)
+
+#define atomic_flag_clear(object) __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST)
+#define atomic_flag_clear_explicit(object, order) __c11_atomic_store(&(object)->_Value, 0, order)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __STDC_HOSTED__ */
+#endif /* __CLANG_STDATOMIC_H */
+
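
A minimal C11 example of the generic macros and atomic_flag operations defined in this freestanding header (illustrative only, not part of the patch; compile with e.g. clang -std=c11):

    /* Illustrative sketch, not part of the patch. */
    #include <stdatomic.h>
    #include <stdio.h>

    int main(void)
    {
      atomic_int counter = ATOMIC_VAR_INIT(0);
      atomic_flag lock   = ATOMIC_FLAG_INIT;

      atomic_fetch_add(&counter, 5);
      atomic_fetch_sub_explicit(&counter, 2, memory_order_relaxed);

      if (!atomic_flag_test_and_set(&lock)) {
        /* first acquisition succeeds; release it again */
        atomic_flag_clear(&lock);
      }

      printf("counter = %d\n", atomic_load(&counter));  /* prints 3 */
      return 0;
    }
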
diff --git a/renderscript/clang-include/stddef.h b/renderscript/clang-include/stddef.h
index 2dfe0a2..7354996 100644
--- a/renderscript/clang-include/stddef.h
+++ b/renderscript/clang-include/stddef.h
@@ -30,11 +30,15 @@
 #if !defined(__need_ptrdiff_t) && !defined(__need_size_t) &&                   \
     !defined(__need_wchar_t) && !defined(__need_NULL) &&                       \
     !defined(__need_wint_t)
+/* Always define miscellaneous pieces when modules are available. */
+#if !__has_feature(modules)
 #define __STDDEF_H
+#endif
 #define __need_ptrdiff_t
 #define __need_size_t
 #define __need_wchar_t
 #define __need_NULL
+#define __need_STDDEF_H_misc
 /* __need_wint_t is intentionally not defined here. */
 #endif
 
@@ -60,7 +64,7 @@
 #undef __need_size_t
 #endif /*defined(__need_size_t) */
 
-#if defined(__STDDEF_H)
+#if defined(__need_STDDEF_H_misc)
 /* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
  * enabled. */
 #if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \
@@ -71,7 +75,7 @@
 #endif
 typedef __SIZE_TYPE__ rsize_t;
 #endif
-#endif /* defined(__STDDEF_H) */
+#endif /* defined(__need_STDDEF_H_misc) */
 
 #if defined(__need_wchar_t)
 #ifndef __cplusplus
@@ -109,26 +113,13 @@
 #undef __need_NULL
 #endif /* defined(__need_NULL) */
 
-#if defined(__STDDEF_H)
-
+#if defined(__need_STDDEF_H_misc)
 #if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L
-#if !defined(__CLANG_MAX_ALIGN_T_DEFINED) || __has_feature(modules)
-#ifndef _MSC_VER
-typedef struct {
-  long long __clang_max_align_nonce1
-      __attribute__((__aligned__(__alignof__(long long))));
-  long double __clang_max_align_nonce2
-      __attribute__((__aligned__(__alignof__(long double))));
-} max_align_t;
-#else
-typedef double max_align_t;
+#include "__stddef_max_align_t.h"
 #endif
-#define __CLANG_MAX_ALIGN_T_DEFINED
-#endif
-#endif
-
 #define offsetof(t, d) __builtin_offsetof(t, d)
-#endif  /* __STDDEF_H */
+#undef __need_STDDEF_H_misc
+#endif  /* defined(__need_STDDEF_H_misc) */
 
 /* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use
 __WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
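
The offsetof definition is unchanged in substance, now guarded by __need_STDDEF_H_misc; a trivial usage sketch (illustrative only, not part of the patch):

    /* Illustrative sketch, not part of the patch. */
    #include <stddef.h>
    #include <stdio.h>

    struct packet {
      unsigned char type;
      unsigned int  length;
      unsigned char payload[8];
    };

    int main(void)
    {
      /* offsetof maps to __builtin_offsetof and yields a size_t */
      printf("payload starts at byte %zu\n", offsetof(struct packet, payload));
      return 0;
    }
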
diff --git a/renderscript/clang-include/stdint.h b/renderscript/clang-include/stdint.h
index 2b1bc09..0303db9 100644
--- a/renderscript/clang-include/stdint.h
+++ b/renderscript/clang-include/stdint.h
@@ -104,9 +104,9 @@
 
 #ifdef __INT64_TYPE__
 # ifndef __int8_t_defined /* glibc sys/types.h also defines int64_t*/
-typedef signed __INT64_TYPE__ int64_t;
+typedef __INT64_TYPE__ int64_t;
 # endif /* __int8_t_defined */
-typedef unsigned __INT64_TYPE__ uint64_t;
+typedef __UINT64_TYPE__ uint64_t;
 # define __int_least64_t int64_t
 # define __uint_least64_t uint64_t
 # define __int_least32_t int64_t
@@ -125,8 +125,8 @@
 #endif /* __int_least64_t */
 
 #ifdef __INT56_TYPE__
-typedef signed __INT56_TYPE__ int56_t;
-typedef unsigned __INT56_TYPE__ uint56_t;
+typedef __INT56_TYPE__ int56_t;
+typedef __UINT56_TYPE__ uint56_t;
 typedef int56_t int_least56_t;
 typedef uint56_t uint_least56_t;
 typedef int56_t int_fast56_t;
@@ -141,8 +141,8 @@
 
 
 #ifdef __INT48_TYPE__
-typedef signed __INT48_TYPE__ int48_t;
-typedef unsigned __INT48_TYPE__ uint48_t;
+typedef __INT48_TYPE__ int48_t;
+typedef __UINT48_TYPE__ uint48_t;
 typedef int48_t int_least48_t;
 typedef uint48_t uint_least48_t;
 typedef int48_t int_fast48_t;
@@ -157,8 +157,8 @@
 
 
 #ifdef __INT40_TYPE__
-typedef signed __INT40_TYPE__ int40_t;
-typedef unsigned __INT40_TYPE__ uint40_t;
+typedef __INT40_TYPE__ int40_t;
+typedef __UINT40_TYPE__ uint40_t;
 typedef int40_t int_least40_t;
 typedef uint40_t uint_least40_t;
 typedef int40_t int_fast40_t;
@@ -175,12 +175,12 @@
 #ifdef __INT32_TYPE__
 
 # ifndef __int8_t_defined /* glibc sys/types.h also defines int32_t*/
-typedef signed __INT32_TYPE__ int32_t;
+typedef __INT32_TYPE__ int32_t;
 # endif /* __int8_t_defined */
 
 # ifndef __uint32_t_defined  /* more glibc compatibility */
 # define __uint32_t_defined
-typedef unsigned __INT32_TYPE__ uint32_t;
+typedef __UINT32_TYPE__ uint32_t;
 # endif /* __uint32_t_defined */
 
 # define __int_least32_t int32_t
@@ -199,8 +199,8 @@
 #endif /* __int_least32_t */
 
 #ifdef __INT24_TYPE__
-typedef signed __INT24_TYPE__ int24_t;
-typedef unsigned __INT24_TYPE__ uint24_t;
+typedef __INT24_TYPE__ int24_t;
+typedef __UINT24_TYPE__ uint24_t;
 typedef int24_t int_least24_t;
 typedef uint24_t uint_least24_t;
 typedef int24_t int_fast24_t;
@@ -213,9 +213,9 @@
 
 #ifdef __INT16_TYPE__
 #ifndef __int8_t_defined /* glibc sys/types.h also defines int16_t*/
-typedef signed __INT16_TYPE__ int16_t;
+typedef __INT16_TYPE__ int16_t;
 #endif /* __int8_t_defined */
-typedef unsigned __INT16_TYPE__ uint16_t;
+typedef __UINT16_TYPE__ uint16_t;
 # define __int_least16_t int16_t
 # define __uint_least16_t uint16_t
 # define __int_least8_t int16_t
@@ -232,9 +232,9 @@
 
 #ifdef __INT8_TYPE__
 #ifndef __int8_t_defined  /* glibc sys/types.h also defines int8_t*/
-typedef signed __INT8_TYPE__ int8_t;
+typedef __INT8_TYPE__ int8_t;
 #endif /* __int8_t_defined */
-typedef unsigned __INT8_TYPE__ uint8_t;
+typedef __UINT8_TYPE__ uint8_t;
 # define __int_least8_t int8_t
 # define __uint_least8_t uint8_t
 #endif /* __INT8_TYPE__ */
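Note: the stdint.h hunks above switch the exact-width typedefs from spelling the unsigned variants as "unsigned __INTn_TYPE__" to using the compiler-provided __UINTn_TYPE__ macros directly. The observable behaviour of the resulting types is unchanged; a few ordinary C11 static assertions (a sketch, not part of the header) show what the typedefs are expected to satisfy either way:

    #include <stdint.h>

    _Static_assert(sizeof(int64_t) == 8 && sizeof(uint64_t) == 8,
                   "64-bit exact-width types must be 8 bytes");
    _Static_assert((uint8_t)-1 == 255, "uint8_t must be unsigned");
    _Static_assert((int8_t)-1 == -1,   "int8_t must be signed");
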
diff --git a/renderscript/clang-include/vadefs.h b/renderscript/clang-include/vadefs.h
new file mode 100644
index 0000000..7fe9a74
--- /dev/null
+++ b/renderscript/clang-include/vadefs.h
@@ -0,0 +1,65 @@
+/* ===-------- vadefs.h ---------------------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/* Only include this if we are aiming for MSVC compatibility. */
+#ifndef _MSC_VER
+#include_next <vadefs.h>
+#else
+
+#ifndef __clang_vadefs_h
+#define __clang_vadefs_h
+
+#include_next <vadefs.h>
+
+/* Override macros from vadefs.h with definitions that work with Clang. */
+#ifdef _crt_va_start
+#undef _crt_va_start
+#define _crt_va_start(ap, param) __builtin_va_start(ap, param)
+#endif
+#ifdef _crt_va_end
+#undef _crt_va_end
+#define _crt_va_end(ap)          __builtin_va_end(ap)
+#endif
+#ifdef _crt_va_arg
+#undef _crt_va_arg
+#define _crt_va_arg(ap, type)    __builtin_va_arg(ap, type)
+#endif
+
+/* VS 2015 switched to double underscore names, which is an improvement, but now
+ * we have to intercept those names too.
+ */
+#ifdef __crt_va_start
+#undef __crt_va_start
+#define __crt_va_start(ap, param) __builtin_va_start(ap, param)
+#endif
+#ifdef __crt_va_end
+#undef __crt_va_end
+#define __crt_va_end(ap)          __builtin_va_end(ap)
+#endif
+#ifdef __crt_va_arg
+#undef __crt_va_arg
+#define __crt_va_arg(ap, type)    __builtin_va_arg(ap, type)
+#endif
+
+#endif
+#endif
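Note: the new vadefs.h above remaps the MSVC _crt_va_* macros (and the __crt_va_* spellings used from VS 2015 on) onto Clang's __builtin_va_* forms. A minimal sketch of code that ends up going through those mappings, assuming an MSVC-compatible environment where the CRT's stdarg machinery is defined in terms of these macros:

    #include <stdarg.h>

    static int sum_ints(int count, ...) {
      va_list ap;
      int total = 0;
      va_start(ap, count);          /* -> __builtin_va_start(ap, count) */
      for (int i = 0; i < count; ++i)
        total += va_arg(ap, int);   /* -> __builtin_va_arg(ap, int) */
      va_end(ap);                   /* -> __builtin_va_end(ap) */
      return total;
    }
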
diff --git a/renderscript/lib/arm/libRSSupport.so b/renderscript/lib/arm/libRSSupport.so
index 912ab7a..b43e828 100755
--- a/renderscript/lib/arm/libRSSupport.so
+++ b/renderscript/lib/arm/libRSSupport.so
Binary files differ
diff --git a/renderscript/lib/arm/libc.so b/renderscript/lib/arm/libc.so
index b5ecd63..d14bbeb 100755
--- a/renderscript/lib/arm/libc.so
+++ b/renderscript/lib/arm/libc.so
Binary files differ
diff --git a/renderscript/lib/arm/libclcore.bc b/renderscript/lib/arm/libclcore.bc
index ddb5263..04d97a2 100644
--- a/renderscript/lib/arm/libclcore.bc
+++ b/renderscript/lib/arm/libclcore.bc
Binary files differ
diff --git a/renderscript/lib/arm/libm.so b/renderscript/lib/arm/libm.so
index 6222def..ab4d664 100755
--- a/renderscript/lib/arm/libm.so
+++ b/renderscript/lib/arm/libm.so
Binary files differ
diff --git a/renderscript/lib/arm/librsjni.so b/renderscript/lib/arm/librsjni.so
index 528237f..712a4a8 100755
--- a/renderscript/lib/arm/librsjni.so
+++ b/renderscript/lib/arm/librsjni.so
Binary files differ
diff --git a/renderscript/lib/arm/librsrt_arm.bc b/renderscript/lib/arm/librsrt_arm.bc
index 87197a6..04d97a2 100644
--- a/renderscript/lib/arm/librsrt_arm.bc
+++ b/renderscript/lib/arm/librsrt_arm.bc
Binary files differ
diff --git a/renderscript/lib/javalib.jar b/renderscript/lib/javalib.jar
index 360b84e..f0fee31 100644
--- a/renderscript/lib/javalib.jar
+++ b/renderscript/lib/javalib.jar
Binary files differ
diff --git a/renderscript/lib/mips/libRSSupport.so b/renderscript/lib/mips/libRSSupport.so
index 49fdfa6..1244169 100755
--- a/renderscript/lib/mips/libRSSupport.so
+++ b/renderscript/lib/mips/libRSSupport.so
Binary files differ
diff --git a/renderscript/lib/mips/libc.so b/renderscript/lib/mips/libc.so
index f33b5bb..f6dc7e8 100755
--- a/renderscript/lib/mips/libc.so
+++ b/renderscript/lib/mips/libc.so
Binary files differ
diff --git a/renderscript/lib/mips/libclcore.bc b/renderscript/lib/mips/libclcore.bc
index ddb5263..04d97a2 100644
--- a/renderscript/lib/mips/libclcore.bc
+++ b/renderscript/lib/mips/libclcore.bc
Binary files differ
diff --git a/renderscript/lib/mips/libm.so b/renderscript/lib/mips/libm.so
index 43019d1..20227b7 100755
--- a/renderscript/lib/mips/libm.so
+++ b/renderscript/lib/mips/libm.so
Binary files differ
diff --git a/renderscript/lib/mips/librsjni.so b/renderscript/lib/mips/librsjni.so
index cff13c1..3663e4a 100755
--- a/renderscript/lib/mips/librsjni.so
+++ b/renderscript/lib/mips/librsjni.so
Binary files differ
diff --git a/renderscript/lib/mips/librsrt_mips.bc b/renderscript/lib/mips/librsrt_mips.bc
index 87197a6..04d97a2 100644
--- a/renderscript/lib/mips/librsrt_mips.bc
+++ b/renderscript/lib/mips/librsrt_mips.bc
Binary files differ
diff --git a/renderscript/lib/x86/libRSSupport.so b/renderscript/lib/x86/libRSSupport.so
index 58a6ba2..92a9ea3 100755
--- a/renderscript/lib/x86/libRSSupport.so
+++ b/renderscript/lib/x86/libRSSupport.so
Binary files differ
diff --git a/renderscript/lib/x86/libc.so b/renderscript/lib/x86/libc.so
index b2892bb..9b06ed1 100755
--- a/renderscript/lib/x86/libc.so
+++ b/renderscript/lib/x86/libc.so
Binary files differ
diff --git a/renderscript/lib/x86/libclcore.bc b/renderscript/lib/x86/libclcore.bc
index 6aa69ee..8833135 100644
--- a/renderscript/lib/x86/libclcore.bc
+++ b/renderscript/lib/x86/libclcore.bc
Binary files differ
diff --git a/renderscript/lib/x86/libm.so b/renderscript/lib/x86/libm.so
index 1ba5696..ac23969 100755
--- a/renderscript/lib/x86/libm.so
+++ b/renderscript/lib/x86/libm.so
Binary files differ
diff --git a/renderscript/lib/x86/librsjni.so b/renderscript/lib/x86/librsjni.so
index 943a4c0..6d374d8 100755
--- a/renderscript/lib/x86/librsjni.so
+++ b/renderscript/lib/x86/librsjni.so
Binary files differ
diff --git a/renderscript/lib/x86/librsrt_x86.bc b/renderscript/lib/x86/librsrt_x86.bc
index c6213bd..f6d83ce 100644
--- a/renderscript/lib/x86/librsrt_x86.bc
+++ b/renderscript/lib/x86/librsrt_x86.bc
Binary files differ
diff --git a/tools/linux/bcc_compat b/tools/linux/bcc_compat
index 17daf71..92d7953 100755
--- a/tools/linux/bcc_compat
+++ b/tools/linux/bcc_compat
Binary files differ
diff --git a/tools/linux/libLLVM.so b/tools/linux/libLLVM.so
index 4167255..e12a63a 100755
--- a/tools/linux/libLLVM.so
+++ b/tools/linux/libLLVM.so
Binary files differ
diff --git a/tools/linux/libbcc.so b/tools/linux/libbcc.so
index dd6ea84..b83227f 100755
--- a/tools/linux/libbcc.so
+++ b/tools/linux/libbcc.so
Binary files differ
diff --git a/tools/linux/libbcinfo.so b/tools/linux/libbcinfo.so
index 6aaf4e5..3162c2d 100755
--- a/tools/linux/libbcinfo.so
+++ b/tools/linux/libbcinfo.so
Binary files differ
diff --git a/tools/linux/libc++.so b/tools/linux/libc++.so
index 1e74381..19e4750 100755
--- a/tools/linux/libc++.so
+++ b/tools/linux/libc++.so
Binary files differ
diff --git a/tools/linux/libclang.so b/tools/linux/libclang.so
index bd46497..c1de4c4 100755
--- a/tools/linux/libclang.so
+++ b/tools/linux/libclang.so
Binary files differ
diff --git a/tools/linux/llvm-rs-cc b/tools/linux/llvm-rs-cc
index 1a0687b..47dbf6b 100755
--- a/tools/linux/llvm-rs-cc
+++ b/tools/linux/llvm-rs-cc
Binary files differ