am e0b7d9f2: am dcd53649: am 99aa9032: Merge "Update RS prebuilts for LLVM rebase to r230699."

* commit 'e0b7d9f262c3ca3234f1bb8e870317e01542fae2':
  Update RS prebuilts for LLVM rebase to r230699.
diff --git a/renderscript/clang-include/Intrin.h b/renderscript/clang-include/Intrin.h
index 016c480..727a55e 100644
--- a/renderscript/clang-include/Intrin.h
+++ b/renderscript/clang-include/Intrin.h
@@ -289,6 +289,7 @@
 unsigned __int32 xbegin(void);
 void _xend(void);
 static __inline__
+#define _XCR_XFEATURE_ENABLED_MASK 0
 unsigned __int64 __cdecl _xgetbv(unsigned int);
 void __cdecl _xrstor(void const *, unsigned __int64);
 void __cdecl _xsave(void *, unsigned __int64);
@@ -330,7 +331,6 @@
                                  unsigned char _Shift);
 static __inline__
 void __stosq(unsigned __int64 *, unsigned __int64, size_t);
-unsigned __int64 __umulh(unsigned __int64, unsigned __int64);
 unsigned char __vmx_on(unsigned __int64 *);
 unsigned char __vmx_vmclear(unsigned __int64 *);
 unsigned char __vmx_vmlaunch(void);
@@ -416,10 +416,25 @@
 int __cdecl _setjmpex(jmp_buf);
 #endif
 unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);
-unsigned __int64 shrx_u64(unsigned __int64, unsigned int);
-unsigned __int64 _umul128(unsigned __int64 _Multiplier,
-                          unsigned __int64 _Multiplicand,
-                          unsigned __int64 *_HighProduct);
+unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);
+/*
+ * Multiply two 64-bit integers and obtain a 128-bit result.
+ * The low half is returned directly and the high half is in an out parameter.
+ */
+static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+_umul128(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand,
+         unsigned __int64 *_HighProduct) {
+  unsigned __int128 _FullProduct =
+      (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand;
+  *_HighProduct = _FullProduct >> 64;
+  return _FullProduct;
+}
+static __inline__ unsigned __int64 __attribute__((__always_inline__, __nodebug__))
+__umulh(unsigned __int64 _Multiplier, unsigned __int64 _Multiplicand) {
+  unsigned __int128 _FullProduct =
+      (unsigned __int128)_Multiplier * (unsigned __int128)_Multiplicand;
+  return _FullProduct >> 64;
+}
 void __cdecl _xrstor64(void const *, unsigned __int64);
 void __cdecl _xsave64(void *, unsigned __int64);
 void __cdecl _xsaveopt64(void *, unsigned __int64);
@@ -766,17 +781,17 @@
 \*----------------------------------------------------------------------------*/
 #if defined(__i386__) || defined(__x86_64__)
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
-__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
+__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
 _ReadWriteBarrier(void) {
   __asm__ volatile ("" : : : "memory");
 }
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
-__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
+__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
 _ReadBarrier(void) {
   __asm__ volatile ("" : : : "memory");
 }
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
-__attribute__((deprecated("use other intrinsics or C++11 atomics instead")))
+__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead")))
 _WriteBarrier(void) {
   __asm__ volatile ("" : : : "memory");
 }
@@ -929,14 +944,14 @@
   return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;
 }
 
-static __inline__ unsigned long __attribute__((always_inline, __nodebug__))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
 __readcr3(void) {
   unsigned long __cr3_val;
   __asm__ __volatile__ ("mov %%cr3, %0" : "=q"(__cr3_val) : : "memory");
   return __cr3_val;
 }
 
-static __inline__ void __attribute__((always_inline, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
 __writecr3(unsigned int __cr3_val) {
   __asm__ ("mov %0, %%cr3" : : "q"(__cr3_val) : "memory");
 }
diff --git a/renderscript/clang-include/__stddef_max_align_t.h b/renderscript/clang-include/__stddef_max_align_t.h
index a06f412..1e10ca9 100644
--- a/renderscript/clang-include/__stddef_max_align_t.h
+++ b/renderscript/clang-include/__stddef_max_align_t.h
@@ -26,15 +26,18 @@
 #ifndef __CLANG_MAX_ALIGN_T_DEFINED
 #define __CLANG_MAX_ALIGN_T_DEFINED
 
-#ifndef _MSC_VER
+#if defined(_MSC_VER)
+typedef double max_align_t;
+#elif defined(__APPLE__)
+typedef long double max_align_t;
+#else
+// Define 'max_align_t' to match the GCC definition.
 typedef struct {
   long long __clang_max_align_nonce1
       __attribute__((__aligned__(__alignof__(long long))));
   long double __clang_max_align_nonce2
       __attribute__((__aligned__(__alignof__(long double))));
 } max_align_t;
-#else
-typedef double max_align_t;
 #endif
 
 #endif
diff --git a/renderscript/clang-include/altivec.h b/renderscript/clang-include/altivec.h
index eded7b2..b8a8869 100644
--- a/renderscript/clang-include/altivec.h
+++ b/renderscript/clang-include/altivec.h
@@ -2270,7 +2270,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsl(int __a, const signed char *__b)
 {
@@ -2289,7 +2289,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsl(int __a, const unsigned char *__b)
 {
@@ -2308,7 +2308,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsl(int __a, const short *__b)
 {
@@ -2327,7 +2327,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsl(int __a, const unsigned short *__b)
 {
@@ -2346,7 +2346,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsl(int __a, const int *__b)
 {
@@ -2365,7 +2365,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsl(int __a, const unsigned int *__b)
 {
@@ -2384,7 +2384,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsl(int __a, const float *__b)
 {
@@ -2405,7 +2405,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsr(int __a, const signed char *__b)
 {
@@ -2424,7 +2424,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsr(int __a, const unsigned char *__b)
 {
@@ -2443,7 +2443,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsr(int __a, const short *__b)
 {
@@ -2462,7 +2462,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsr(int __a, const unsigned short *__b)
 {
@@ -2481,7 +2481,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsr(int __a, const int *__b)
 {
@@ -2500,7 +2500,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsr(int __a, const unsigned int *__b)
 {
@@ -2519,7 +2519,7 @@
 
 #ifdef __LITTLE_ENDIAN__
 static vector unsigned char __ATTRS_o_ai
-__attribute__((deprecated("use assignment for unaligned little endian \
+__attribute__((__deprecated__("use assignment for unaligned little endian \
 loads/stores")))
 vec_lvsr(int __a, const float *__b)
 {
@@ -4735,7 +4735,7 @@
 // in that the vec_xor can be recognized as a vec_nor (and for P8 and
 // later, possibly a vec_nand).
 
-vector signed char __ATTRS_o_ai
+static vector signed char __ATTRS_o_ai
 vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4750,7 +4750,7 @@
 #endif
 }
 
-vector unsigned char __ATTRS_o_ai
+static vector unsigned char __ATTRS_o_ai
 vec_perm(vector unsigned char __a,
          vector unsigned char __b,
          vector unsigned char __c)
@@ -4767,7 +4767,7 @@
 #endif
 }
 
-vector bool char __ATTRS_o_ai
+static vector bool char __ATTRS_o_ai
 vec_perm(vector bool char __a, vector bool char __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4782,7 +4782,7 @@
 #endif
 }
 
-vector short __ATTRS_o_ai
+static vector short __ATTRS_o_ai
 vec_perm(vector short __a, vector short __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4797,7 +4797,7 @@
 #endif
 }
 
-vector unsigned short __ATTRS_o_ai
+static vector unsigned short __ATTRS_o_ai
 vec_perm(vector unsigned short __a,
          vector unsigned short __b,
          vector unsigned char __c)
@@ -4814,7 +4814,7 @@
 #endif
 }
 
-vector bool short __ATTRS_o_ai
+static vector bool short __ATTRS_o_ai
 vec_perm(vector bool short __a, vector bool short __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4829,7 +4829,7 @@
 #endif
 }
 
-vector pixel __ATTRS_o_ai
+static vector pixel __ATTRS_o_ai
 vec_perm(vector pixel __a, vector pixel __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4844,7 +4844,7 @@
 #endif
 }
 
-vector int __ATTRS_o_ai
+static vector int __ATTRS_o_ai
 vec_perm(vector int __a, vector int __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4857,7 +4857,7 @@
 #endif
 }
 
-vector unsigned int __ATTRS_o_ai
+static vector unsigned int __ATTRS_o_ai
 vec_perm(vector unsigned int __a, vector unsigned int __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4872,7 +4872,7 @@
 #endif
 }
 
-vector bool int __ATTRS_o_ai
+static vector bool int __ATTRS_o_ai
 vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4887,7 +4887,7 @@
 #endif
 }
 
-vector float __ATTRS_o_ai
+static vector float __ATTRS_o_ai
 vec_perm(vector float __a, vector float __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4903,7 +4903,7 @@
 }
 
 #ifdef __VSX__
-vector long long __ATTRS_o_ai
+static vector long long __ATTRS_o_ai
 vec_perm(vector long long __a, vector long long __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -4916,7 +4916,7 @@
 #endif
 }
 
-vector unsigned long long __ATTRS_o_ai
+static vector unsigned long long __ATTRS_o_ai
 vec_perm(vector unsigned long long __a, vector unsigned long long __b,
          vector unsigned char __c)
 {
@@ -4932,7 +4932,7 @@
 #endif
 }
 
-vector double __ATTRS_o_ai
+static vector double __ATTRS_o_ai
 vec_perm(vector double __a, vector double __b, vector unsigned char __c)
 {
 #ifdef __LITTLE_ENDIAN__
@@ -8664,11 +8664,11 @@
 #ifdef __LITTLE_ENDIAN__
   vector int __c = (vector signed int)
     vec_perm(__b, __b, (vector unsigned char)
-	     (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+             (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
   __c = __builtin_altivec_vsum2sws(__a, __c);
   return (vector signed int)
     vec_perm(__c, __c, (vector unsigned char)
-	     (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+             (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
 #else
   return __builtin_altivec_vsum2sws(__a, __b);
 #endif
@@ -8682,11 +8682,11 @@
 #ifdef __LITTLE_ENDIAN__
   vector int __c = (vector signed int)
     vec_perm(__b, __b, (vector unsigned char)
-	     (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+             (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
   __c = __builtin_altivec_vsum2sws(__a, __c);
   return (vector signed int)
     vec_perm(__c, __c, (vector unsigned char)
-	     (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
+             (4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11));
 #else
   return __builtin_altivec_vsum2sws(__a, __b);
 #endif
diff --git a/renderscript/clang-include/arm_acle.h b/renderscript/clang-include/arm_acle.h
index 814df2c..6c56f3b 100644
--- a/renderscript/clang-include/arm_acle.h
+++ b/renderscript/clang-include/arm_acle.h
@@ -45,23 +45,23 @@
 /* 8.4 Hints */
 
 #if !defined(_MSC_VER)
-static __inline__ void __attribute__((always_inline, nodebug)) __wfi(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfi(void) {
   __builtin_arm_wfi();
 }
 
-static __inline__ void __attribute__((always_inline, nodebug)) __wfe(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __wfe(void) {
   __builtin_arm_wfe();
 }
 
-static __inline__ void __attribute__((always_inline, nodebug)) __sev(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sev(void) {
   __builtin_arm_sev();
 }
 
-static __inline__ void __attribute__((always_inline, nodebug)) __sevl(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __sevl(void) {
   __builtin_arm_sevl();
 }
 
-static __inline__ void __attribute__((always_inline, nodebug)) __yield(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(void) {
   __builtin_arm_yield();
 }
 #endif
@@ -71,7 +71,7 @@
 #endif
 
 /* 8.5 Swap */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __swp(uint32_t x, volatile uint32_t *p) {
   uint32_t v;
   do v = __builtin_arm_ldrex(p); while (__builtin_arm_strex(x, p));
@@ -102,28 +102,28 @@
 #endif
 
 /* 8.7 NOP */
-static __inline__ void __attribute__((always_inline, nodebug)) __nop(void) {
+static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
   __builtin_arm_nop();
 }
 
 /* 9 DATA-PROCESSING INTRINSICS */
 /* 9.2 Miscellaneous data-processing intrinsics */
 /* ROR */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __ror(uint32_t x, uint32_t y) {
   y %= 32;
   if (y == 0)  return x;
   return (x >> y) | (x << (32 - y));
 }
 
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
   __rorll(uint64_t x, uint32_t y) {
   y %= 64;
   if (y == 0)  return x;
   return (x >> y) | (x << (64 - y));
 }
 
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
   __rorl(unsigned long x, uint32_t y) {
 #if __SIZEOF_LONG__ == 4
   return __ror(x, y);
@@ -134,28 +134,28 @@
 
 
 /* CLZ */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __clz(uint32_t t) {
   return __builtin_clz(t);
 }
 
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
   __clzl(unsigned long t) {
   return __builtin_clzl(t);
 }
 
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
   __clzll(uint64_t t) {
   return __builtin_clzll(t);
 }
 
 /* REV */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __rev(uint32_t t) {
   return __builtin_bswap32(t);
 }
 
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
   __revl(unsigned long t) {
 #if __SIZEOF_LONG__ == 4
   return __builtin_bswap32(t);
@@ -164,40 +164,40 @@
 #endif
 }
 
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
   __revll(uint64_t t) {
   return __builtin_bswap64(t);
 }
 
 /* REV16 */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __rev16(uint32_t t) {
   return __ror(__rev(t), 16);
 }
 
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
   __rev16l(unsigned long t) {
     return __rorl(__revl(t), sizeof(long) / 2);
 }
 
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
   __rev16ll(uint64_t t) {
   return __rorll(__revll(t), 32);
 }
 
 /* REVSH */
-static __inline__ int16_t __attribute__((always_inline, nodebug))
+static __inline__ int16_t __attribute__((__always_inline__, __nodebug__))
   __revsh(int16_t t) {
   return __builtin_bswap16(t);
 }
 
 /* RBIT */
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __rbit(uint32_t t) {
   return __builtin_arm_rbit(t);
 }
 
-static __inline__ uint64_t __attribute__((always_inline, nodebug))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
   __rbitll(uint64_t t) {
 #if __ARM_32BIT_STATE
   return (((uint64_t) __builtin_arm_rbit(t)) << 32) |
@@ -207,7 +207,7 @@
 #endif
 }
 
-static __inline__ unsigned long __attribute__((always_inline, nodebug))
+static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__))
   __rbitl(unsigned long t) {
 #if __SIZEOF_LONG__ == 4
   return __rbit(t);
@@ -230,17 +230,17 @@
 
 /* 9.4.2 Saturating addition and subtraction intrinsics */
 #if __ARM_32BIT_STATE
-static __inline__ int32_t __attribute__((always_inline, nodebug))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
   __qadd(int32_t t, int32_t v) {
   return __builtin_arm_qadd(t, v);
 }
 
-static __inline__ int32_t __attribute__((always_inline, nodebug))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
   __qsub(int32_t t, int32_t v) {
   return __builtin_arm_qsub(t, v);
 }
 
-static __inline__ int32_t __attribute__((always_inline, nodebug))
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __qdbl(int32_t t) {
   return __builtin_arm_qadd(t, t);
 }
@@ -248,42 +248,42 @@
 
 /* 9.7 CRC32 intrinsics */
 #if __ARM_FEATURE_CRC32
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __crc32b(uint32_t a, uint8_t b) {
   return __builtin_arm_crc32b(a, b);
 }
 
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __crc32h(uint32_t a, uint16_t b) {
   return __builtin_arm_crc32h(a, b);
 }
 
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __crc32w(uint32_t a, uint32_t b) {
   return __builtin_arm_crc32w(a, b);
 }
 
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __crc32d(uint32_t a, uint64_t b) {
   return __builtin_arm_crc32d(a, b);
 }
 
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __crc32cb(uint32_t a, uint8_t b) {
   return __builtin_arm_crc32cb(a, b);
 }
 
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __crc32ch(uint32_t a, uint16_t b) {
   return __builtin_arm_crc32ch(a, b);
 }
 
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __crc32cw(uint32_t a, uint32_t b) {
   return __builtin_arm_crc32cw(a, b);
 }
 
-static __inline__ uint32_t __attribute__((always_inline, nodebug))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
   __crc32cd(uint32_t a, uint64_t b) {
   return __builtin_arm_crc32cd(a, b);
 }
diff --git a/renderscript/clang-include/avx512bwintrin.h b/renderscript/clang-include/avx512bwintrin.h
index bc4d4ac..acc3da2 100644
--- a/renderscript/clang-include/avx512bwintrin.h
+++ b/renderscript/clang-include/avx512bwintrin.h
@@ -21,13 +21,16 @@
  *
  *===-----------------------------------------------------------------------===
  */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
+#endif
 
 #ifndef __AVX512BWINTRIN_H
 #define __AVX512BWINTRIN_H
 
 typedef unsigned int __mmask32;
 typedef unsigned long long __mmask64;
-typedef char __v64qi __attribute__ ((vector_size (64)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
 
 
@@ -45,6 +48,18 @@
                                                    __u);
 }
 
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
+                                                 (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
+                                                 __u);
+}
+
 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
 _mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
   return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
@@ -57,4 +72,296 @@
                                                    __u);
 }
 
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
+                                                 (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
+                                                 __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
+                                                (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
+                                                __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
+                                                 (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
+                                                 __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
+                                                (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
+                                                __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
+                                                 (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
+                                                 __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
+                                                   (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
+                                                   __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
+                                                 (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
+                                                 __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
+                                                   (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
+                                                   __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
+                                                 (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
+                                                 __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
+                                                (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
+                                                __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
+                                                 (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
+                                                 __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
+                                                (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
+                                                __u);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
+                                                 (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
+                                                 __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
+                                                (__mmask64)-1);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
+                                                __u);
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) { /* unsigned bytes, a < b */
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
+                                                 (__mmask64)-1); /* 1 = LT; all lanes */
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
+                                                 __u); /* same test, lane mask __u */
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) { /* signed words, a < b */
+  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
+                                                (__mmask32)-1); /* 1 = LT; all lanes */
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
+                                                __u); /* same test, lane mask __u */
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) { /* unsigned words, a < b */
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
+                                                 (__mmask32)-1); /* 1 = LT; all lanes */
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
+                                                 __u); /* same test, lane mask __u */
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) { /* bytes, a != b */
+  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
+                                                (__mmask64)-1); /* 4 = NE; all lanes */
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
+                                                __u); /* same test, lane mask __u */
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) { /* bytes, a != b (ucmp form) */
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
+                                                 (__mmask64)-1); /* 4 = NE; all lanes */
+}
+
+static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
+  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
+                                                 __u); /* same test, lane mask __u */
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) { /* words, a != b */
+  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
+                                                (__mmask32)-1); /* 4 = NE; all lanes */
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
+                                                __u); /* same test, lane mask __u */
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) { /* words, a != b (ucmp form) */
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
+                                                 (__mmask32)-1); /* 4 = NE; all lanes */
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
+  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
+                                                 __u); /* same test, lane mask __u */
+}
+
+#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
+  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+                                         (__v64qi)(__m512i)(b), \
+                                         (p), (__mmask64)-1); }) /* 64 lanes: 64-bit result */
+
+#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
+  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
+                                         (__v64qi)(__m512i)(b), \
+                                         (p), (__mmask64)(m)); }) /* m = lane mask */
+
+#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
+  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+                                          (__v64qi)(__m512i)(b), \
+                                          (p), (__mmask64)-1); }) /* ucmp form, all lanes */
+
+#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
+  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
+                                          (__v64qi)(__m512i)(b), \
+                                          (p), (__mmask64)(m)); }) /* ucmp form, mask m */
+
+#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+                                         (__v32hi)(__m512i)(b), \
+                                         (p), (__mmask32)-1); }) /* 32 lanes: 32-bit result */
+
+#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
+                                         (__v32hi)(__m512i)(b), \
+                                         (p), (__mmask32)(m)); }) /* m = lane mask */
+
+#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+                                          (__v32hi)(__m512i)(b), \
+                                          (p), (__mmask32)-1); }) /* ucmp form, all lanes */
+
+#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
+  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
+                                          (__v32hi)(__m512i)(b), \
+                                          (p), (__mmask32)(m)); }) /* ucmp form, mask m */
+
 #endif
diff --git a/renderscript/clang-include/avx512erintrin.h b/renderscript/clang-include/avx512erintrin.h
index 1a5ea15..57c61aa 100644
--- a/renderscript/clang-include/avx512erintrin.h
+++ b/renderscript/clang-include/avx512erintrin.h
@@ -28,85 +28,259 @@
 #define __AVX512ERINTRIN_H
 
 
+// exp2a23: R is forwarded unchanged as the last argument of the builtin
+#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+                                      (__v8df)_mm512_setzero_pd(), \
+                                      (__mmask8)-1, (R)); }) /* mask -1: all 8 lanes */
+
+#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+                                      (__v8df)(__m512d)(S), \
+                                      (__mmask8)(M), (R)); }) /* S in place of zeros; mask M */
+
+#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+                                      (__v8df)_mm512_setzero_pd(), \
+                                      (__mmask8)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm512_exp2a23_pd(A) \
+   _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_mask_exp2a23_pd(S, M, A) \
+   _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_maskz_exp2a23_pd(M, A) \
+   _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+                                     (__v16sf)_mm512_setzero_ps(), \
+                                     (__mmask16)-1, (R)); }) /* 16 floats: 16-bit mask */
+
+#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+                                     (__v16sf)(__m512)(S), \
+                                     (__mmask16)(M), (R)); }) /* S in place of zeros; mask M */
+
+#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+                                     (__v16sf)_mm512_setzero_ps(), \
+                                     (__mmask16)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm512_exp2a23_ps(A) \
+   _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_mask_exp2a23_ps(S, M, A) \
+   _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_maskz_exp2a23_ps(M, A) \
+   _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
 // rsqrt28
-static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
-_mm512_rsqrt28_round_pd (__m512d __A, int __R)
-{
-  return (__m512d)__builtin_ia32_rsqrt28pd_mask ((__v8df)__A,
-                                                 (__v8df)_mm512_setzero_pd(),
-                                                 (__mmask8)-1,
-                                                 __R);
-}
-static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
-_mm512_rsqrt28_round_ps(__m512 __A, int __R)
-{
-  return (__m512)__builtin_ia32_rsqrt28ps_mask ((__v16sf)__A,
-                                                (__v16sf)_mm512_setzero_ps(),
-                                                (__mmask16)-1,
-                                                __R);
-}
+#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)_mm512_setzero_pd(), \
+                                         (__mmask8)-1, (R)); }) /* mask -1: all 8 lanes */
 
-static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
-_mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R)
-{
-  return (__m128) __builtin_ia32_rsqrt28ss_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf)
-             _mm_setzero_ps (),
-             (__mmask8) -1,
-             __R);
-}
+#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)(__m512d)(S), \
+                                         (__mmask8)(M), (R)); }) /* S in place of zeros; mask M */
 
-static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
-_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
-{
-  return (__m128d) __builtin_ia32_rsqrt28sd_mask ((__v2df) __A,
-              (__v2df) __B,
-              (__v2df)
-              _mm_setzero_pd (),
-              (__mmask8) -1,
-             __R);
-}
+#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)_mm512_setzero_pd(), \
+                                         (__mmask8)(M), (R)); }) /* zero vector; mask M */
 
+#define _mm512_rsqrt28_pd(A) \
+  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_mask_rsqrt28_pd(S, M, A) \
+  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_maskz_rsqrt28_pd(M, A) \
+  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+                                        (__v16sf)_mm512_setzero_ps(), \
+                                        (__mmask16)-1, (R)); }) /* mask -1: all 16 lanes */
+
+#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+                                        (__v16sf)(__m512)(S), \
+                                        (__mmask16)(M), (R)); }) /* S in place of zeros; mask M */
+
+#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+                                        (__v16sf)_mm512_setzero_ps(), \
+                                        (__mmask16)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm512_rsqrt28_ps(A) \
+  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_mask_rsqrt28_ps(S, M, A) \
+  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_maskz_rsqrt28_ps(M, A) \
+  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \
+  (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+                                        (__v4sf)(__m128)(B), \
+                                        (__v4sf)_mm_setzero_ps(), \
+                                        (__mmask8)-1, (R)); }) /* zero vector, mask -1 */
+
+#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \
+  (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+                                        (__v4sf)(__m128)(B), \
+                                        (__v4sf)(__m128)(S), \
+                                        (__mmask8)(M), (R)); }) /* S in place of zeros; mask M */
+
+#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \
+  (__m128)__builtin_ia32_rsqrt28ss_mask((__v4sf)(__m128)(A), \
+                                        (__v4sf)(__m128)(B), \
+                                        (__v4sf)_mm_setzero_ps(), \
+                                        (__mmask8)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm_rsqrt28_ss(A, B) \
+  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_mask_rsqrt28_ss(S, M, A, B) \
+  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_maskz_rsqrt28_ss(M, A, B) \
+  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \
+  (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+                                         (__v2df)(__m128d)(B), \
+                                         (__v2df)_mm_setzero_pd(), \
+                                         (__mmask8)-1, (R)); }) /* zero vector, mask -1 */
+
+#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \
+  (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+                                         (__v2df)(__m128d)(B), \
+                                         (__v2df)(__m128d)(S), \
+                                         (__mmask8)(M), (R)); }) /* S in place of zeros; mask M */
+
+#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \
+  (__m128d)__builtin_ia32_rsqrt28sd_mask((__v2df)(__m128d)(A), \
+                                         (__v2df)(__m128d)(B), \
+                                         (__v2df)_mm_setzero_pd(), \
+                                         (__mmask8)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm_rsqrt28_sd(A, B) \
+  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_mask_rsqrt28_sd(S, M, A, B) \
+  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_maskz_rsqrt28_sd(M, A, B) \
+  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* maskz, not mask */
 
 // rcp28
-static  __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
-_mm512_rcp28_round_pd (__m512d __A, int __R)
-{
-  return (__m512d)__builtin_ia32_rcp28pd_mask ((__v8df)__A,
-                                               (__v8df)_mm512_setzero_pd(),
-                                               (__mmask8)-1,
-                                               __R);
-}
+#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+                                       (__v8df)_mm512_setzero_pd(), \
+                                       (__mmask8)-1, (R)); }) /* mask -1: all 8 lanes */
 
-static  __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
-_mm512_rcp28_round_ps (__m512 __A, int __R)
-{
-  return (__m512)__builtin_ia32_rcp28ps_mask ((__v16sf)__A,
-                                              (__v16sf)_mm512_setzero_ps (),
-                                              (__mmask16)-1,
-                                              __R);
-}
+#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+                                       (__v8df)(__m512d)(S), \
+                                       (__mmask8)(M), (R)); }) /* S in place of zeros; mask M */
 
-static  __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
-_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
-{
-  return (__m128) __builtin_ia32_rcp28ss_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf)
-             _mm_setzero_ps (),
-             (__mmask8) -1,
-             __R);
-}
-static  __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
-_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
-{
-  return (__m128d) __builtin_ia32_rcp28sd_mask ((__v2df) __A,
-              (__v2df) __B,
-              (__v2df)
-              _mm_setzero_pd (),
-              (__mmask8) -1,
-             __R);
-}
+#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \
+  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+                                       (__v8df)_mm512_setzero_pd(), \
+                                       (__mmask8)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm512_rcp28_pd(A) \
+  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_mask_rcp28_pd(S, M, A) \
+  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_maskz_rcp28_pd(M, A) \
+  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+                                      (__v16sf)_mm512_setzero_ps(), \
+                                      (__mmask16)-1, (R)); }) /* mask -1: all 16 lanes */
+
+#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+                                      (__v16sf)(__m512)(S), \
+                                      (__mmask16)(M), (R)); }) /* S in place of zeros; mask M */
+
+#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+                                      (__v16sf)_mm512_setzero_ps(), \
+                                      (__mmask16)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm512_rcp28_ps(A) \
+  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_mask_rcp28_ps(S, M, A) \
+  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_maskz_rcp28_ps(M, A) \
+  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \
+  (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+                                      (__v4sf)(__m128)(B), \
+                                      (__v4sf)_mm_setzero_ps(), \
+                                      (__mmask8)-1, (R)); }) /* zero vector, mask -1 */
+
+#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \
+  (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+                                      (__v4sf)(__m128)(B), \
+                                      (__v4sf)(__m128)(S), \
+                                      (__mmask8)(M), (R)); }) /* S in place of zeros; mask M */
+
+#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \
+  (__m128)__builtin_ia32_rcp28ss_mask((__v4sf)(__m128)(A), \
+                                      (__v4sf)(__m128)(B), \
+                                      (__v4sf)_mm_setzero_ps(), \
+                                      (__mmask8)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm_rcp28_ss(A, B) \
+  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_mask_rcp28_ss(S, M, A, B) \
+  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_maskz_rcp28_ss(M, A, B) \
+  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \
+  (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+                                       (__v2df)(__m128d)(B), \
+                                       (__v2df)_mm_setzero_pd(), \
+                                       (__mmask8)-1, (R)); }) /* zero vector, mask -1 */
+
+#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \
+  (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+                                       (__v2df)(__m128d)(B), \
+                                       (__v2df)(__m128d)(S), \
+                                       (__mmask8)(M), (R)); }) /* S in place of zeros; mask M */
+
+#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \
+  (__m128d)__builtin_ia32_rcp28sd_mask((__v2df)(__m128d)(A), \
+                                       (__v2df)(__m128d)(B), \
+                                       (__v2df)_mm_setzero_pd(), \
+                                       (__mmask8)(M), (R)); }) /* zero vector; mask M */
+
+#define _mm_rcp28_sd(A, B) \
+  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_mask_rcp28_sd(S, M, A, B) \
+  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm_maskz_rcp28_sd(M, A, B) \
+  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) /* R fixed */
 
 #endif // __AVX512ERINTRIN_H
diff --git a/renderscript/clang-include/avx512fintrin.h b/renderscript/clang-include/avx512fintrin.h
index 9591dcf..c6d46cb 100644
--- a/renderscript/clang-include/avx512fintrin.h
+++ b/renderscript/clang-include/avx512fintrin.h
@@ -492,20 +492,13 @@
              (__mmask16) -1);
 }
 
-static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
-_mm512_roundscale_ps(__m512 __A, const int __imm)
-{
-  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
-              (__v16sf) __A, -1,
-              _MM_FROUND_CUR_DIRECTION);
-}
-static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
-_mm512_roundscale_pd(__m512d __A, const int __imm)
-{
-  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
-               (__v8df) __A, -1,
-               _MM_FROUND_CUR_DIRECTION);
-}
+#define _mm512_roundscale_ps(A, B) __extension__ ({ /* A is expanded once */ \
+  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), \
+    (__v16sf)_mm512_setzero_ps(), -1, _MM_FROUND_CUR_DIRECTION); })
+
+#define _mm512_roundscale_pd(A, B) __extension__ ({ /* A is expanded once */ \
+  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), \
+    (__v8df)_mm512_setzero_pd(), -1, _MM_FROUND_CUR_DIRECTION); })
 
 static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
@@ -613,25 +606,35 @@
                                                        (__mmask16) -1);
 }
 
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
-{
-  return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
-                                                 (__v8di)__B,
-                                                 __I,
-                                                 (__v8di)_mm512_setzero_si512(),
-                                                 (__mmask8) -1);
-}
+#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
+  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \
+                                         (__v8di)(__m512i)(B), \
+                                         (I), (__v8di)_mm512_setzero_si512(), \
+                                         (__mmask8)-1); }) /* replaces _mm512_valign_epi64 */
 
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
-{
-  return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
-                                                (__v16si)__B,
-                                                __I,
-                                                (__v16si)_mm512_setzero_si512(),
-                                                (__mmask16) -1);
-}
+#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
+  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
+                                         (__v16si)(__m512i)(B), \
+                                         (I), (__v16si)_mm512_setzero_si512(), \
+                                         (__mmask16)-1); }) /* replaces _mm512_valign_epi32 */
+
+/* Vector Extract */
+
+#define _mm512_extractf64x4_pd(A, I) __extension__ ({                     \
+      /* cast A in place: a local __A could capture a caller's __A */    \
+      (__m256d)                                                          \
+        __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A),           \
+                                         (I),                            \
+                                         (__v4df)_mm256_setzero_pd(),    \
+                                         (__mmask8) -1); })
+
+#define _mm512_extractf32x4_ps(A, I) __extension__ ({                     \
+      /* cast A in place: a local __A could capture a caller's __A */    \
+      (__m128)                                                           \
+        __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A),           \
+                                         (I),                            \
+                                         (__v4sf)_mm_setzero_ps(),       \
+                                         (__mmask8) -1); })
 
 /* Vector Blend */
 
@@ -669,22 +672,37 @@
 
 /* Compare */
 
-static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p)
-{
-  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
-               (__v16sf) b, p, (__mmask16) -1,
-               _MM_FROUND_CUR_DIRECTION);
-}
+#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
+  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
+                                          (__v16sf)(__m512)(B), \
+                                          (P), (__mmask16)-1, (R)); }) /* all 16 lanes */
 
-static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__)) 
-_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P)
-{
-  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
-              (__v8df) __Y, __P,
-              (__mmask8) -1,
-              _MM_FROUND_CUR_DIRECTION);
-}
+#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
+  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
+                                          (__v16sf)(__m512)(B), \
+                                          (P), (__mmask16)(U), (R)); }) /* U = lane mask */
+
+#define _mm512_cmp_ps_mask(A, B, P) \
+  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
+  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)(__m512d)(B), \
+                                         (P), (__mmask8)-1, (R)); }) /* all 8 lanes */
+
+#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
+                                         (__v8df)(__m512d)(B), \
+                                         (P), (__mmask8)(U), (R)); }) /* U = lane mask */
+
+#define _mm512_cmp_pd_mask(A, B, P) \
+  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) /* R fixed */
+
+#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
+  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) /* R fixed */
 
 /* Conversion */
 
@@ -698,25 +716,15 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
-_mm512_cvt_roundepi32_ps(__m512i __A, const int __R)
-{
-  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
-               (__v16sf)
-               _mm512_setzero_ps (),
-               (__mmask16) -1,
-               __R);
-}
+#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \
+                                          (__v16sf)_mm512_setzero_ps(), \
+                                          (__mmask16)-1, (R)); }) /* mask -1: all 16 lanes */
 
-static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
-_mm512_cvt_roundepu32_ps(__m512i __A, const int __R)
-{
-  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
-               (__v16sf)
-               _mm512_setzero_ps (),
-               (__mmask16) -1,
-               __R);
-}
+#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
+  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \
+                                           (__v16sf)_mm512_setzero_ps(), \
+                                           (__mmask16)-1, (R)); }) /* mask -1: all 16 lanes */
 
 static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
 _mm512_cvtepi32_pd(__m256i __A)
@@ -735,25 +743,16 @@
                 _mm512_setzero_pd (),
                 (__mmask8) -1);
 }
-static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
-_mm512_cvt_roundpd_ps(__m512d __A, const int __R)
-{
-  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
-               (__v8sf)
-               _mm256_setzero_ps (),
-               (__mmask8) -1,
-               __R);
-}
 
-static  __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvtps_ph(__m512 __A, const int __I)
-{
-  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
-                 __I,
-                 (__v16hi)
-                 _mm256_setzero_si256 (),
-                 -1);
-}
+#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
+  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \
+                                          (__v8sf)_mm256_setzero_ps(), \
+                                          (__mmask8)-1, (R)); }) /* mask -1: all 8 lanes */
+
+#define _mm512_cvtps_ph(A, I) __extension__ ({ \
+  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \
+                                            (__v16hi)_mm256_setzero_si256(), \
+                                            -1); }) /* mask -1: every bit set */
 
 static  __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
 _mm512_cvtph_ps(__m256i __A)
@@ -783,60 +782,67 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R)
+#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
+  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \
+                                            (__v8si)_mm256_setzero_si256(), \
+                                            (__mmask8)-1, (R)); })
+
+#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \
+                                            (__v16si)_mm512_setzero_si512(), \
+                                            (__mmask16)-1, (R)); })
+
+#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \
+                                           (__v16si)_mm512_setzero_si512(), \
+                                           (__mmask16)-1, (R)); })
+
+#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
+  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \
+                                           (__v8si)_mm256_setzero_si256(), \
+                                           (__mmask8)-1, (R)); })
+
+#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
+  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \
+                                            (__v16si)_mm512_setzero_si512(), \
+                                            (__mmask16)-1, (R)); })
+
+#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
+  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \
+                                            (__v8si)_mm256_setzero_si256(), \
+                                            (__mmask8) -1, (R)); })
+
+/* Unpack and Interleave */
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_unpackhi_pd(__m512d __a, __m512d __b)
 {
-  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
-                 (__v8si)
-                 _mm256_setzero_si256 (),
-                 (__mmask8) -1,
-                 __R);
-}
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvtt_roundps_epi32(__m512 __A, const int __R)
-{
-  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
-                 (__v16si)
-                 _mm512_setzero_si512 (),
-                 (__mmask16) -1,
-                 __R);
+  return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
 }
 
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvt_roundps_epi32(__m512 __A, const int __R)
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_unpacklo_pd(__m512d __a, __m512d __b)
 {
-  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
-                (__v16si)
-                _mm512_setzero_si512 (),
-                (__mmask16) -1,
-                __R);
+  return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
 }
-static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvt_roundpd_epi32(__m512d __A, const int __R)
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_unpackhi_ps(__m512 __a, __m512 __b)
 {
-  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
-                (__v8si)
-                _mm256_setzero_si256 (),
-                (__mmask8) -1,
-                __R);
+  return __builtin_shufflevector(__a, __b,
+                                 2,    18,    3,    19,
+                                 2+4,  18+4,  3+4,  19+4,
+                                 2+8,  18+8,  3+8,  19+8,
+                                 2+12, 18+12, 3+12, 19+12);
 }
-static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvt_roundps_epu32(__m512 __A, const int __R)
+
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_unpacklo_ps(__m512 __a, __m512 __b)
 {
-  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
-                (__v16si)
-                _mm512_setzero_si512 (),
-                (__mmask16) -1,
-                __R);
-}
-static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
-_mm512_cvt_roundpd_epu32(__m512d __A, const int __R)
-{
-  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
-                (__v8si)
-                _mm256_setzero_si256 (),
-                (__mmask8) -1,
-                __R);
+  return __builtin_shufflevector(__a, __b,
+                                 0,    16,    1,    17,
+                                 0+4,  16+4,  1+4,  17+4,
+                                 0+8,  16+8,  1+8,  17+8,
+                                 0+12, 16+12, 1+12, 17+12);
 }
 
 /* Bit Test */
@@ -895,12 +901,30 @@
                                                    (__mmask8) __U);
 }
 
+static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
+{
+  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
+                                                  (__v16sf)
+                                                  _mm512_setzero_ps (),
+                                                  (__mmask16) __U);
+}
+
+static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
+_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
+{
+  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
+                                                   (__v8df)
+                                                   _mm512_setzero_pd (),
+                                                   (__mmask8) __U);
+}
+
 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
 _mm512_loadu_pd(double const *__p)
 {
   struct __loadu_pd {
     __m512d __v;
-  } __attribute__((packed, may_alias));
+  } __attribute__((__packed__, __may_alias__));
   return ((struct __loadu_pd*)__p)->__v;
 }
 
@@ -909,10 +933,28 @@
 {
   struct __loadu_ps {
     __m512 __v;
-  } __attribute__((packed, may_alias));
+  } __attribute__((__packed__, __may_alias__));
   return ((struct __loadu_ps*)__p)->__v;
 }
 
+/* Loads a __m512 from __p via the loadaps512 builtin with an all-ones mask.
+   __p is void const *, matching _mm512_maskz_load_ps, so no cast is needed. */
+static __inline __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_load_ps(void const *__p)
+{
+  return (__m512) __builtin_ia32_loadaps512_mask (
+      (const __v16sf *)__p, (__v16sf) _mm512_setzero_ps (), (__mmask16) -1);
+}
+
+/* Loads a __m512d from __p via the loadapd512 builtin with an all-ones mask.
+   __p is void const *, matching _mm512_maskz_load_pd, so no cast is needed. */
+static __inline __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_load_pd(void const *__p)
+{
+  return (__m512d) __builtin_ia32_loadapd512_mask (
+      (const __v8df *)__p, (__v8df) _mm512_setzero_pd (), (__mmask8) -1);
+}
+
 /* SIMD store ops */
 
 static __inline void __attribute__ ((__always_inline__, __nodebug__))
@@ -955,9 +997,9 @@
 }
 
 static __inline void __attribute__ ((__always_inline__, __nodebug__))
-_mm512_store_ps(void *__P, __m512 __A)
+_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
 {
-  *(__m512*)__P = __A;
+  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
 }
 
 static __inline void __attribute__ ((__always_inline__, __nodebug__))
@@ -966,6 +1008,19 @@
   *(__m512d*)__P = __A;
 }
 
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
+{
+  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
+                                   (__mmask16) __U);
+}
+
+static __inline void __attribute__ ((__always_inline__, __nodebug__))
+_mm512_store_ps(void *__P, __m512 __A)
+{
+  *(__m512*)__P = __A;
+}
+
 /* Mask ops */
 
 static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
@@ -988,6 +1043,18 @@
                                                    __u);
 }
 
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
+                                                 (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
+                                                 __u);
+}
+
 static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
@@ -1000,4 +1067,303 @@
                                                   (__mmask8)-1);
 }
 
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
+                                                (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
+                                                (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
+                                                 (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
+                                                 __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
+                                               (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
+                                               __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
+                                                (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
+                                                   (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
+                                                   __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
+                                                 (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
+                                                 __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
+                                                  __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
+                                                  (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
+                                                (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
+                                                (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
+                                                 (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
+                                                 __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
+                                               (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
+                                               __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
+                                                (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
+                                                (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
+                                                 (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
+                                                 __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
+                                               (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
+                                               __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
+                                                (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
+                                                (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
+                                                __u);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
+                                                 (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
+  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
+                                                 __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
+                                               (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
+                                               __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
+                                                (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
+                                                __u);
+}
+
+/* cmpd512 with predicate p, all-ones mask; a and b expand once, no temps. */
+#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
+                                         (__v16si)(__m512i)(b), (p), \
+                                         (__mmask16)-1); })
+
+/* ucmpd512 with predicate p, all-ones mask; a and b expand once, no temps. */
+#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
+                                          (__v16si)(__m512i)(b), (p), \
+                                          (__mmask16)-1); })
+
+/* cmpq512 with predicate p, all-ones mask; a and b expand once, no temps. */
+#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
+                                        (__v8di)(__m512i)(b), (p), \
+                                        (__mmask8)-1); })
+
+/* ucmpq512 with predicate p, all-ones mask; a and b expand once, no temps. */
+#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
+                                         (__v8di)(__m512i)(b), (p), \
+                                         (__mmask8)-1); })
+
+/* cmpd512 with predicate p under mask m; a and b expand once, no temps. */
+#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
+                                         (__v16si)(__m512i)(b), (p), \
+                                         (__mmask16)(m)); })
+
+/* ucmpd512 with predicate p under mask m; a and b expand once, no temps. */
+#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
+  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
+                                          (__v16si)(__m512i)(b), (p), \
+                                          (__mmask16)(m)); })
+
+/* cmpq512 with predicate p under mask m; a and b expand once, no temps. */
+#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
+                                        (__v8di)(__m512i)(b), (p), \
+                                        (__mmask8)(m)); })
+
+/* ucmpq512 with predicate p under mask m; a and b expand once, no temps. */
+#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
+                                         (__v8di)(__m512i)(b), (p), \
+                                         (__mmask8)(m)); })
 #endif // __AVX512FINTRIN_H
diff --git a/renderscript/clang-include/avx512vlbwintrin.h b/renderscript/clang-include/avx512vlbwintrin.h
index 11333f8..0746f43 100644
--- a/renderscript/clang-include/avx512vlbwintrin.h
+++ b/renderscript/clang-include/avx512vlbwintrin.h
@@ -42,6 +42,17 @@
                                                    __u);
 }
 
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epu8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
+                                                 (__mmask16)-1);
+}
+
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 0,
+                                                 __u);
+}
 
 static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
 _mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {
@@ -55,6 +66,18 @@
                                                    __u);
 }
 
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epu8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
+                                                 (__mmask32)-1);
+}
+
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 0,
+                                                 __u);
+}
+
 static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_epi16_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqw128_mask((__v8hi)__a, (__v8hi)__b,
@@ -67,6 +90,17 @@
                                                   __u);
 }
 
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epu16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
+                                                (__mmask8)-1);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 0,
+                                                __u);
+}
 
 static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
 _mm256_cmpeq_epi16_mask(__m256i __a, __m256i __b) {
@@ -80,4 +114,576 @@
                                                    __u);
 }
 
+/* Unmasked "eq" form: comparison code 0, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epu16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0, (__mmask16)-1);
+}
+
+/* Masked "eq" form: comparison code 0, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 0, __u);
+}
+
+/* Unmasked "ge" form: comparison code 5, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epi8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, (__mmask16)-1);
+}
+
+/* Masked "ge" form: comparison code 5, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, __u);
+}
+
+/* Unmasked "ge" form: comparison code 5, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epu8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, (__mmask16)-1);
+}
+
+/* Masked "ge" form: comparison code 5, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 5, __u);
+}
+
+/* Unmasked "ge" form: comparison code 5, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epi8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, (__mmask32)-1);
+}
+
+/* Masked "ge" form: comparison code 5, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, __u);
+}
+
+/* Unmasked "ge" form: comparison code 5, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epu8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, (__mmask32)-1);
+}
+
+/* Masked "ge" form: comparison code 5, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 5, __u);
+}
+
+/* Unmasked "ge" form: comparison code 5, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epi16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, (__mmask8)-1);
+}
+
+/* Masked "ge" form: comparison code 5, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, __u);
+}
+
+/* Unmasked "ge" form: comparison code 5, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epu16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, (__mmask8)-1);
+}
+
+/* Masked "ge" form: comparison code 5, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 5, __u);
+}
+
+/* Unmasked "ge" form: comparison code 5, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epi16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, (__mmask16)-1);
+}
+
+/* Masked "ge" form: comparison code 5, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, __u);
+}
+
+/* Unmasked "ge" form: comparison code 5, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epu16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, (__mmask16)-1);
+}
+
+/* Masked "ge" form: comparison code 5, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 5, __u);
+}
+
+/* Unmasked "gt" form: pcmpgt builtin (no code), all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epi8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b, (__mmask16)-1);
+}
+
+/* Masked "gt" form: pcmpgt builtin (no code), caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_pcmpgtb128_mask((__v16qi)__a, (__v16qi)__b, __u);
+}
+
+/* Unmasked "gt" form: comparison code 6, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epu8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6, (__mmask16)-1);
+}
+
+/* Masked "gt" form: comparison code 6, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 6, __u);
+}
+
+/* Unmasked "gt" form: pcmpgt builtin (no code), all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b, (__mmask32)-1);
+}
+
+/* Masked "gt" form: pcmpgt builtin (no code), caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_pcmpgtb256_mask((__v32qi)__a, (__v32qi)__b, __u);
+}
+
+/* Unmasked "gt" form: comparison code 6, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epu8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6, (__mmask32)-1);
+}
+
+/* Masked "gt" form: comparison code 6, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 6, __u);
+}
+
+/* Unmasked "gt" form: pcmpgt builtin (no code), all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epi16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b, (__mmask8)-1);
+}
+
+/* Masked "gt" form: pcmpgt builtin (no code), caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_pcmpgtw128_mask((__v8hi)__a, (__v8hi)__b, __u);
+}
+
+/* Unmasked "gt" form: comparison code 6, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epu16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6, (__mmask8)-1);
+}
+
+/* Masked "gt" form: comparison code 6, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 6, __u);
+}
+
+/* Unmasked "gt" form: pcmpgt builtin (no code), all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b, (__mmask16)-1);
+}
+
+/* Masked "gt" form: pcmpgt builtin (no code), caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_pcmpgtw256_mask((__v16hi)__a, (__v16hi)__b, __u);
+}
+
+/* Unmasked "gt" form: comparison code 6, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epu16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6, (__mmask16)-1);
+}
+
+/* Masked "gt" form: comparison code 6, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 6, __u);
+}
+
+/* Unmasked "le" form: comparison code 2, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epi8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, (__mmask16)-1);
+}
+
+/* Masked "le" form: comparison code 2, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, __u);
+}
+
+/* Unmasked "le" form: comparison code 2, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epu8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, (__mmask16)-1);
+}
+
+/* Masked "le" form: comparison code 2, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 2, __u);
+}
+
+/* Unmasked "le" form: comparison code 2, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epi8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, (__mmask32)-1);
+}
+
+/* Masked "le" form: comparison code 2, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, __u);
+}
+
+/* Unmasked "le" form: comparison code 2, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epu8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, (__mmask32)-1);
+}
+
+/* Masked "le" form: comparison code 2, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 2, __u);
+}
+
+/* Unmasked "le" form: comparison code 2, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epi16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, (__mmask8)-1);
+}
+
+/* Masked "le" form: comparison code 2, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, __u);
+}
+
+/* Unmasked "le" form: comparison code 2, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epu16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, (__mmask8)-1);
+}
+
+/* Masked "le" form: comparison code 2, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 2, __u);
+}
+
+/* Unmasked "le" form: comparison code 2, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epi16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, (__mmask16)-1);
+}
+
+/* Masked "le" form: comparison code 2, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, __u);
+}
+
+/* Unmasked "le" form: comparison code 2, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epu16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, (__mmask16)-1);
+}
+
+/* Masked "le" form: comparison code 2, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 2, __u);
+}
+
+/* Unmasked "lt" form: comparison code 1, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epi8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, (__mmask16)-1);
+}
+
+/* Masked "lt" form: comparison code 1, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, __u);
+}
+
+/* Unmasked "lt" form: comparison code 1, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epu8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, (__mmask16)-1);
+}
+
+/* Masked "lt" form: comparison code 1, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 1, __u);
+}
+
+/* Unmasked "lt" form: comparison code 1, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epi8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, (__mmask32)-1);
+}
+
+/* Masked "lt" form: comparison code 1, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, __u);
+}
+
+/* Unmasked "lt" form: comparison code 1, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epu8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, (__mmask32)-1);
+}
+
+/* Masked "lt" form: comparison code 1, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 1, __u);
+}
+
+/* Unmasked "lt" form: comparison code 1, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epi16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, (__mmask8)-1);
+}
+
+/* Masked "lt" form: comparison code 1, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, __u);
+}
+
+/* Unmasked "lt" form: comparison code 1, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epu16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, (__mmask8)-1);
+}
+
+/* Masked "lt" form: comparison code 1, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 1, __u);
+}
+
+/* Unmasked "lt" form: comparison code 1, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epi16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, (__mmask16)-1);
+}
+
+/* Masked "lt" form: comparison code 1, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, __u);
+}
+
+/* Unmasked "lt" form: comparison code 1, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epu16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, (__mmask16)-1);
+}
+
+/* Masked "lt" form: comparison code 1, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 1, __u);
+}
+
+/* Unmasked "neq" form: comparison code 4, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epi8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, (__mmask16)-1);
+}
+
+/* Masked "neq" form: comparison code 4, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epi8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, __u);
+}
+
+/* Unmasked "neq" form: comparison code 4, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epu8_mask(__m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, (__mmask16)-1);
+}
+
+/* Masked "neq" form: comparison code 4, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epu8_mask(__mmask16 __u, __m128i __a, __m128i __b) {
+  return (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)__a, (__v16qi)__b, 4, __u);
+}
+
+/* Unmasked "neq" form: comparison code 4, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epi8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, (__mmask32)-1);
+}
+
+/* Masked "neq" form: comparison code 4, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epi8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, __u);
+}
+
+/* Unmasked "neq" form: comparison code 4, all-ones mask operand. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epu8_mask(__m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, (__mmask32)-1);
+}
+
+/* Masked "neq" form: comparison code 4, caller-supplied mask __u. */
+static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epu8_mask(__mmask32 __u, __m256i __a, __m256i __b) {
+  return (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)__a, (__v32qi)__b, 4, __u);
+}
+
+/* Unmasked "neq" form: comparison code 4, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epi16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, (__mmask8)-1);
+}
+
+/* Masked "neq" form: comparison code 4, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epi16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, __u);
+}
+
+/* Unmasked "neq" form: comparison code 4, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epu16_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, (__mmask8)-1);
+}
+
+/* Masked "neq" form: comparison code 4, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epu16_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)__a, (__v8hi)__b, 4, __u);
+}
+
+/* Unmasked "neq" form: comparison code 4, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epi16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, (__mmask16)-1);
+}
+
+/* Masked "neq" form: comparison code 4, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epi16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, __u);
+}
+
+/* Unmasked "neq" form: comparison code 4, all-ones mask operand. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epu16_mask(__m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, (__mmask16)-1);
+}
+
+/* Masked "neq" form: comparison code 4, caller-supplied mask __u. */
+static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epu16_mask(__mmask16 __u, __m256i __a, __m256i __b) {
+  return (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)__a, (__v16hi)__b, 4, __u);
+}
+
+#define _mm_cmp_epi8_mask(a, b, p) /* p: comparison code; all-ones mask */ \
+  __extension__ ({ \
+    (__mmask16)__builtin_ia32_cmpb128_mask( \
+        (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (p), (__mmask16)-1); })
+
+#define _mm_mask_cmp_epi8_mask(m, a, b, p) /* p: comparison code; m: mask */ \
+  __extension__ ({ \
+    (__mmask16)__builtin_ia32_cmpb128_mask( \
+        (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (p), (__mmask16)(m)); })
+
+#define _mm_cmp_epu8_mask(a, b, p) /* p: comparison code; all-ones mask */ \
+  __extension__ ({ \
+    (__mmask16)__builtin_ia32_ucmpb128_mask( \
+        (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (p), (__mmask16)-1); })
+
+#define _mm_mask_cmp_epu8_mask(m, a, b, p) /* p: comparison code; m: mask */ \
+  __extension__ ({ \
+    (__mmask16)__builtin_ia32_ucmpb128_mask( \
+        (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (p), (__mmask16)(m)); })
+
+#define _mm256_cmp_epi8_mask(a, b, p) /* p: comparison code; all-ones mask */ \
+  __extension__ ({ \
+    (__mmask32)__builtin_ia32_cmpb256_mask( \
+        (__v32qi)(__m256i)(a), (__v32qi)(__m256i)(b), (p), (__mmask32)-1); })
+
+#define _mm256_mask_cmp_epi8_mask(m, a, b, p) /* p: comparison code; m: mask */ \
+  __extension__ ({ \
+    (__mmask32)__builtin_ia32_cmpb256_mask( \
+        (__v32qi)(__m256i)(a), (__v32qi)(__m256i)(b), (p), (__mmask32)(m)); })
+
+#define _mm256_cmp_epu8_mask(a, b, p) /* p: comparison code; all-ones mask */ \
+  __extension__ ({ \
+    (__mmask32)__builtin_ia32_ucmpb256_mask( \
+        (__v32qi)(__m256i)(a), (__v32qi)(__m256i)(b), (p), (__mmask32)-1); })
+
+#define _mm256_mask_cmp_epu8_mask(m, a, b, p) /* p: comparison code; m: mask */ \
+  __extension__ ({ \
+    (__mmask32)__builtin_ia32_ucmpb256_mask( \
+        (__v32qi)(__m256i)(a), (__v32qi)(__m256i)(b), (p), (__mmask32)(m)); })
+
+#define _mm_cmp_epi16_mask(a, b, p) /* p: comparison code; all-ones mask */ \
+  __extension__ ({ \
+    (__mmask8)__builtin_ia32_cmpw128_mask( \
+        (__v8hi)(__m128i)(a), (__v8hi)(__m128i)(b), (p), (__mmask8)-1); })
+
+#define _mm_mask_cmp_epi16_mask(m, a, b, p) /* p: comparison code; m: mask */ \
+  __extension__ ({ \
+    (__mmask8)__builtin_ia32_cmpw128_mask( \
+        (__v8hi)(__m128i)(a), (__v8hi)(__m128i)(b), (p), (__mmask8)(m)); })
+
+#define _mm_cmp_epu16_mask(a, b, p) /* p: comparison code; all-ones mask */ \
+  __extension__ ({ \
+    (__mmask8)__builtin_ia32_ucmpw128_mask( \
+        (__v8hi)(__m128i)(a), (__v8hi)(__m128i)(b), (p), (__mmask8)-1); })
+
+#define _mm_mask_cmp_epu16_mask(m, a, b, p) /* p: comparison code; m: mask */ \
+  __extension__ ({ \
+    (__mmask8)__builtin_ia32_ucmpw128_mask( \
+        (__v8hi)(__m128i)(a), (__v8hi)(__m128i)(b), (p), (__mmask8)(m)); })
+
+#define _mm256_cmp_epi16_mask(a, b, p) /* p: comparison code; all-ones mask */ \
+  __extension__ ({ \
+    (__mmask16)__builtin_ia32_cmpw256_mask( \
+        (__v16hi)(__m256i)(a), (__v16hi)(__m256i)(b), (p), (__mmask16)-1); })
+
+#define _mm256_mask_cmp_epi16_mask(m, a, b, p) /* p: comparison code; m: mask */ \
+  __extension__ ({ \
+    (__mmask16)__builtin_ia32_cmpw256_mask( \
+        (__v16hi)(__m256i)(a), (__v16hi)(__m256i)(b), (p), (__mmask16)(m)); })
+
+#define _mm256_cmp_epu16_mask(a, b, p) /* p: comparison code; all-ones mask */ \
+  __extension__ ({ \
+    (__mmask16)__builtin_ia32_ucmpw256_mask( \
+        (__v16hi)(__m256i)(a), (__v16hi)(__m256i)(b), (p), (__mmask16)-1); })
+
+#define _mm256_mask_cmp_epu16_mask(m, a, b, p) /* p: comparison code; m: mask */ \
+  __extension__ ({ \
+    (__mmask16)__builtin_ia32_ucmpw256_mask( \
+        (__v16hi)(__m256i)(a), (__v16hi)(__m256i)(b), (p), (__mmask16)(m)); })
+
 #endif /* __AVX512VLBWINTRIN_H */
diff --git a/renderscript/clang-include/avx512vlintrin.h b/renderscript/clang-include/avx512vlintrin.h
index 8a374b1..b460992 100644
--- a/renderscript/clang-include/avx512vlintrin.h
+++ b/renderscript/clang-include/avx512vlintrin.h
@@ -42,6 +42,17 @@
                                                   __u);
 }
 
+/* Unmasked "eq" form: comparison code 0, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, (__mmask8)-1);
+}
+
+/* Masked "eq" form: comparison code 0, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, __u);
+}
 
 static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
 _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
@@ -56,6 +67,18 @@
 }
 
 static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
+  /* Unmasked "eq" form: comparison code 0, all-ones mask operand. */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, (__mmask8)-1);
+}
+
+/* Masked "eq" form: comparison code 0, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, __u);
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
   return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
                                                   (__mmask8)-1);
@@ -67,6 +90,17 @@
                                                   __u);
 }
 
+/* Unmasked "eq" form: comparison code 0, all-ones mask operand. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, (__mmask8)-1);
+}
+
+/* Masked "eq" form: comparison code 0, caller-supplied mask __u. */
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, __u);
+}
 
 static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
 _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
@@ -80,4 +114,580 @@
                                                   __u);
 }
 
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) { /* cmpeq; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, /* 0: imm paired with cmpeq */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpeq; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, /* 0: imm paired with cmpeq */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) { /* cmpge; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, /* 5: imm paired with cmpge */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpge; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, /* 5: imm paired with cmpge */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { /* cmpge; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, /* 5: imm paired with cmpge */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpge; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, /* 5: imm paired with cmpge */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { /* cmpge; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, /* 5: imm paired with cmpge */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpge; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, /* 5: imm paired with cmpge */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { /* cmpge; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, /* 5: imm paired with cmpge */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpge; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, /* 5: imm paired with cmpge */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { /* cmpge; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, /* 5: imm paired with cmpge */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpge; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, /* 5: imm paired with cmpge */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { /* cmpge; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, /* 5: imm paired with cmpge */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpge; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, /* 5: imm paired with cmpge */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { /* cmpge; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, /* 5: imm paired with cmpge */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpge; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, /* 5: imm paired with cmpge */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { /* cmpge; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, /* 5: imm paired with cmpge */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpge; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, /* 5: imm paired with cmpge */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+
+
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { /* cmpgt; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, /* this builtin takes no predicate imm */
+                                                  (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpgt; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, /* this builtin takes no predicate imm */
+                                                  __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { /* cmpgt; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, /* 6: imm paired with cmpgt */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpgt; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, /* 6: imm paired with cmpgt */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { /* cmpgt; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, /* this builtin takes no predicate imm */
+                                                  (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpgt; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, /* this builtin takes no predicate imm */
+                                                  __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { /* cmpgt; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, /* 6: imm paired with cmpgt */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpgt; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, /* 6: imm paired with cmpgt */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { /* cmpgt; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, /* this builtin takes no predicate imm */
+                                                  (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpgt; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, /* this builtin takes no predicate imm */
+                                                  __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { /* cmpgt; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, /* 6: imm paired with cmpgt */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpgt; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, /* 6: imm paired with cmpgt */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { /* cmpgt; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, /* this builtin takes no predicate imm */
+                                                  (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpgt; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, /* this builtin takes no predicate imm */
+                                                  __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { /* cmpgt; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, /* 6: imm paired with cmpgt */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpgt; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, /* 6: imm paired with cmpgt */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epi32_mask(__m128i __a, __m128i __b) { /* cmple; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, /* 2: imm paired with cmple */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmple; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, /* 2: imm paired with cmple */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epu32_mask(__m128i __a, __m128i __b) { /* cmple; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, /* 2: imm paired with cmple */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmple; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, /* 2: imm paired with cmple */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { /* cmple; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, /* 2: imm paired with cmple */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmple; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, /* 2: imm paired with cmple */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { /* cmple; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, /* 2: imm paired with cmple */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmple; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, /* 2: imm paired with cmple */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epi64_mask(__m128i __a, __m128i __b) { /* cmple; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, /* 2: imm paired with cmple */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmple; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, /* 2: imm paired with cmple */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmple_epu64_mask(__m128i __a, __m128i __b) { /* cmple; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, /* 2: imm paired with cmple */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmple; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, /* 2: imm paired with cmple */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) { /* cmple; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, /* 2: imm paired with cmple */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmple; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, /* 2: imm paired with cmple */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { /* cmple; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, /* 2: imm paired with cmple */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmple; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, /* 2: imm paired with cmple */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { /* cmplt; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, /* 1: imm paired with cmplt */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmplt; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, /* 1: imm paired with cmplt */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { /* cmplt; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, /* 1: imm paired with cmplt */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmplt; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, /* 1: imm paired with cmplt */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { /* cmplt; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, /* 1: imm paired with cmplt */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmplt; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, /* 1: imm paired with cmplt */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { /* cmplt; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, /* 1: imm paired with cmplt */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmplt; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, /* 1: imm paired with cmplt */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { /* cmplt; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, /* 1: imm paired with cmplt */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmplt; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, /* 1: imm paired with cmplt */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { /* cmplt; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, /* 1: imm paired with cmplt */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmplt; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, /* 1: imm paired with cmplt */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { /* cmplt; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, /* 1: imm paired with cmplt */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmplt; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, /* 1: imm paired with cmplt */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { /* cmplt; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, /* 1: imm paired with cmplt */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmplt; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, /* 1: imm paired with cmplt */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { /* cmpneq; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, /* 4: imm paired with cmpneq */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpneq; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, /* 4: imm paired with cmpneq */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { /* cmpneq; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, /* 4: imm paired with cmpneq */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpneq; operands cast to __v4si */
+  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, /* 4: imm paired with cmpneq */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { /* cmpneq; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, /* 4: imm paired with cmpneq */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpneq; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, /* 4: imm paired with cmpneq */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { /* cmpneq; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, /* 4: imm paired with cmpneq */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpneq; operands cast to __v8si */
+  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, /* 4: imm paired with cmpneq */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { /* cmpneq; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, /* 4: imm paired with cmpneq */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpneq; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, /* 4: imm paired with cmpneq */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { /* cmpneq; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, /* 4: imm paired with cmpneq */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { /* cmpneq; operands cast to __v2di */
+  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, /* 4: imm paired with cmpneq */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { /* cmpneq; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, /* 4: imm paired with cmpneq */
+                                               (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpneq; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, /* 4: imm paired with cmpneq */
+                                               __u); /* caller's __u is the mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { /* cmpneq; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, /* 4: imm paired with cmpneq */
+                                                (__mmask8)-1); /* all-ones mask operand */
+}
+
+static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
+_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { /* cmpneq; operands cast to __v4di */
+  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, /* 4: imm paired with cmpneq */
+                                                __u); /* caller's __u is the mask operand */
+}
+
+#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
+                                        (__v4si)(__m128i)(b), \
+                                        (p), (__mmask8)-1); }) /* p: caller-chosen predicate; all-ones mask operand */
+
+#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
+                                        (__v4si)(__m128i)(b), \
+                                        (p), (__mmask8)(m)); }) /* p: caller-chosen predicate; m cast to __mmask8 as the mask operand */
+
+#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
+                                         (__v4si)(__m128i)(b), \
+                                         (p), (__mmask8)-1); }) /* p: caller-chosen predicate; all-ones mask operand */
+
+#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
+                                         (__v4si)(__m128i)(b), \
+                                         (p), (__mmask8)(m)); }) /* p: caller-chosen predicate; m cast to __mmask8 as the mask operand */
+
+#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
+                                        (__v8si)(__m256i)(b), \
+                                        (p), (__mmask8)-1); }) /* p: caller-chosen predicate; all-ones mask operand */
+
+#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
+                                        (__v8si)(__m256i)(b), \
+                                        (p), (__mmask8)(m)); }) /* p: caller-chosen predicate; m cast to __mmask8 as the mask operand */
+
+#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
+                                         (__v8si)(__m256i)(b), \
+                                         (p), (__mmask8)-1); }) /* p: caller-chosen predicate; all-ones mask operand */
+
+#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
+                                         (__v8si)(__m256i)(b), \
+                                         (p), (__mmask8)(m)); }) /* p: caller-chosen predicate; m cast to __mmask8 as the mask operand */
+
+#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
+                                        (__v2di)(__m128i)(b), \
+                                        (p), (__mmask8)-1); }) /* p: caller-chosen predicate; all-ones mask operand */
+
+#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
+                                        (__v2di)(__m128i)(b), \
+                                        (p), (__mmask8)(m)); }) /* p: caller-chosen predicate; m cast to __mmask8 as the mask operand */
+
+#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
+                                         (__v2di)(__m128i)(b), \
+                                         (p), (__mmask8)-1); }) /* p: caller-chosen predicate; all-ones mask operand */
+
+#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
+                                         (__v2di)(__m128i)(b), \
+                                         (p), (__mmask8)(m)); }) /* p: caller-chosen predicate; m cast to __mmask8 as the mask operand */
+
+#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
+                                        (__v4di)(__m256i)(b), \
+                                        (p), (__mmask8)-1); }) /* p: caller-chosen predicate; all-ones mask operand */
+
+#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
+                                        (__v4di)(__m256i)(b), \
+                                        (p), (__mmask8)(m)); }) /* p: caller-chosen predicate; m cast to __mmask8 as the mask operand */
+
+#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
+                                         (__v4di)(__m256i)(b), \
+                                         (p), (__mmask8)-1); }) /* p: caller-chosen predicate; all-ones mask operand */
+
+#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
+  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
+                                         (__v4di)(__m256i)(b), \
+                                         (p), (__mmask8)(m)); }) /* p: caller-chosen predicate; m cast to __mmask8 as the mask operand */
+
 #endif /* __AVX512VLINTRIN_H */
diff --git a/renderscript/clang-include/avxintrin.h b/renderscript/clang-include/avxintrin.h
index 4e1044a..d7c7f46 100644
--- a/renderscript/clang-include/avxintrin.h
+++ b/renderscript/clang-include/avxintrin.h
@@ -257,8 +257,7 @@
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_permutevar_ps(__m256 __a, __m256i __c)
 {
-  return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a,
-						  (__v8si)__c);
+  return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c); /* __a passed as __v8sf, __c as __v8si */
 }
 
 #define _mm_permute_pd(A, C) __extension__ ({ \
@@ -444,21 +443,21 @@
   (__m128i)__builtin_ia32_vextractf128_si256((__v8si)__A, (O)); })
 
 static __inline int __attribute__((__always_inline__, __nodebug__))
-_mm256_extract_epi32(__m256i __a, int const __imm)
+_mm256_extract_epi32(__m256i __a, const int __imm) /* returns element (__imm & 7) of __a viewed as __v8si */
 {
   __v8si __b = (__v8si)__a;
   return __b[__imm & 7];
 }
 
 static __inline int __attribute__((__always_inline__, __nodebug__))
-_mm256_extract_epi16(__m256i __a, int const __imm)
+_mm256_extract_epi16(__m256i __a, const int __imm) /* returns element (__imm & 15) of __a viewed as __v16hi */
 {
   __v16hi __b = (__v16hi)__a;
   return __b[__imm & 15];
 }
 
 static __inline int __attribute__((__always_inline__, __nodebug__))
-_mm256_extract_epi8(__m256i __a, int const __imm)
+_mm256_extract_epi8(__m256i __a, const int __imm)
 {
   __v32qi __b = (__v32qi)__a;
   return __b[__imm & 31];
@@ -515,7 +514,7 @@
 
 #ifdef __x86_64__
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
-_mm256_insert_epi64(__m256i __a, int __b, int const __imm)
+_mm256_insert_epi64(__m256i __a, long long __b, int const __imm)
 {
   __v4di __c = (__v4di)__a;
   __c[__imm & 3] = __b;
@@ -785,7 +784,7 @@
 {
   struct __loadu_pd {
     __m256d __v;
-  } __attribute__((packed, may_alias));
+  } __attribute__((__packed__, __may_alias__));
   return ((struct __loadu_pd*)__p)->__v;
 }
 
@@ -794,7 +793,7 @@
 {
   struct __loadu_ps {
     __m256 __v;
-  } __attribute__((packed, may_alias));
+  } __attribute__((__packed__, __may_alias__));
   return ((struct __loadu_ps*)__p)->__v;
 }
 
@@ -809,7 +808,7 @@
 {
   struct __loadu_si256 {
     __m256i __v;
-  } __attribute__((packed, may_alias));
+  } __attribute__((__packed__, __may_alias__));
   return ((struct __loadu_si256*)__p)->__v;
 }
 
@@ -935,23 +934,23 @@
 
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_set_ps(float __a, float __b, float __c, float __d,
-	            float __e, float __f, float __g, float __h)
+              float __e, float __f, float __g, float __h) /* initializer below lists them reversed: __h ... __a */
 {
   return (__m256){ __h, __g, __f, __e, __d, __c, __b, __a };
 }
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_set_epi32(int __i0, int __i1, int __i2, int __i3,
-		             int __i4, int __i5, int __i6, int __i7)
+                 int __i4, int __i5, int __i6, int __i7) /* initializer below lists them reversed: __i7 ... __i0 */
 {
   return (__m256i)(__v8si){ __i7, __i6, __i5, __i4, __i3, __i2, __i1, __i0 };
 }
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_set_epi16(short __w15, short __w14, short __w13, short __w12,
-		             short __w11, short __w10, short __w09, short __w08,
-		             short __w07, short __w06, short __w05, short __w04,
-		             short __w03, short __w02, short __w01, short __w00)
+                 short __w11, short __w10, short __w09, short __w08,
+                 short __w07, short __w06, short __w05, short __w04,
+                 short __w03, short __w02, short __w01, short __w00)
 {
   return (__m256i)(__v16hi){ __w00, __w01, __w02, __w03, __w04, __w05, __w06,
     __w07, __w08, __w09, __w10, __w11, __w12, __w13, __w14, __w15 };
@@ -959,13 +958,13 @@
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_set_epi8(char __b31, char __b30, char __b29, char __b28,
-		            char __b27, char __b26, char __b25, char __b24,
-		            char __b23, char __b22, char __b21, char __b20,
-		            char __b19, char __b18, char __b17, char __b16,
-		            char __b15, char __b14, char __b13, char __b12,
-		            char __b11, char __b10, char __b09, char __b08,
-		            char __b07, char __b06, char __b05, char __b04,
-		            char __b03, char __b02, char __b01, char __b00)
+                char __b27, char __b26, char __b25, char __b24,
+                char __b23, char __b22, char __b21, char __b20,
+                char __b19, char __b18, char __b17, char __b16,
+                char __b15, char __b14, char __b13, char __b12,
+                char __b11, char __b10, char __b09, char __b08,
+                char __b07, char __b06, char __b05, char __b04,
+                char __b03, char __b02, char __b01, char __b00)
 {
   return (__m256i)(__v32qi){
     __b00, __b01, __b02, __b03, __b04, __b05, __b06, __b07,
@@ -990,23 +989,23 @@
 
 static __inline __m256 __attribute__((__always_inline__, __nodebug__))
 _mm256_setr_ps(float __a, float __b, float __c, float __d,
-		           float __e, float __f, float __g, float __h)
+               float __e, float __f, float __g, float __h)
 {
   return (__m256){ __a, __b, __c, __d, __e, __f, __g, __h };
 }
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_setr_epi32(int __i0, int __i1, int __i2, int __i3,
-		              int __i4, int __i5, int __i6, int __i7)
+                  int __i4, int __i5, int __i6, int __i7)
 {
   return (__m256i)(__v8si){ __i0, __i1, __i2, __i3, __i4, __i5, __i6, __i7 };
 }
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_setr_epi16(short __w15, short __w14, short __w13, short __w12,
-		   short __w11, short __w10, short __w09, short __w08,
-		   short __w07, short __w06, short __w05, short __w04,
-		   short __w03, short __w02, short __w01, short __w00)
+       short __w11, short __w10, short __w09, short __w08,
+       short __w07, short __w06, short __w05, short __w04,
+       short __w03, short __w02, short __w01, short __w00)
 {
   return (__m256i)(__v16hi){ __w15, __w14, __w13, __w12, __w11, __w10, __w09,
     __w08, __w07, __w06, __w05, __w04, __w03, __w02, __w01, __w00 };
@@ -1014,19 +1013,19 @@
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_setr_epi8(char __b31, char __b30, char __b29, char __b28,
-		             char __b27, char __b26, char __b25, char __b24,
-		             char __b23, char __b22, char __b21, char __b20,
-		             char __b19, char __b18, char __b17, char __b16,
-		             char __b15, char __b14, char __b13, char __b12,
-		             char __b11, char __b10, char __b09, char __b08,
-		             char __b07, char __b06, char __b05, char __b04,
-		             char __b03, char __b02, char __b01, char __b00)
+                 char __b27, char __b26, char __b25, char __b24,
+                 char __b23, char __b22, char __b21, char __b20,
+                 char __b19, char __b18, char __b17, char __b16,
+                 char __b15, char __b14, char __b13, char __b12,
+                 char __b11, char __b10, char __b09, char __b08,
+                 char __b07, char __b06, char __b05, char __b04,
+                 char __b03, char __b02, char __b01, char __b00)
 {
   return (__m256i)(__v32qi){
     __b31, __b30, __b29, __b28, __b27, __b26, __b25, __b24,
-		__b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
-		__b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
-		__b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
+    __b23, __b22, __b21, __b20, __b19, __b18, __b17, __b16,
+    __b15, __b14, __b13, __b12, __b11, __b10, __b09, __b08,
+    __b07, __b06, __b05, __b04, __b03, __b02, __b01, __b00 };
 }
 
 static __inline __m256i __attribute__((__always_inline__, __nodebug__))
@@ -1195,7 +1194,7 @@
 {
   struct __loadu_si128 {
     __m128i __v;
-  } __attribute__((packed, may_alias));
+  } __attribute__((__packed__, __may_alias__));
   __m256i __v256 = _mm256_castsi128_si256(
     ((struct __loadu_si128*)__addr_lo)->__v);
   return _mm256_insertf128_si256(__v256,
diff --git a/renderscript/clang-include/emmintrin.h b/renderscript/clang-include/emmintrin.h
index b3f8569..c764d68 100644
--- a/renderscript/clang-include/emmintrin.h
+++ b/renderscript/clang-include/emmintrin.h
@@ -155,148 +155,148 @@
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 0);
+  return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 1);
+  return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmple_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 2);
+  return (__m128d)__builtin_ia32_cmplepd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__b, __a, 1);
+  return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__b, __a, 2);
+  return (__m128d)__builtin_ia32_cmplepd(__b, __a);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpord_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 7);
+  return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpunord_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 3);
+  return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpneq_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 4);
+  return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 5);
+  return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnle_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__a, __b, 6);
+  return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__b, __a, 5);
+  return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_pd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmppd(__b, __a, 6);
+  return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0);
+  return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1);
+  return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmple_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2);
+  return (__m128d)__builtin_ia32_cmplesd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_sd(__m128d __a, __m128d __b)
 {
-  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1);
+  __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_sd(__m128d __a, __m128d __b)
 {
-  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2);
+  __m128d __c = __builtin_ia32_cmplesd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpord_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7);
+  return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpunord_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3);
+  return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpneq_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4);
+  return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5);
+  return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnle_sd(__m128d __a, __m128d __b)
 {
-  return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6);
+  return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_sd(__m128d __a, __m128d __b)
 {
-  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5);
+  __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_sd(__m128d __a, __m128d __b)
 {
-  __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6);
+  __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
   return (__m128d) { __c[0], __a[1] };
 }
 
@@ -489,7 +489,7 @@
 {
   struct __loadu_pd {
     __m128d __v;
-  } __attribute__((packed, may_alias));
+  } __attribute__((__packed__, __may_alias__));
   return ((struct __loadu_pd*)__dp)->__v;
 }
 
@@ -825,11 +825,28 @@
   return __a ^ __b;
 }
 
-#define _mm_slli_si128(a, count) __extension__ ({ \
-  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
-  __m128i __a = (a); \
-   _Pragma("clang diagnostic pop"); \
-  (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); })
+#define _mm_slli_si128(a, imm) __extension__ ({                         \
+  (__m128i)__builtin_shufflevector((__v16qi)_mm_setzero_si128(),        \
+                                   (__v16qi)(__m128i)(a),               \
+                                   ((imm)&0xF0) ? 0 : 16 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 17 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 18 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 19 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 20 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 21 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 22 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 23 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 24 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 25 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 26 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 27 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 28 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 29 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 30 - ((imm)&0xF), \
+                                   ((imm)&0xF0) ? 0 : 31 - ((imm)&0xF)); })
+
+#define _mm_bslli_si128(a, imm) \
+  _mm_slli_si128((a), (imm))
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_slli_epi16(__m128i __a, int __count)
@@ -891,12 +908,28 @@
   return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
 }
 
+#define _mm_srli_si128(a, imm) __extension__ ({                          \
+  (__m128i)__builtin_shufflevector((__v16qi)(__m128i)(a),                \
+                                   (__v16qi)_mm_setzero_si128(),         \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 0,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 1,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 2,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 3,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 4,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 5,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 6,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 7,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 8,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 9,  \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 10, \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 11, \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 12, \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 13, \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 14, \
+                                   ((imm)&0xF0) ? 16 : ((imm)&0xF) + 15); })
 
-#define _mm_srli_si128(a, count) __extension__ ({ \
-  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
-  __m128i __a = (a); \
-  _Pragma("clang diagnostic pop"); \
-  (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); })
+#define _mm_bsrli_si128(a, imm) \
+  _mm_srli_si128((a), (imm))
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_srli_epi16(__m128i __a, int __count)
@@ -1070,7 +1103,7 @@
 {
   struct __loadu_si128 {
     __m128i __v;
-  } __attribute__((packed, may_alias));
+  } __attribute__((__packed__, __may_alias__));
   return ((struct __loadu_si128*)__p)->__v;
 }
 
@@ -1284,27 +1317,21 @@
 }
 
 #define _mm_shuffle_epi32(a, imm) __extension__ ({ \
-  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
-  __m128i __a = (a); \
-  _Pragma("clang diagnostic pop"); \
-  (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \
+  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
+                                   (__v4si)_mm_set1_epi32(0), \
                                    (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                    ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
 
 #define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
-  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
-  __m128i __a = (a); \
-  _Pragma("clang diagnostic pop"); \
-  (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
+  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
+                                   (__v8hi)_mm_set1_epi16(0), \
                                    (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                    ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
                                    4, 5, 6, 7); })
 
 #define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
-  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
-  __m128i __a = (a); \
-  _Pragma("clang diagnostic pop"); \
-  (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \
+  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
+                                   (__v8hi)_mm_set1_epi16(0), \
                                    0, 1, 2, 3, \
                                    4 + (((imm) & 0x03) >> 0), \
                                    4 + (((imm) & 0x0c) >> 2), \
@@ -1396,11 +1423,8 @@
 }
 
 #define _mm_shuffle_pd(a, b, i) __extension__ ({ \
-  _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
-  __m128d __a = (a); \
-  __m128d __b = (b); \
-  _Pragma("clang diagnostic pop"); \
-  __builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); })
+  __builtin_shufflevector((__m128d)(a), (__m128d)(b), \
+                          (i) & 1, (((i) & 2) >> 1) + 2); })
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_castpd_ps(__m128d __a)
diff --git a/renderscript/clang-include/shaintrin.h b/renderscript/clang-include/shaintrin.h
index 66ed055..391a4bb 100644
--- a/renderscript/clang-include/shaintrin.h
+++ b/renderscript/clang-include/shaintrin.h
@@ -38,37 +38,37 @@
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_sha1nexte_epu32(__m128i __X, __m128i __Y)
 {
-  return __builtin_ia32_sha1nexte(__X, __Y);
+  return (__m128i)__builtin_ia32_sha1nexte((__v4si)__X, (__v4si)__Y);
 }
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_sha1msg1_epu32(__m128i __X, __m128i __Y)
 {
-  return __builtin_ia32_sha1msg1(__X, __Y);
+  return (__m128i)__builtin_ia32_sha1msg1((__v4si)__X, (__v4si)__Y);
 }
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_sha1msg2_epu32(__m128i __X, __m128i __Y)
 {
-  return __builtin_ia32_sha1msg2(__X, __Y);
+  return (__m128i)__builtin_ia32_sha1msg2((__v4si)__X, (__v4si)__Y);
 }
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_sha256rnds2_epu32(__m128i __X, __m128i __Y, __m128i __Z)
 {
-  return __builtin_ia32_sha256rnds2(__X, __Y, __Z);
+  return (__m128i)__builtin_ia32_sha256rnds2((__v4si)__X, (__v4si)__Y, (__v4si)__Z);
 }
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_sha256msg1_epu32(__m128i __X, __m128i __Y)
 {
-  return __builtin_ia32_sha256msg1(__X, __Y);
+  return (__m128i)__builtin_ia32_sha256msg1((__v4si)__X, (__v4si)__Y);
 }
 
 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_sha256msg2_epu32(__m128i __X, __m128i __Y)
 {
-  return __builtin_ia32_sha256msg2(__X, __Y);
+  return (__m128i)__builtin_ia32_sha256msg2((__v4si)__X, (__v4si)__Y);
 }
 
 #endif /* __SHAINTRIN_H */
diff --git a/renderscript/clang-include/stdatomic.h b/renderscript/clang-include/stdatomic.h
index e3c3476..e037987 100644
--- a/renderscript/clang-include/stdatomic.h
+++ b/renderscript/clang-include/stdatomic.h
@@ -71,7 +71,7 @@
 
 /* 7.17.4 Fences */
 
-// These should be provided by the libc implementation.
+/* These should be provided by the libc implementation. */
 void atomic_thread_fence(memory_order);
 void atomic_signal_fence(memory_order);
 
@@ -164,7 +164,7 @@
 
 #define ATOMIC_FLAG_INIT { 0 }
 
-// These should be provided by the libc implementation.
+/* These should be provided by the libc implementation. */
 #ifdef __cplusplus
 bool atomic_flag_test_and_set(volatile atomic_flag *);
 bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order);
diff --git a/renderscript/clang-include/unwind.h b/renderscript/clang-include/unwind.h
index 685c1df..303d792 100644
--- a/renderscript/clang-include/unwind.h
+++ b/renderscript/clang-include/unwind.h
@@ -26,8 +26,8 @@
 #ifndef __CLANG_UNWIND_H
 #define __CLANG_UNWIND_H
 
-#if __has_include_next(<unwind.h>)
-/* Darwin (from 11.x on) and libunwind provide an unwind.h. If that's available,
+#if defined(__APPLE__) && __has_include_next(<unwind.h>)
+/* Darwin (from 11.x on) provides an unwind.h. If that's available,
  * use it. libunwind wraps some of its definitions in #ifdef _GNU_SOURCE,
  * so define that around the include.*/
 # ifndef _GNU_SOURCE
@@ -199,6 +199,8 @@
 
 _Unwind_Word _Unwind_GetCFA(struct _Unwind_Context *);
 
+_Unwind_Word _Unwind_GetBSP(struct _Unwind_Context *);
+
 void *_Unwind_GetLanguageSpecificData(struct _Unwind_Context *);
 
 _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *);
@@ -233,9 +235,9 @@
 #ifdef __APPLE__
 
 _Unwind_Ptr _Unwind_GetDataRelBase(struct _Unwind_Context *)
-    __attribute__((unavailable));
+    __attribute__((__unavailable__));
 _Unwind_Ptr _Unwind_GetTextRelBase(struct _Unwind_Context *)
-    __attribute__((unavailable));
+    __attribute__((__unavailable__));
 
 /* Darwin-specific functions */
 void __register_frame(const void *);
@@ -249,15 +251,15 @@
 void *_Unwind_Find_FDE(const void *, struct dwarf_eh_bases *);
 
 void __register_frame_info_bases(const void *, void *, void *, void *)
-  __attribute__((unavailable));
-void __register_frame_info(const void *, void *) __attribute__((unavailable));
+  __attribute__((__unavailable__));
+void __register_frame_info(const void *, void *) __attribute__((__unavailable__));
 void __register_frame_info_table_bases(const void *, void*, void *, void *)
-  __attribute__((unavailable));
+  __attribute__((__unavailable__));
 void __register_frame_info_table(const void *, void *)
-  __attribute__((unavailable));
-void __register_frame_table(const void *) __attribute__((unavailable));
-void __deregister_frame_info(const void *) __attribute__((unavailable));
-void __deregister_frame_info_bases(const void *)__attribute__((unavailable));
+  __attribute__((__unavailable__));
+void __register_frame_table(const void *) __attribute__((__unavailable__));
+void __deregister_frame_info(const void *) __attribute__((__unavailable__));
+void __deregister_frame_info_bases(const void *)__attribute__((__unavailable__));
 
 #else
 
diff --git a/renderscript/clang-include/xmmintrin.h b/renderscript/clang-include/xmmintrin.h
index c9befcb..d1afe81 100644
--- a/renderscript/clang-include/xmmintrin.h
+++ b/renderscript/clang-include/xmmintrin.h
@@ -182,153 +182,153 @@
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 0);
+  return (__m128)__builtin_ia32_cmpeqss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpeq_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 0);
+  return (__m128)__builtin_ia32_cmpeqps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 1);
+  return (__m128)__builtin_ia32_cmpltss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmplt_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 1);
+  return (__m128)__builtin_ia32_cmpltps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmple_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 2);
+  return (__m128)__builtin_ia32_cmpless(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmple_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 2);
+  return (__m128)__builtin_ia32_cmpleps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
-                                         __builtin_ia32_cmpss(__b, __a, 1),
+                                         __builtin_ia32_cmpltss(__b, __a),
                                          4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpgt_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__b, __a, 1);
+  return (__m128)__builtin_ia32_cmpltps(__b, __a);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
-                                         __builtin_ia32_cmpss(__b, __a, 2),
+                                         __builtin_ia32_cmpless(__b, __a),
                                          4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpge_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__b, __a, 2);
+  return (__m128)__builtin_ia32_cmpleps(__b, __a);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpneq_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 4);
+  return (__m128)__builtin_ia32_cmpneqss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpneq_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 4);
+  return (__m128)__builtin_ia32_cmpneqps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnlt_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 5);
+  return (__m128)__builtin_ia32_cmpnltss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnlt_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 5);
+  return (__m128)__builtin_ia32_cmpnltps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnle_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 6);
+  return (__m128)__builtin_ia32_cmpnless(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnle_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 6);
+  return (__m128)__builtin_ia32_cmpnleps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
-                                         __builtin_ia32_cmpss(__b, __a, 5),
+                                         __builtin_ia32_cmpnltss(__b, __a),
                                          4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpngt_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__b, __a, 5);
+  return (__m128)__builtin_ia32_cmpnltps(__b, __a);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_ss(__m128 __a, __m128 __b)
 {
   return (__m128)__builtin_shufflevector(__a,
-                                         __builtin_ia32_cmpss(__b, __a, 6),
+                                         __builtin_ia32_cmpnless(__b, __a),
                                          4, 1, 2, 3);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpnge_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__b, __a, 6);
+  return (__m128)__builtin_ia32_cmpnleps(__b, __a);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpord_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 7);
+  return (__m128)__builtin_ia32_cmpordss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpord_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 7);
+  return (__m128)__builtin_ia32_cmpordps(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpunord_ss(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpss(__a, __b, 3);
+  return (__m128)__builtin_ia32_cmpunordss(__a, __b);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_cmpunord_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)__builtin_ia32_cmpps(__a, __b, 3);
+  return (__m128)__builtin_ia32_cmpunordps(__a, __b);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
diff --git a/renderscript/include/rs_allocation.rsh b/renderscript/include/rs_allocation.rsh
index 6f3f8d9..2cc2a82 100644
--- a/renderscript/include/rs_allocation.rsh
+++ b/renderscript/include/rs_allocation.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,418 +14,3206 @@
  * limitations under the License.
  */
 
-/** @file rs_allocation.rsh
- *  \brief Allocation routines
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_allocation.rsh: Allocation routines
  *
- *
+ * Functions that can be used to query the characteristics of an allocation,
+ * to set and get elements of the allocation.
  */
+#ifndef RENDERSCRIPT_RS_ALLOCATION_RSH
+#define RENDERSCRIPT_RS_ALLOCATION_RSH
 
-#ifndef __RS_ALLOCATION_RSH__
-#define __RS_ALLOCATION_RSH__
-
-/**
- * Returns the Allocation for a given pointer.  The pointer should point within
- * a valid allocation.  The results are undefined if the pointer is not from a
- * valid allocation.
+/*
+ * rsAllocationCopy1DRange: Copy consecutive values between allocations
  *
- * This function is deprecated and will be removed in the SDK from a future
- * release.
- */
-extern rs_allocation __attribute__((overloadable))
-    rsGetAllocation(const void *);
-
-/**
- * Query the dimension of an allocation.
+ * Copies part of an allocation into another allocation.
  *
- * @return uint32_t The X dimension of the allocation.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimX(rs_allocation);
-
-/**
- * Query the dimension of an allocation.
+ * The two allocations must be different.  Using this function to copy within
+ * the same allocation yields undefined results.
  *
- * @return uint32_t The Y dimension of the allocation.
+ * Parameters:
+ *   dstAlloc Allocation to copy data into.
+ *   dstOff The offset of the first element to be copied in the destination allocation.
+ *   dstMip Mip level in the destination allocation.
+ *   count The number of elements to be copied.
+ *   srcAlloc The source data allocation.
+ *   srcOff The offset of the first element in data to be copied in the source allocation.
+ *   srcMip Mip level in the source allocation.
  */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimY(rs_allocation);
-
-/**
- * Query the dimension of an allocation.
- *
- * @return uint32_t The Z dimension of the allocation.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimZ(rs_allocation);
-
-/**
- * Query an allocation for the presence of more than one LOD.
- *
- * @return uint32_t Returns 1 if more than one LOD is present, 0 otherwise.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimLOD(rs_allocation);
-
-/**
- * Query an allocation for the presence of more than one face.
- *
- * @return uint32_t Returns 1 if more than one face is present, 0 otherwise.
- */
-extern uint32_t __attribute__((overloadable))
-    rsAllocationGetDimFaces(rs_allocation);
-
 #if (defined(RS_VERSION) && (RS_VERSION >= 14))
-
-/**
- * Copy part of an allocation from another allocation.
- *
- * @param dstAlloc Allocation to copy data into.
- * @param dstOff The offset of the first element to be copied in
- *               the destination allocation.
- * @param dstMip Mip level in the destination allocation.
- * @param count The number of elements to be copied.
- * @param srcAlloc The source data allocation.
- * @param srcOff The offset of the first element in data to be
- *               copied in the source allocation.
- * @param srcMip Mip level in the source allocation.
- */
 extern void __attribute__((overloadable))
-    rsAllocationCopy1DRange(rs_allocation dstAlloc,
-                            uint32_t dstOff, uint32_t dstMip,
-                            uint32_t count,
-                            rs_allocation srcAlloc,
-                            uint32_t srcOff, uint32_t srcMip);
-
-/**
- * Copy a rectangular region into the allocation from another
- * allocation.
- *
- * @param dstAlloc allocation to copy data into.
- * @param dstXoff X offset of the region to update in the
- *                destination allocation.
- * @param dstYoff Y offset of the region to update in the
- *                destination allocation.
- * @param dstMip Mip level in the destination allocation.
- * @param dstFace Cubemap face of the destination allocation,
- *                ignored for allocations that aren't cubemaps.
- * @param width Width of the incoming region to update.
- * @param height Height of the incoming region to update.
- * @param srcAlloc The source data allocation.
- * @param srcXoff X offset in data of the source allocation.
- * @param srcYoff Y offset in data of the source allocation.
- * @param srcMip Mip level in the source allocation.
- * @param srcFace Cubemap face of the source allocation,
- *                ignored for allocations that aren't cubemaps.
- */
-extern void __attribute__((overloadable))
-    rsAllocationCopy2DRange(rs_allocation dstAlloc,
-                            uint32_t dstXoff, uint32_t dstYoff,
-                            uint32_t dstMip,
-                            rs_allocation_cubemap_face dstFace,
-                            uint32_t width, uint32_t height,
-                            rs_allocation srcAlloc,
-                            uint32_t srcXoff, uint32_t srcYoff,
-                            uint32_t srcMip,
-                            rs_allocation_cubemap_face srcFace);
-
-#endif //defined(RS_VERSION) && (RS_VERSION >= 14)
-
-/**
- * Extract a single element from an allocation.
- */
-extern const void * __attribute__((overloadable))
-    rsGetElementAt(rs_allocation a, uint32_t x);
-/**
- * \overload
- */
-extern const void * __attribute__((overloadable))
-    rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y);
-/**
- * \overload
- */
-extern const void * __attribute__((overloadable))
-    rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
-
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-    #define GET_ELEMENT_AT(T) \
-    extern T __attribute__((overloadable)) \
-            rsGetElementAt_##T(rs_allocation a, uint32_t x); \
-    extern T __attribute__((overloadable)) \
-            rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y);  \
-    extern T __attribute__((overloadable)) \
-            rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
-#else
-    #define GET_ELEMENT_AT(T) \
-    static inline T __attribute__((overloadable)) \
-            rsGetElementAt_##T(rs_allocation a, uint32_t x) {  \
-        return ((T *)rsGetElementAt(a, x))[0]; \
-    } \
-    static inline T __attribute__((overloadable)) \
-            rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y) {  \
-        return ((T *)rsGetElementAt(a, x, y))[0]; \
-    } \
-    static inline T __attribute__((overloadable)) \
-            rsGetElementAt_##T(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {  \
-        return ((T *)rsGetElementAt(a, x, y, z))[0]; \
-    }
+    rsAllocationCopy1DRange(rs_allocation dstAlloc, uint32_t dstOff, uint32_t dstMip, uint32_t count,
+                            rs_allocation srcAlloc, uint32_t srcOff, uint32_t srcMip);
 #endif
 
-GET_ELEMENT_AT(char)
-GET_ELEMENT_AT(char2)
-GET_ELEMENT_AT(char3)
-GET_ELEMENT_AT(char4)
-GET_ELEMENT_AT(uchar)
-GET_ELEMENT_AT(uchar2)
-GET_ELEMENT_AT(uchar3)
-GET_ELEMENT_AT(uchar4)
-GET_ELEMENT_AT(short)
-GET_ELEMENT_AT(short2)
-GET_ELEMENT_AT(short3)
-GET_ELEMENT_AT(short4)
-GET_ELEMENT_AT(ushort)
-GET_ELEMENT_AT(ushort2)
-GET_ELEMENT_AT(ushort3)
-GET_ELEMENT_AT(ushort4)
-GET_ELEMENT_AT(int)
-GET_ELEMENT_AT(int2)
-GET_ELEMENT_AT(int3)
-GET_ELEMENT_AT(int4)
-GET_ELEMENT_AT(uint)
-GET_ELEMENT_AT(uint2)
-GET_ELEMENT_AT(uint3)
-GET_ELEMENT_AT(uint4)
-GET_ELEMENT_AT(long)
-GET_ELEMENT_AT(long2)
-GET_ELEMENT_AT(long3)
-GET_ELEMENT_AT(long4)
-GET_ELEMENT_AT(ulong)
-GET_ELEMENT_AT(ulong2)
-GET_ELEMENT_AT(ulong3)
-GET_ELEMENT_AT(ulong4)
-GET_ELEMENT_AT(float)
-GET_ELEMENT_AT(float2)
-GET_ELEMENT_AT(float3)
-GET_ELEMENT_AT(float4)
-GET_ELEMENT_AT(double)
-GET_ELEMENT_AT(double2)
-GET_ELEMENT_AT(double3)
-GET_ELEMENT_AT(double4)
-
-#undef GET_ELEMENT_AT
-
-// Jelly Bean
-#if (defined(RS_VERSION) && (RS_VERSION >= 16))
-
-/**
- * Send the contents of the Allocation to the queue.
- * @param a allocation to work on
+/*
+ * rsAllocationCopy2DRange: Copy a rectangular region between allocations
+ *
+ * Copy a rectangular region into the allocation from another allocation.
+ *
+ * The two allocations must be different.  Using this function to copy within
+ * the same allocation yields undefined results.
+ *
+ * Parameters:
+ *   dstAlloc Allocation to copy data into.
+ *   dstXoff X offset of the region to update in the destination allocation.
+ *   dstYoff Y offset of the region to update in the destination allocation.
+ *   dstMip Mip level in the destination allocation.
+ *   dstFace Cubemap face of the destination allocation, ignored for allocations that aren't cubemaps.
+ *   width Width of the incoming region to update.
+ *   height Height of the incoming region to update.
+ *   srcAlloc The source data allocation.
+ *   srcXoff X offset in data of the source allocation.
+ *   srcYoff Y offset in data of the source allocation.
+ *   srcMip Mip level in the source allocation.
+ *   srcFace Cubemap face of the source allocation, ignored for allocations that aren't cubemaps.
  */
-extern const void __attribute__((overloadable))
-    rsAllocationIoSend(rs_allocation a);
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern void __attribute__((overloadable))
+    rsAllocationCopy2DRange(rs_allocation dstAlloc, uint32_t dstXoff, uint32_t dstYoff,
+                            uint32_t dstMip, rs_allocation_cubemap_face dstFace, uint32_t width,
+                            uint32_t height, rs_allocation srcAlloc, uint32_t srcXoff,
+                            uint32_t srcYoff, uint32_t srcMip, rs_allocation_cubemap_face srcFace);
+#endif
 
-/**
- * Receive a new set of contents from the queue.
- * @param a allocation to work on
+/*
+ * rsAllocationGetDimFaces: Presence of more than one face
+ *
+ * If the allocation is a cubemap, this function returns 1 if there's more than one
+ * face present.  In all other cases, it returns 0.
+ *
+ * Returns: 1 if more than one face is present, 0 otherwise.
  */
-extern const void __attribute__((overloadable))
-    rsAllocationIoReceive(rs_allocation a);
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimFaces(rs_allocation a);
 
+/*
+ * rsAllocationGetDimLOD: Presence of levels of detail
+ *
+ * Query an allocation for the presence of more than one level of detail (LOD).  This is useful for mipmaps.
+ *
+ * Returns: 1 if more than one LOD is present, 0 otherwise.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimLOD(rs_allocation a);
 
-/**
+/*
+ * rsAllocationGetDimX: Size of the X dimension
+ *
+ * Returns the size of the X dimension of the allocation.
+ *
+ * Returns: The X dimension of the allocation.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimX(rs_allocation a);
+
+/*
+ * rsAllocationGetDimY: Size of the Y dimension
+ *
+ * Returns the size of the Y dimension of the allocation.
+ * If the allocation has fewer than two dimensions, returns 0.
+ *
+ * Returns: The Y dimension of the allocation.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimY(rs_allocation a);
+
+/*
+ * rsAllocationGetDimZ: Size of the Z dimension
+ *
+ * Returns the size of the Z dimension of the allocation.
+ * If the allocation has fewer than three dimensions, returns 0.
+ *
+ * Returns: The Z dimension of the allocation.
+ */
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimZ(rs_allocation a);
+
+/*
  * Get the element object describing the allocation's layout
- * @param a allocation to get data from
- * @return element describing allocation layout
+ *
+ * Parameters:
+ *   a allocation to get data from
+ *
+ * Returns: element describing allocation layout
  */
 extern rs_element __attribute__((overloadable))
     rsAllocationGetElement(rs_allocation a);
 
-/**
- * Fetch allocation in a way described by the sampler
- * @param a 1D allocation to sample from
- * @param s sampler state
- * @param location to sample from
- */
-extern const float4 __attribute__((overloadable))
-    rsSample(rs_allocation a, rs_sampler s, float location);
-/**
- * Fetch allocation in a way described by the sampler
- * @param a 1D allocation to sample from
- * @param s sampler state
- * @param location to sample from
- * @param lod mip level to sample from, for fractional values
- *            mip levels will be interpolated if
- *            RS_SAMPLER_LINEAR_MIP_LINEAR is used
- */
-extern const float4 __attribute__((overloadable))
-    rsSample(rs_allocation a, rs_sampler s, float location, float lod);
-
-/**
- * Fetch allocation in a way described by the sampler
- * @param a 2D allocation to sample from
- * @param s sampler state
- * @param location to sample from
- */
-extern const float4 __attribute__((overloadable))
-    rsSample(rs_allocation a, rs_sampler s, float2 location);
-
-/**
- * Fetch allocation in a way described by the sampler
- * @param a 2D allocation to sample from
- * @param s sampler state
- * @param location to sample from
- * @param lod mip level to sample from, for fractional values
- *            mip levels will be interpolated if
- *            RS_SAMPLER_LINEAR_MIP_LINEAR is used
- */
-extern const float4 __attribute__((overloadable))
-    rsSample(rs_allocation a, rs_sampler s, float2 location, float lod);
-
-#endif // (defined(RS_VERSION) && (RS_VERSION >= 16))
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-
-/**
- * Set single element of an allocation.
- */
-extern void __attribute__((overloadable))
-    rsSetElementAt(rs_allocation a, void* ptr, uint32_t x);
-
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetElementAt(rs_allocation a, void* ptr, uint32_t x, uint32_t y);
-
-#define SET_ELEMENT_AT(T)                                               \
-    extern void __attribute__((overloadable))                           \
-    rsSetElementAt_##T(rs_allocation a, T val, uint32_t x);             \
-    extern void __attribute__((overloadable))                           \
-    rsSetElementAt_##T(rs_allocation a, T val, uint32_t x, uint32_t y); \
-    extern void __attribute__((overloadable))                           \
-    rsSetElementAt_##T(rs_allocation a, T val, uint32_t x, uint32_t y, uint32_t z);
-
-
-SET_ELEMENT_AT(char)
-SET_ELEMENT_AT(char2)
-SET_ELEMENT_AT(char3)
-SET_ELEMENT_AT(char4)
-SET_ELEMENT_AT(uchar)
-SET_ELEMENT_AT(uchar2)
-SET_ELEMENT_AT(uchar3)
-SET_ELEMENT_AT(uchar4)
-SET_ELEMENT_AT(short)
-SET_ELEMENT_AT(short2)
-SET_ELEMENT_AT(short3)
-SET_ELEMENT_AT(short4)
-SET_ELEMENT_AT(ushort)
-SET_ELEMENT_AT(ushort2)
-SET_ELEMENT_AT(ushort3)
-SET_ELEMENT_AT(ushort4)
-SET_ELEMENT_AT(int)
-SET_ELEMENT_AT(int2)
-SET_ELEMENT_AT(int3)
-SET_ELEMENT_AT(int4)
-SET_ELEMENT_AT(uint)
-SET_ELEMENT_AT(uint2)
-SET_ELEMENT_AT(uint3)
-SET_ELEMENT_AT(uint4)
-SET_ELEMENT_AT(long)
-SET_ELEMENT_AT(long2)
-SET_ELEMENT_AT(long3)
-SET_ELEMENT_AT(long4)
-SET_ELEMENT_AT(ulong)
-SET_ELEMENT_AT(ulong2)
-SET_ELEMENT_AT(ulong3)
-SET_ELEMENT_AT(ulong4)
-SET_ELEMENT_AT(float)
-SET_ELEMENT_AT(float2)
-SET_ELEMENT_AT(float3)
-SET_ELEMENT_AT(float4)
-SET_ELEMENT_AT(double)
-SET_ELEMENT_AT(double2)
-SET_ELEMENT_AT(double3)
-SET_ELEMENT_AT(double4)
-
-#undef SET_ELEMENT_AT
-
-
-/**
- * Extract a single element from an allocation.
- */
-extern const uchar __attribute__((overloadable))
-    rsGetElementAtYuv_uchar_Y(rs_allocation a, uint32_t x, uint32_t y);
-
-/**
- * Extract a single element from an allocation.
+/*
+ * rsAllocationIoReceive: Receive new content from the queue
  *
- * Coordinates are in the dimensions of the Y plane
- */
-extern const uchar __attribute__((overloadable))
-    rsGetElementAtYuv_uchar_U(rs_allocation a, uint32_t x, uint32_t y);
-
-/**
- * Extract a single element from an allocation.
+ * Receive a new set of contents from the queue.
  *
- * Coordinates are in the dimensions of the Y plane
+ * Parameters:
+ *   a allocation to work on
  */
-extern const uchar __attribute__((overloadable))
-    rsGetElementAtYuv_uchar_V(rs_allocation a, uint32_t x, uint32_t y);
-
-#endif // (defined(RS_VERSION) && (RS_VERSION >= 18))
-
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 999))
-
-#define VOP(T)                                                                   \
-    extern T __attribute__((overloadable))                                       \
-    rsAllocationVLoadX_##T(rs_allocation a, uint32_t x);                         \
-    extern T __attribute__((overloadable))                                       \
-    rsAllocationVLoadX_##T(rs_allocation a, uint32_t x, uint32_t y);             \
-    extern T __attribute__((overloadable))                                       \
-    rsAllocationVLoadX_##T(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); \
-    extern void __attribute__((overloadable))                                    \
-    rsAllocationVStoreX_##T(rs_allocation a, T val, uint32_t x);                 \
-    extern void __attribute__((overloadable))                                    \
-    rsAllocationVStoreX_##T(rs_allocation a, T val, uint32_t x, uint32_t y);     \
-    extern void __attribute__((overloadable))                                    \
-    rsAllocationVStoreX_##T(rs_allocation a, T val, uint32_t x, uint32_t y, uint32_t z);
-
-VOP(char2)
-VOP(char3)
-VOP(char4)
-VOP(uchar2)
-VOP(uchar3)
-VOP(uchar4)
-VOP(short2)
-VOP(short3)
-VOP(short4)
-VOP(ushort2)
-VOP(ushort3)
-VOP(ushort4)
-VOP(int2)
-VOP(int3)
-VOP(int4)
-VOP(uint2)
-VOP(uint3)
-VOP(uint4)
-VOP(long2)
-VOP(long3)
-VOP(long4)
-VOP(ulong2)
-VOP(ulong3)
-VOP(ulong4)
-VOP(float2)
-VOP(float3)
-VOP(float4)
-VOP(double2)
-VOP(double3)
-VOP(double4)
-
-#undef VOP
-
-#endif //(defined(RS_VERSION) && (RS_VERSION >= 999))
-
-
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern void __attribute__((overloadable))
+    rsAllocationIoReceive(rs_allocation a);
 #endif
 
+/*
+ * rsAllocationIoSend: Send new content to the queue
+ *
+ * Send the contents of the Allocation to the queue.
+ *
+ * Parameters:
+ *   a allocation to work on
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern void __attribute__((overloadable))
+    rsAllocationIoSend(rs_allocation a);
+#endif
+
+/*
+ * rsAllocationVLoadX: Get a vector value from an allocation.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float2 __attribute__((overloadable))
+    rsAllocationVLoadX_float2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float3 __attribute__((overloadable))
+    rsAllocationVLoadX_float3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float4 __attribute__((overloadable))
+    rsAllocationVLoadX_float4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double2 __attribute__((overloadable))
+    rsAllocationVLoadX_double2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double3 __attribute__((overloadable))
+    rsAllocationVLoadX_double3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double4 __attribute__((overloadable))
+    rsAllocationVLoadX_double4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char2 __attribute__((overloadable))
+    rsAllocationVLoadX_char2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char3 __attribute__((overloadable))
+    rsAllocationVLoadX_char3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char4 __attribute__((overloadable))
+    rsAllocationVLoadX_char4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar2 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar3 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar4 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short2 __attribute__((overloadable))
+    rsAllocationVLoadX_short2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short3 __attribute__((overloadable))
+    rsAllocationVLoadX_short3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short4 __attribute__((overloadable))
+    rsAllocationVLoadX_short4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort2 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort3 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort4 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int2 __attribute__((overloadable))
+    rsAllocationVLoadX_int2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int3 __attribute__((overloadable))
+    rsAllocationVLoadX_int3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int4 __attribute__((overloadable))
+    rsAllocationVLoadX_int4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint2 __attribute__((overloadable))
+    rsAllocationVLoadX_uint2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint3 __attribute__((overloadable))
+    rsAllocationVLoadX_uint3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint4 __attribute__((overloadable))
+    rsAllocationVLoadX_uint4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long2 __attribute__((overloadable))
+    rsAllocationVLoadX_long2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long3 __attribute__((overloadable))
+    rsAllocationVLoadX_long3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long4 __attribute__((overloadable))
+    rsAllocationVLoadX_long4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong2 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong2(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong3 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong3(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong4 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong4(rs_allocation a, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float2 __attribute__((overloadable))
+    rsAllocationVLoadX_float2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float3 __attribute__((overloadable))
+    rsAllocationVLoadX_float3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float4 __attribute__((overloadable))
+    rsAllocationVLoadX_float4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double2 __attribute__((overloadable))
+    rsAllocationVLoadX_double2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double3 __attribute__((overloadable))
+    rsAllocationVLoadX_double3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double4 __attribute__((overloadable))
+    rsAllocationVLoadX_double4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char2 __attribute__((overloadable))
+    rsAllocationVLoadX_char2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char3 __attribute__((overloadable))
+    rsAllocationVLoadX_char3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char4 __attribute__((overloadable))
+    rsAllocationVLoadX_char4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar2 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar3 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar4 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short2 __attribute__((overloadable))
+    rsAllocationVLoadX_short2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short3 __attribute__((overloadable))
+    rsAllocationVLoadX_short3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short4 __attribute__((overloadable))
+    rsAllocationVLoadX_short4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort2 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort3 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort4 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int2 __attribute__((overloadable))
+    rsAllocationVLoadX_int2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int3 __attribute__((overloadable))
+    rsAllocationVLoadX_int3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int4 __attribute__((overloadable))
+    rsAllocationVLoadX_int4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint2 __attribute__((overloadable))
+    rsAllocationVLoadX_uint2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint3 __attribute__((overloadable))
+    rsAllocationVLoadX_uint3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint4 __attribute__((overloadable))
+    rsAllocationVLoadX_uint4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long2 __attribute__((overloadable))
+    rsAllocationVLoadX_long2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long3 __attribute__((overloadable))
+    rsAllocationVLoadX_long3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long4 __attribute__((overloadable))
+    rsAllocationVLoadX_long4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong2 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong2(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong3 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong3(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong4 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong4(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float2 __attribute__((overloadable))
+    rsAllocationVLoadX_float2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float3 __attribute__((overloadable))
+    rsAllocationVLoadX_float3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern float4 __attribute__((overloadable))
+    rsAllocationVLoadX_float4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double2 __attribute__((overloadable))
+    rsAllocationVLoadX_double2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double3 __attribute__((overloadable))
+    rsAllocationVLoadX_double3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern double4 __attribute__((overloadable))
+    rsAllocationVLoadX_double4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char2 __attribute__((overloadable))
+    rsAllocationVLoadX_char2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char3 __attribute__((overloadable))
+    rsAllocationVLoadX_char3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern char4 __attribute__((overloadable))
+    rsAllocationVLoadX_char4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar2 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar3 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uchar4 __attribute__((overloadable))
+    rsAllocationVLoadX_uchar4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short2 __attribute__((overloadable))
+    rsAllocationVLoadX_short2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short3 __attribute__((overloadable))
+    rsAllocationVLoadX_short3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern short4 __attribute__((overloadable))
+    rsAllocationVLoadX_short4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort2 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort3 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ushort4 __attribute__((overloadable))
+    rsAllocationVLoadX_ushort4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int2 __attribute__((overloadable))
+    rsAllocationVLoadX_int2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int3 __attribute__((overloadable))
+    rsAllocationVLoadX_int3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern int4 __attribute__((overloadable))
+    rsAllocationVLoadX_int4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint2 __attribute__((overloadable))
+    rsAllocationVLoadX_uint2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint3 __attribute__((overloadable))
+    rsAllocationVLoadX_uint3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern uint4 __attribute__((overloadable))
+    rsAllocationVLoadX_uint4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long2 __attribute__((overloadable))
+    rsAllocationVLoadX_long2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long3 __attribute__((overloadable))
+    rsAllocationVLoadX_long3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern long4 __attribute__((overloadable))
+    rsAllocationVLoadX_long4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong2 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong3 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern ulong4 __attribute__((overloadable))
+    rsAllocationVLoadX_ulong4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+/*
+ * Store the entries of a vector into an allocation, starting at the given cell.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 22)) /* VStoreX overloads taking x only */
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float2(rs_allocation a, float2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float3(rs_allocation a, float3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float4(rs_allocation a, float4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double2(rs_allocation a, double2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double3(rs_allocation a, double3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double4(rs_allocation a, double4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char2(rs_allocation a, char2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char3(rs_allocation a, char3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char4(rs_allocation a, char4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar2(rs_allocation a, uchar2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar3(rs_allocation a, uchar3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar4(rs_allocation a, uchar4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short2(rs_allocation a, short2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short3(rs_allocation a, short3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short4(rs_allocation a, short4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort2(rs_allocation a, ushort2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort3(rs_allocation a, ushort3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort4(rs_allocation a, ushort4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int2(rs_allocation a, int2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int3(rs_allocation a, int3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int4(rs_allocation a, int4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint2(rs_allocation a, uint2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint3(rs_allocation a, uint3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint4(rs_allocation a, uint4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long2(rs_allocation a, long2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long3(rs_allocation a, long3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long4(rs_allocation a, long4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong2(rs_allocation a, ulong2 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong3(rs_allocation a, ulong3 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong4(rs_allocation a, ulong4 val, uint32_t x);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22)) /* VStoreX overloads taking x and y */
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float2(rs_allocation a, float2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float3(rs_allocation a, float3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float4(rs_allocation a, float4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double2(rs_allocation a, double2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double3(rs_allocation a, double3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double4(rs_allocation a, double4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char2(rs_allocation a, char2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char3(rs_allocation a, char3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char4(rs_allocation a, char4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar2(rs_allocation a, uchar2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar3(rs_allocation a, uchar3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar4(rs_allocation a, uchar4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short2(rs_allocation a, short2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short3(rs_allocation a, short3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short4(rs_allocation a, short4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort2(rs_allocation a, ushort2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort3(rs_allocation a, ushort3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort4(rs_allocation a, ushort4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int2(rs_allocation a, int2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int3(rs_allocation a, int3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int4(rs_allocation a, int4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint2(rs_allocation a, uint2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint3(rs_allocation a, uint3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint4(rs_allocation a, uint4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long2(rs_allocation a, long2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long3(rs_allocation a, long3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long4(rs_allocation a, long4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong2(rs_allocation a, ulong2 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong3(rs_allocation a, ulong3 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong4(rs_allocation a, ulong4 val, uint32_t x, uint32_t y);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22)) /* VStoreX overloads taking x, y and z */
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float2(rs_allocation a, float2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float3(rs_allocation a, float3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_float4(rs_allocation a, float4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double2(rs_allocation a, double2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double3(rs_allocation a, double3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_double4(rs_allocation a, double4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char2(rs_allocation a, char2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char3(rs_allocation a, char3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_char4(rs_allocation a, char4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar2(rs_allocation a, uchar2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar3(rs_allocation a, uchar3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uchar4(rs_allocation a, uchar4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short2(rs_allocation a, short2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short3(rs_allocation a, short3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_short4(rs_allocation a, short4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort2(rs_allocation a, ushort2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort3(rs_allocation a, ushort3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ushort4(rs_allocation a, ushort4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int2(rs_allocation a, int2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int3(rs_allocation a, int3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_int4(rs_allocation a, int4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint2(rs_allocation a, uint2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint3(rs_allocation a, uint3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_uint4(rs_allocation a, uint4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long2(rs_allocation a, long2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long3(rs_allocation a, long3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_long4(rs_allocation a, long4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong2(rs_allocation a, ulong2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong3(rs_allocation a, ulong3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 22))
+extern void __attribute__((overloadable))
+    rsAllocationVStoreX_ulong4(rs_allocation a, ulong4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif /* RS_VERSION >= 22 */
+
+/*
+ * rsGetAllocation: Returns the Allocation for a given pointer
+ *
+ * Parameters:
+ *   p: Should point within a valid allocation.  The results are undefined if
+ *      the pointer is not from a valid allocation.
+ *
+ * This function is deprecated and will be removed from the SDK in a future
+ * release.
+ */
+extern rs_allocation __attribute__((overloadable))
+    rsGetAllocation(const void* p);
+
+/*
+ * rsGetElementAt: Get an element
+ *
+ * Extract a single element from an allocation, returned as a const pointer.
+ */
+extern const void* __attribute__((overloadable))
+    rsGetElementAt(rs_allocation a, uint32_t x);
+
+extern const void* __attribute__((overloadable))
+    rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y);
+
+extern const void* __attribute__((overloadable))
+    rsGetElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline float __attribute__((overloadable)) rsGetElementAt_float(
+        rs_allocation a, uint32_t x) {
+    return *(const float *)rsGetElementAt(a, x); /* load the element at x as float */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline float2 __attribute__((overloadable)) rsGetElementAt_float2(
+        rs_allocation a, uint32_t x) {
+    return *(const float2 *)rsGetElementAt(a, x); /* load the element at x as float2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline float3 __attribute__((overloadable)) rsGetElementAt_float3(
+        rs_allocation a, uint32_t x) {
+    return *(const float3 *)rsGetElementAt(a, x); /* load the element at x as float3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline float4 __attribute__((overloadable)) rsGetElementAt_float4(
+        rs_allocation a, uint32_t x) {
+    return *(const float4 *)rsGetElementAt(a, x); /* load the element at x as float4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline double __attribute__((overloadable)) rsGetElementAt_double(
+        rs_allocation a, uint32_t x) {
+    return *(const double *)rsGetElementAt(a, x); /* load the element at x as double */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline double2 __attribute__((overloadable)) rsGetElementAt_double2(
+        rs_allocation a, uint32_t x) {
+    return *(const double2 *)rsGetElementAt(a, x); /* load the element at x as double2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline double3 __attribute__((overloadable)) rsGetElementAt_double3(
+        rs_allocation a, uint32_t x) {
+    return *(const double3 *)rsGetElementAt(a, x); /* load the element at x as double3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline double4 __attribute__((overloadable)) rsGetElementAt_double4(
+        rs_allocation a, uint32_t x) {
+    return *(const double4 *)rsGetElementAt(a, x); /* load the element at x as double4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline char __attribute__((overloadable)) rsGetElementAt_char(
+        rs_allocation a, uint32_t x) {
+    return *(const char *)rsGetElementAt(a, x); /* load the element at x as char */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline char2 __attribute__((overloadable)) rsGetElementAt_char2(
+        rs_allocation a, uint32_t x) {
+    return *(const char2 *)rsGetElementAt(a, x); /* load the element at x as char2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline char3 __attribute__((overloadable)) rsGetElementAt_char3(
+        rs_allocation a, uint32_t x) {
+    return *(const char3 *)rsGetElementAt(a, x); /* load the element at x as char3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline char4 __attribute__((overloadable)) rsGetElementAt_char4(
+        rs_allocation a, uint32_t x) {
+    return *(const char4 *)rsGetElementAt(a, x); /* load the element at x as char4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uchar __attribute__((overloadable)) rsGetElementAt_uchar(
+        rs_allocation a, uint32_t x) {
+    return *(const uchar *)rsGetElementAt(a, x); /* load the element at x as uchar */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uchar2 __attribute__((overloadable)) rsGetElementAt_uchar2(
+        rs_allocation a, uint32_t x) {
+    return *(const uchar2 *)rsGetElementAt(a, x); /* load the element at x as uchar2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uchar3 __attribute__((overloadable)) rsGetElementAt_uchar3(
+        rs_allocation a, uint32_t x) {
+    return *(const uchar3 *)rsGetElementAt(a, x); /* load the element at x as uchar3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uchar4 __attribute__((overloadable)) rsGetElementAt_uchar4(
+        rs_allocation a, uint32_t x) {
+    return *(const uchar4 *)rsGetElementAt(a, x); /* load the element at x as uchar4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline short __attribute__((overloadable)) rsGetElementAt_short(
+        rs_allocation a, uint32_t x) {
+    return *(const short *)rsGetElementAt(a, x); /* load the element at x as short */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline short2 __attribute__((overloadable)) rsGetElementAt_short2(
+        rs_allocation a, uint32_t x) {
+    return *(const short2 *)rsGetElementAt(a, x); /* load the element at x as short2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline short3 __attribute__((overloadable)) rsGetElementAt_short3(
+        rs_allocation a, uint32_t x) {
+    return *(const short3 *)rsGetElementAt(a, x); /* load the element at x as short3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline short4 __attribute__((overloadable)) rsGetElementAt_short4(
+        rs_allocation a, uint32_t x) {
+    return *(const short4 *)rsGetElementAt(a, x); /* load the element at x as short4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline ushort __attribute__((overloadable)) rsGetElementAt_ushort(
+        rs_allocation a, uint32_t x) {
+    return *(const ushort *)rsGetElementAt(a, x); /* load the element at x as ushort */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline ushort2 __attribute__((overloadable)) rsGetElementAt_ushort2(
+        rs_allocation a, uint32_t x) {
+    return *(const ushort2 *)rsGetElementAt(a, x); /* load the element at x as ushort2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline ushort3 __attribute__((overloadable)) rsGetElementAt_ushort3(
+        rs_allocation a, uint32_t x) {
+    return *(const ushort3 *)rsGetElementAt(a, x); /* load the element at x as ushort3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline ushort4 __attribute__((overloadable)) rsGetElementAt_ushort4(
+        rs_allocation a, uint32_t x) {
+    return *(const ushort4 *)rsGetElementAt(a, x); /* load the element at x as ushort4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline int __attribute__((overloadable)) rsGetElementAt_int(
+        rs_allocation a, uint32_t x) {
+    return *(const int *)rsGetElementAt(a, x); /* load the element at x as int */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline int2 __attribute__((overloadable)) rsGetElementAt_int2(
+        rs_allocation a, uint32_t x) {
+    return *(const int2 *)rsGetElementAt(a, x); /* load the element at x as int2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline int3 __attribute__((overloadable)) rsGetElementAt_int3(
+        rs_allocation a, uint32_t x) {
+    return *(const int3 *)rsGetElementAt(a, x); /* load the element at x as int3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline int4 __attribute__((overloadable)) rsGetElementAt_int4(
+        rs_allocation a, uint32_t x) {
+    return *(const int4 *)rsGetElementAt(a, x); /* load the element at x as int4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uint __attribute__((overloadable)) rsGetElementAt_uint(
+        rs_allocation a, uint32_t x) {
+    return *(const uint *)rsGetElementAt(a, x); /* load the element at x as uint */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uint2 __attribute__((overloadable)) rsGetElementAt_uint2(
+        rs_allocation a, uint32_t x) {
+    return *(const uint2 *)rsGetElementAt(a, x); /* load the element at x as uint2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uint3 __attribute__((overloadable)) rsGetElementAt_uint3(
+        rs_allocation a, uint32_t x) {
+    return *(const uint3 *)rsGetElementAt(a, x); /* load the element at x as uint3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uint4 __attribute__((overloadable)) rsGetElementAt_uint4(
+        rs_allocation a, uint32_t x) {
+    return *(const uint4 *)rsGetElementAt(a, x); /* load the element at x as uint4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline long __attribute__((overloadable)) rsGetElementAt_long(
+        rs_allocation a, uint32_t x) {
+    return *(const long *)rsGetElementAt(a, x); /* load the element at x as long */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline long2 __attribute__((overloadable)) rsGetElementAt_long2(
+        rs_allocation a, uint32_t x) {
+    return *(const long2 *)rsGetElementAt(a, x); /* load the element at x as long2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline long3 __attribute__((overloadable)) rsGetElementAt_long3(
+        rs_allocation a, uint32_t x) {
+    return *(const long3 *)rsGetElementAt(a, x); /* load the element at x as long3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline long4 __attribute__((overloadable)) rsGetElementAt_long4(
+        rs_allocation a, uint32_t x) {
+    return *(const long4 *)rsGetElementAt(a, x); /* load the element at x as long4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline ulong __attribute__((overloadable)) rsGetElementAt_ulong(
+        rs_allocation a, uint32_t x) {
+    return *(const ulong *)rsGetElementAt(a, x); /* load the element at x as ulong */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline ulong2 __attribute__((overloadable)) rsGetElementAt_ulong2(
+        rs_allocation a, uint32_t x) {
+    return *(const ulong2 *)rsGetElementAt(a, x); /* load the element at x as ulong2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline ulong3 __attribute__((overloadable)) rsGetElementAt_ulong3(
+        rs_allocation a, uint32_t x) {
+    return *(const ulong3 *)rsGetElementAt(a, x); /* load the element at x as ulong3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline ulong4 __attribute__((overloadable)) rsGetElementAt_ulong4(
+        rs_allocation a, uint32_t x) {
+    return *(const ulong4 *)rsGetElementAt(a, x); /* load the element at x as ulong4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline float __attribute__((overloadable)) rsGetElementAt_float(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const float *)rsGetElementAt(a, x, y); /* load the element at (x, y) as float */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline float2 __attribute__((overloadable)) rsGetElementAt_float2(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const float2 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as float2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline float3 __attribute__((overloadable)) rsGetElementAt_float3(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const float3 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as float3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline float4 __attribute__((overloadable)) rsGetElementAt_float4(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const float4 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as float4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline double __attribute__((overloadable)) rsGetElementAt_double(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const double *)rsGetElementAt(a, x, y); /* load the element at (x, y) as double */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline double2 __attribute__((overloadable)) rsGetElementAt_double2(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const double2 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as double2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline double3 __attribute__((overloadable)) rsGetElementAt_double3(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const double3 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as double3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline double4 __attribute__((overloadable)) rsGetElementAt_double4(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const double4 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as double4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline char __attribute__((overloadable)) rsGetElementAt_char(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const char *)rsGetElementAt(a, x, y); /* load the element at (x, y) as char */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline char2 __attribute__((overloadable)) rsGetElementAt_char2(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const char2 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as char2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline char3 __attribute__((overloadable)) rsGetElementAt_char3(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const char3 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as char3 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline char4 __attribute__((overloadable)) rsGetElementAt_char4(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const char4 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as char4 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uchar __attribute__((overloadable)) rsGetElementAt_uchar(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const uchar *)rsGetElementAt(a, x, y); /* load the element at (x, y) as uchar */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17)
+static inline uchar2 __attribute__((overloadable)) rsGetElementAt_uchar2(
+        rs_allocation a, uint32_t x, uint32_t y) {
+    return *(const uchar2 *)rsGetElementAt(a, x, y); /* load the element at (x, y) as uchar2 */
+}
+#endif /* RS_VERSION unset or <= 17 */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline uchar3 __attribute__((overloadable))
+    rsGetElementAt_uchar3(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(uchar3 *)rsGetElementAt(a, x, y); /* load one uchar3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline uchar4 __attribute__((overloadable))
+    rsGetElementAt_uchar4(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(uchar4 *)rsGetElementAt(a, x, y); /* load one uchar4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline short __attribute__((overloadable))
+    rsGetElementAt_short(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(short *)rsGetElementAt(a, x, y); /* load one short via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline short2 __attribute__((overloadable))
+    rsGetElementAt_short2(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(short2 *)rsGetElementAt(a, x, y); /* load one short2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline short3 __attribute__((overloadable))
+    rsGetElementAt_short3(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(short3 *)rsGetElementAt(a, x, y); /* load one short3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline short4 __attribute__((overloadable))
+    rsGetElementAt_short4(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(short4 *)rsGetElementAt(a, x, y); /* load one short4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline ushort __attribute__((overloadable))
+    rsGetElementAt_ushort(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(ushort *)rsGetElementAt(a, x, y); /* load one ushort via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline ushort2 __attribute__((overloadable))
+    rsGetElementAt_ushort2(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(ushort2 *)rsGetElementAt(a, x, y); /* load one ushort2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline ushort3 __attribute__((overloadable))
+    rsGetElementAt_ushort3(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(ushort3 *)rsGetElementAt(a, x, y); /* load one ushort3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline ushort4 __attribute__((overloadable))
+    rsGetElementAt_ushort4(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(ushort4 *)rsGetElementAt(a, x, y); /* load one ushort4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline int __attribute__((overloadable))
+    rsGetElementAt_int(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(int *)rsGetElementAt(a, x, y); /* load one int via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline int2 __attribute__((overloadable))
+    rsGetElementAt_int2(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(int2 *)rsGetElementAt(a, x, y); /* load one int2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline int3 __attribute__((overloadable))
+    rsGetElementAt_int3(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(int3 *)rsGetElementAt(a, x, y); /* load one int3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline int4 __attribute__((overloadable))
+    rsGetElementAt_int4(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(int4 *)rsGetElementAt(a, x, y); /* load one int4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline uint __attribute__((overloadable))
+    rsGetElementAt_uint(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(uint *)rsGetElementAt(a, x, y); /* load one uint via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline uint2 __attribute__((overloadable))
+    rsGetElementAt_uint2(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(uint2 *)rsGetElementAt(a, x, y); /* load one uint2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline uint3 __attribute__((overloadable))
+    rsGetElementAt_uint3(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(uint3 *)rsGetElementAt(a, x, y); /* load one uint3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline uint4 __attribute__((overloadable))
+    rsGetElementAt_uint4(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(uint4 *)rsGetElementAt(a, x, y); /* load one uint4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline long __attribute__((overloadable))
+    rsGetElementAt_long(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(long *)rsGetElementAt(a, x, y); /* load one long via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline long2 __attribute__((overloadable))
+    rsGetElementAt_long2(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(long2 *)rsGetElementAt(a, x, y); /* load one long2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline long3 __attribute__((overloadable))
+    rsGetElementAt_long3(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(long3 *)rsGetElementAt(a, x, y); /* load one long3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline long4 __attribute__((overloadable))
+    rsGetElementAt_long4(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(long4 *)rsGetElementAt(a, x, y); /* load one long4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline ulong __attribute__((overloadable))
+    rsGetElementAt_ulong(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(ulong *)rsGetElementAt(a, x, y); /* load one ulong via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline ulong2 __attribute__((overloadable))
+    rsGetElementAt_ulong2(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(ulong2 *)rsGetElementAt(a, x, y); /* load one ulong2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline ulong3 __attribute__((overloadable))
+    rsGetElementAt_ulong3(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(ulong3 *)rsGetElementAt(a, x, y); /* load one ulong3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 2D overload: RS_VERSION unset or <= 17 */
+static inline ulong4 __attribute__((overloadable))
+    rsGetElementAt_ulong4(rs_allocation a, uint32_t x, uint32_t y) {
+    return *(ulong4 *)rsGetElementAt(a, x, y); /* load one ulong4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline float __attribute__((overloadable))
+    rsGetElementAt_float(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(float *)rsGetElementAt(a, x, y, z); /* load one float via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline float2 __attribute__((overloadable))
+    rsGetElementAt_float2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(float2 *)rsGetElementAt(a, x, y, z); /* load one float2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline float3 __attribute__((overloadable))
+    rsGetElementAt_float3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(float3 *)rsGetElementAt(a, x, y, z); /* load one float3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline float4 __attribute__((overloadable))
+    rsGetElementAt_float4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(float4 *)rsGetElementAt(a, x, y, z); /* load one float4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline double __attribute__((overloadable))
+    rsGetElementAt_double(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(double *)rsGetElementAt(a, x, y, z); /* load one double via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline double2 __attribute__((overloadable))
+    rsGetElementAt_double2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(double2 *)rsGetElementAt(a, x, y, z); /* load one double2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline double3 __attribute__((overloadable))
+    rsGetElementAt_double3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(double3 *)rsGetElementAt(a, x, y, z); /* load one double3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline double4 __attribute__((overloadable))
+    rsGetElementAt_double4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(double4 *)rsGetElementAt(a, x, y, z); /* load one double4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline char __attribute__((overloadable))
+    rsGetElementAt_char(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(char *)rsGetElementAt(a, x, y, z); /* load one char via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline char2 __attribute__((overloadable))
+    rsGetElementAt_char2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(char2 *)rsGetElementAt(a, x, y, z); /* load one char2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline char3 __attribute__((overloadable))
+    rsGetElementAt_char3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(char3 *)rsGetElementAt(a, x, y, z); /* load one char3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline char4 __attribute__((overloadable))
+    rsGetElementAt_char4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(char4 *)rsGetElementAt(a, x, y, z); /* load one char4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline uchar __attribute__((overloadable))
+    rsGetElementAt_uchar(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(uchar *)rsGetElementAt(a, x, y, z); /* load one uchar via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline uchar2 __attribute__((overloadable))
+    rsGetElementAt_uchar2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(uchar2 *)rsGetElementAt(a, x, y, z); /* load one uchar2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline uchar3 __attribute__((overloadable))
+    rsGetElementAt_uchar3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(uchar3 *)rsGetElementAt(a, x, y, z); /* load one uchar3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline uchar4 __attribute__((overloadable))
+    rsGetElementAt_uchar4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(uchar4 *)rsGetElementAt(a, x, y, z); /* load one uchar4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline short __attribute__((overloadable))
+    rsGetElementAt_short(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(short *)rsGetElementAt(a, x, y, z); /* load one short via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline short2 __attribute__((overloadable))
+    rsGetElementAt_short2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(short2 *)rsGetElementAt(a, x, y, z); /* load one short2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline short3 __attribute__((overloadable))
+    rsGetElementAt_short3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(short3 *)rsGetElementAt(a, x, y, z); /* load one short3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline short4 __attribute__((overloadable))
+    rsGetElementAt_short4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(short4 *)rsGetElementAt(a, x, y, z); /* load one short4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline ushort __attribute__((overloadable))
+    rsGetElementAt_ushort(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(ushort *)rsGetElementAt(a, x, y, z); /* load one ushort via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline ushort2 __attribute__((overloadable))
+    rsGetElementAt_ushort2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(ushort2 *)rsGetElementAt(a, x, y, z); /* load one ushort2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline ushort3 __attribute__((overloadable))
+    rsGetElementAt_ushort3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(ushort3 *)rsGetElementAt(a, x, y, z); /* load one ushort3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline ushort4 __attribute__((overloadable))
+    rsGetElementAt_ushort4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(ushort4 *)rsGetElementAt(a, x, y, z); /* load one ushort4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline int __attribute__((overloadable))
+    rsGetElementAt_int(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(int *)rsGetElementAt(a, x, y, z); /* load one int via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline int2 __attribute__((overloadable))
+    rsGetElementAt_int2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(int2 *)rsGetElementAt(a, x, y, z); /* load one int2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline int3 __attribute__((overloadable))
+    rsGetElementAt_int3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(int3 *)rsGetElementAt(a, x, y, z); /* load one int3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline int4 __attribute__((overloadable))
+    rsGetElementAt_int4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(int4 *)rsGetElementAt(a, x, y, z); /* load one int4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline uint __attribute__((overloadable))
+    rsGetElementAt_uint(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(uint *)rsGetElementAt(a, x, y, z); /* load one uint via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline uint2 __attribute__((overloadable))
+    rsGetElementAt_uint2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(uint2 *)rsGetElementAt(a, x, y, z); /* load one uint2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline uint3 __attribute__((overloadable))
+    rsGetElementAt_uint3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(uint3 *)rsGetElementAt(a, x, y, z); /* load one uint3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline uint4 __attribute__((overloadable))
+    rsGetElementAt_uint4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(uint4 *)rsGetElementAt(a, x, y, z); /* load one uint4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline long __attribute__((overloadable))
+    rsGetElementAt_long(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(long *)rsGetElementAt(a, x, y, z); /* load one long via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline long2 __attribute__((overloadable))
+    rsGetElementAt_long2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(long2 *)rsGetElementAt(a, x, y, z); /* load one long2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline long3 __attribute__((overloadable))
+    rsGetElementAt_long3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(long3 *)rsGetElementAt(a, x, y, z); /* load one long3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline long4 __attribute__((overloadable))
+    rsGetElementAt_long4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(long4 *)rsGetElementAt(a, x, y, z); /* load one long4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline ulong __attribute__((overloadable))
+    rsGetElementAt_ulong(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(ulong *)rsGetElementAt(a, x, y, z); /* load one ulong via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline ulong2 __attribute__((overloadable))
+    rsGetElementAt_ulong2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(ulong2 *)rsGetElementAt(a, x, y, z); /* load one ulong2 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline ulong3 __attribute__((overloadable))
+    rsGetElementAt_ulong3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(ulong3 *)rsGetElementAt(a, x, y, z); /* load one ulong3 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 17) /* inline 3D overload: RS_VERSION unset or <= 17 */
+static inline ulong4 __attribute__((overloadable))
+    rsGetElementAt_ulong4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return *(ulong4 *)rsGetElementAt(a, x, y, z); /* load one ulong4 via the returned pointer */
+}
+#endif /* !defined(RS_VERSION) || (RS_VERSION <= 17) */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float __attribute__((overloadable))
+    rsGetElementAt_float(rs_allocation a, uint32_t x); /* 1D overload (x); returns float */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float2 __attribute__((overloadable))
+    rsGetElementAt_float2(rs_allocation a, uint32_t x); /* 1D overload (x); returns float2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float3 __attribute__((overloadable))
+    rsGetElementAt_float3(rs_allocation a, uint32_t x); /* 1D overload (x); returns float3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float4 __attribute__((overloadable))
+    rsGetElementAt_float4(rs_allocation a, uint32_t x); /* 1D overload (x); returns float4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double __attribute__((overloadable))
+    rsGetElementAt_double(rs_allocation a, uint32_t x); /* 1D overload (x); returns double */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double2 __attribute__((overloadable))
+    rsGetElementAt_double2(rs_allocation a, uint32_t x); /* 1D overload (x); returns double2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double3 __attribute__((overloadable))
+    rsGetElementAt_double3(rs_allocation a, uint32_t x); /* 1D overload (x); returns double3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double4 __attribute__((overloadable))
+    rsGetElementAt_double4(rs_allocation a, uint32_t x); /* 1D overload (x); returns double4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char __attribute__((overloadable))
+    rsGetElementAt_char(rs_allocation a, uint32_t x); /* 1D overload (x); returns char */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char2 __attribute__((overloadable))
+    rsGetElementAt_char2(rs_allocation a, uint32_t x); /* 1D overload (x); returns char2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char3 __attribute__((overloadable))
+    rsGetElementAt_char3(rs_allocation a, uint32_t x); /* 1D overload (x); returns char3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char4 __attribute__((overloadable))
+    rsGetElementAt_char4(rs_allocation a, uint32_t x); /* 1D overload (x); returns char4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar __attribute__((overloadable))
+    rsGetElementAt_uchar(rs_allocation a, uint32_t x); /* 1D overload (x); returns uchar */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar2 __attribute__((overloadable))
+    rsGetElementAt_uchar2(rs_allocation a, uint32_t x); /* 1D overload (x); returns uchar2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar3 __attribute__((overloadable))
+    rsGetElementAt_uchar3(rs_allocation a, uint32_t x); /* 1D overload (x); returns uchar3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar4 __attribute__((overloadable))
+    rsGetElementAt_uchar4(rs_allocation a, uint32_t x); /* 1D overload (x); returns uchar4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern short __attribute__((overloadable))
+    rsGetElementAt_short(rs_allocation a, uint32_t x); /* 1D overload (x); returns short */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern short2 __attribute__((overloadable))
+    rsGetElementAt_short2(rs_allocation a, uint32_t x); /* 1D overload (x); returns short2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern short3 __attribute__((overloadable))
+    rsGetElementAt_short3(rs_allocation a, uint32_t x); /* 1D overload (x); returns short3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern short4 __attribute__((overloadable))
+    rsGetElementAt_short4(rs_allocation a, uint32_t x); /* 1D overload (x); returns short4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ushort __attribute__((overloadable))
+    rsGetElementAt_ushort(rs_allocation a, uint32_t x); /* 1D overload (x); returns ushort */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ushort2 __attribute__((overloadable))
+    rsGetElementAt_ushort2(rs_allocation a, uint32_t x); /* 1D overload (x); returns ushort2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ushort3 __attribute__((overloadable))
+    rsGetElementAt_ushort3(rs_allocation a, uint32_t x); /* 1D overload (x); returns ushort3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ushort4 __attribute__((overloadable))
+    rsGetElementAt_ushort4(rs_allocation a, uint32_t x); /* 1D overload (x); returns ushort4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern int __attribute__((overloadable))
+    rsGetElementAt_int(rs_allocation a, uint32_t x); /* 1D overload (x); returns int */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern int2 __attribute__((overloadable))
+    rsGetElementAt_int2(rs_allocation a, uint32_t x); /* 1D overload (x); returns int2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern int3 __attribute__((overloadable))
+    rsGetElementAt_int3(rs_allocation a, uint32_t x); /* 1D overload (x); returns int3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern int4 __attribute__((overloadable))
+    rsGetElementAt_int4(rs_allocation a, uint32_t x); /* 1D overload (x); returns int4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uint __attribute__((overloadable))
+    rsGetElementAt_uint(rs_allocation a, uint32_t x); /* 1D overload (x); returns uint */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uint2 __attribute__((overloadable))
+    rsGetElementAt_uint2(rs_allocation a, uint32_t x); /* 1D overload (x); returns uint2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uint3 __attribute__((overloadable))
+    rsGetElementAt_uint3(rs_allocation a, uint32_t x); /* 1D overload (x); returns uint3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uint4 __attribute__((overloadable))
+    rsGetElementAt_uint4(rs_allocation a, uint32_t x); /* 1D overload (x); returns uint4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern long __attribute__((overloadable))
+    rsGetElementAt_long(rs_allocation a, uint32_t x); /* 1D overload (x); returns long */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern long2 __attribute__((overloadable))
+    rsGetElementAt_long2(rs_allocation a, uint32_t x); /* 1D overload (x); returns long2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern long3 __attribute__((overloadable))
+    rsGetElementAt_long3(rs_allocation a, uint32_t x); /* 1D overload (x); returns long3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern long4 __attribute__((overloadable))
+    rsGetElementAt_long4(rs_allocation a, uint32_t x); /* 1D overload (x); returns long4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ulong __attribute__((overloadable))
+    rsGetElementAt_ulong(rs_allocation a, uint32_t x); /* 1D overload (x); returns ulong */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ulong2 __attribute__((overloadable))
+    rsGetElementAt_ulong2(rs_allocation a, uint32_t x); /* 1D overload (x); returns ulong2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ulong3 __attribute__((overloadable))
+    rsGetElementAt_ulong3(rs_allocation a, uint32_t x); /* 1D overload (x); returns ulong3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ulong4 __attribute__((overloadable))
+    rsGetElementAt_ulong4(rs_allocation a, uint32_t x); /* 1D overload (x); returns ulong4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float __attribute__((overloadable))
+    rsGetElementAt_float(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns float */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float2 __attribute__((overloadable))
+    rsGetElementAt_float2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns float2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float3 __attribute__((overloadable))
+    rsGetElementAt_float3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns float3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float4 __attribute__((overloadable))
+    rsGetElementAt_float4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns float4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double __attribute__((overloadable))
+    rsGetElementAt_double(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns double */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double2 __attribute__((overloadable))
+    rsGetElementAt_double2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns double2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double3 __attribute__((overloadable))
+    rsGetElementAt_double3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns double3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double4 __attribute__((overloadable))
+    rsGetElementAt_double4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns double4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char __attribute__((overloadable))
+    rsGetElementAt_char(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns char */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char2 __attribute__((overloadable))
+    rsGetElementAt_char2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns char2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char3 __attribute__((overloadable))
+    rsGetElementAt_char3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns char3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char4 __attribute__((overloadable))
+    rsGetElementAt_char4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns char4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar __attribute__((overloadable))
+    rsGetElementAt_uchar(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns uchar */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar2 __attribute__((overloadable))
+    rsGetElementAt_uchar2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns uchar2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar3 __attribute__((overloadable))
+    rsGetElementAt_uchar3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns uchar3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar4 __attribute__((overloadable))
+    rsGetElementAt_uchar4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns uchar4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern short __attribute__((overloadable))
+    rsGetElementAt_short(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns short */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern short2 __attribute__((overloadable))
+    rsGetElementAt_short2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns short2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern short3 __attribute__((overloadable))
+    rsGetElementAt_short3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns short3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern short4 __attribute__((overloadable))
+    rsGetElementAt_short4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns short4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ushort __attribute__((overloadable))
+    rsGetElementAt_ushort(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns ushort */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ushort2 __attribute__((overloadable))
+    rsGetElementAt_ushort2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns ushort2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ushort3 __attribute__((overloadable))
+    rsGetElementAt_ushort3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns ushort3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ushort4 __attribute__((overloadable))
+    rsGetElementAt_ushort4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns ushort4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern int __attribute__((overloadable))
+    rsGetElementAt_int(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns int */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern int2 __attribute__((overloadable))
+    rsGetElementAt_int2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns int2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern int3 __attribute__((overloadable))
+    rsGetElementAt_int3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns int3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern int4 __attribute__((overloadable))
+    rsGetElementAt_int4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns int4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uint __attribute__((overloadable))
+    rsGetElementAt_uint(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns uint */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uint2 __attribute__((overloadable))
+    rsGetElementAt_uint2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns uint2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uint3 __attribute__((overloadable))
+    rsGetElementAt_uint3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns uint3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uint4 __attribute__((overloadable))
+    rsGetElementAt_uint4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns uint4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern long __attribute__((overloadable))
+    rsGetElementAt_long(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns long */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern long2 __attribute__((overloadable))
+    rsGetElementAt_long2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns long2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern long3 __attribute__((overloadable))
+    rsGetElementAt_long3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns long3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern long4 __attribute__((overloadable))
+    rsGetElementAt_long4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns long4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ulong __attribute__((overloadable))
+    rsGetElementAt_ulong(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns ulong */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ulong2 __attribute__((overloadable))
+    rsGetElementAt_ulong2(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns ulong2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ulong3 __attribute__((overloadable))
+    rsGetElementAt_ulong3(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns ulong3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern ulong4 __attribute__((overloadable))
+    rsGetElementAt_ulong4(rs_allocation a, uint32_t x, uint32_t y); /* 2D overload (x, y); returns ulong4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float __attribute__((overloadable))
+    rsGetElementAt_float(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns float */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float2 __attribute__((overloadable))
+    rsGetElementAt_float2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns float2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float3 __attribute__((overloadable))
+    rsGetElementAt_float3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns float3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern float4 __attribute__((overloadable))
+    rsGetElementAt_float4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns float4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double __attribute__((overloadable))
+    rsGetElementAt_double(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns double */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double2 __attribute__((overloadable))
+    rsGetElementAt_double2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns double2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double3 __attribute__((overloadable))
+    rsGetElementAt_double3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns double3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern double4 __attribute__((overloadable))
+    rsGetElementAt_double4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns double4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char __attribute__((overloadable))
+    rsGetElementAt_char(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns char */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char2 __attribute__((overloadable))
+    rsGetElementAt_char2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns char2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char3 __attribute__((overloadable))
+    rsGetElementAt_char3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns char3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern char4 __attribute__((overloadable))
+    rsGetElementAt_char4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns char4 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar __attribute__((overloadable))
+    rsGetElementAt_uchar(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns uchar */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar2 __attribute__((overloadable))
+    rsGetElementAt_uchar2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns uchar2 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18)) /* prototype only: RS_VERSION >= 18 */
+extern uchar3 __attribute__((overloadable))
+    rsGetElementAt_uchar3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z); /* 3D overload (x, y, z); returns uchar3 */
+#endif /* RS_VERSION >= 18 */
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern uchar4 __attribute__((overloadable))
+    rsGetElementAt_uchar4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern short __attribute__((overloadable))
+    rsGetElementAt_short(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern short2 __attribute__((overloadable))
+    rsGetElementAt_short2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern short3 __attribute__((overloadable))
+    rsGetElementAt_short3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern short4 __attribute__((overloadable))
+    rsGetElementAt_short4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern ushort __attribute__((overloadable))
+    rsGetElementAt_ushort(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern ushort2 __attribute__((overloadable))
+    rsGetElementAt_ushort2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern ushort3 __attribute__((overloadable))
+    rsGetElementAt_ushort3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern ushort4 __attribute__((overloadable))
+    rsGetElementAt_ushort4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern int __attribute__((overloadable))
+    rsGetElementAt_int(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern int2 __attribute__((overloadable))
+    rsGetElementAt_int2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern int3 __attribute__((overloadable))
+    rsGetElementAt_int3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern int4 __attribute__((overloadable))
+    rsGetElementAt_int4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern uint __attribute__((overloadable))
+    rsGetElementAt_uint(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern uint2 __attribute__((overloadable))
+    rsGetElementAt_uint2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern uint3 __attribute__((overloadable))
+    rsGetElementAt_uint3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern uint4 __attribute__((overloadable))
+    rsGetElementAt_uint4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern long __attribute__((overloadable))
+    rsGetElementAt_long(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern long2 __attribute__((overloadable))
+    rsGetElementAt_long2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern long3 __attribute__((overloadable))
+    rsGetElementAt_long3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern long4 __attribute__((overloadable))
+    rsGetElementAt_long4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern ulong __attribute__((overloadable))
+    rsGetElementAt_ulong(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern ulong2 __attribute__((overloadable))
+    rsGetElementAt_ulong2(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern ulong3 __attribute__((overloadable))
+    rsGetElementAt_ulong3(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern ulong4 __attribute__((overloadable))
+    rsGetElementAt_ulong4(rs_allocation a, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+/*
+ * Extract a single element from an allocation.
+ *
+ * Coordinates are in the dimensions of the Y plane
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern uchar __attribute__((overloadable))
+    rsGetElementAtYuv_uchar_U(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+/*
+ * Extract a single element from an allocation.
+ *
+ * Coordinates are in the dimensions of the Y plane
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern uchar __attribute__((overloadable))
+    rsGetElementAtYuv_uchar_V(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+/*
+ * Extract a single element from an allocation.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern uchar __attribute__((overloadable))
+    rsGetElementAtYuv_uchar_Y(rs_allocation a, uint32_t x, uint32_t y);
+#endif
+
+/*
+ * Fetch from the allocation in the way described by the sampler.
+ *
+ * If your allocation is 1D, use the variant with float for location.
+ * For 2D, use the float2 variant.
+ *
+ * Parameters:
+ *   a allocation to sample from
+ *   s sampler state
+ *   location location to sample from
+ *   lod mip level to sample from; for fractional values, mip levels will be interpolated if RS_SAMPLER_LINEAR_MIP_LINEAR is used
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern float4 __attribute__((overloadable))
+    rsSample(rs_allocation a, rs_sampler s, float location);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern float4 __attribute__((overloadable))
+    rsSample(rs_allocation a, rs_sampler s, float location, float lod);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern float4 __attribute__((overloadable))
+    rsSample(rs_allocation a, rs_sampler s, float2 location);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern float4 __attribute__((overloadable))
+    rsSample(rs_allocation a, rs_sampler s, float2 location, float lod);
+#endif
+
+/*
+ * rsSetElementAt: Set an element
+ *
+ * Set a single element of an allocation.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt(rs_allocation a, void* ptr, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt(rs_allocation a, void* ptr, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float(rs_allocation a, float val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float2(rs_allocation a, float2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float3(rs_allocation a, float3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float4(rs_allocation a, float4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double(rs_allocation a, double val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double2(rs_allocation a, double2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double3(rs_allocation a, double3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double4(rs_allocation a, double4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char(rs_allocation a, char val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char2(rs_allocation a, char2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char3(rs_allocation a, char3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char4(rs_allocation a, char4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar(rs_allocation a, uchar val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar2(rs_allocation a, uchar2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar3(rs_allocation a, uchar3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar4(rs_allocation a, uchar4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short(rs_allocation a, short val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short2(rs_allocation a, short2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short3(rs_allocation a, short3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short4(rs_allocation a, short4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort(rs_allocation a, ushort val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort2(rs_allocation a, ushort2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort3(rs_allocation a, ushort3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort4(rs_allocation a, ushort4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int(rs_allocation a, int val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int2(rs_allocation a, int2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int3(rs_allocation a, int3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int4(rs_allocation a, int4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint(rs_allocation a, uint val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint2(rs_allocation a, uint2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint3(rs_allocation a, uint3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint4(rs_allocation a, uint4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long(rs_allocation a, long val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long2(rs_allocation a, long2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long3(rs_allocation a, long3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long4(rs_allocation a, long4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong(rs_allocation a, ulong val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong2(rs_allocation a, ulong2 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong3(rs_allocation a, ulong3 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong4(rs_allocation a, ulong4 val, uint32_t x);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float(rs_allocation a, float val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float2(rs_allocation a, float2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float3(rs_allocation a, float3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float4(rs_allocation a, float4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double(rs_allocation a, double val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double2(rs_allocation a, double2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double3(rs_allocation a, double3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double4(rs_allocation a, double4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char(rs_allocation a, char val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char2(rs_allocation a, char2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char3(rs_allocation a, char3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char4(rs_allocation a, char4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar(rs_allocation a, uchar val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar2(rs_allocation a, uchar2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar3(rs_allocation a, uchar3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar4(rs_allocation a, uchar4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short(rs_allocation a, short val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short2(rs_allocation a, short2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short3(rs_allocation a, short3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short4(rs_allocation a, short4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort(rs_allocation a, ushort val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort2(rs_allocation a, ushort2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort3(rs_allocation a, ushort3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort4(rs_allocation a, ushort4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int(rs_allocation a, int val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int2(rs_allocation a, int2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int3(rs_allocation a, int3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int4(rs_allocation a, int4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint(rs_allocation a, uint val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint2(rs_allocation a, uint2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint3(rs_allocation a, uint3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint4(rs_allocation a, uint4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long(rs_allocation a, long val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long2(rs_allocation a, long2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long3(rs_allocation a, long3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long4(rs_allocation a, long4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong(rs_allocation a, ulong val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong2(rs_allocation a, ulong2 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong3(rs_allocation a, ulong3 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong4(rs_allocation a, ulong4 val, uint32_t x, uint32_t y);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float(rs_allocation a, float val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float2(rs_allocation a, float2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float3(rs_allocation a, float3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_float4(rs_allocation a, float4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double(rs_allocation a, double val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double2(rs_allocation a, double2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double3(rs_allocation a, double3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_double4(rs_allocation a, double4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char(rs_allocation a, char val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char2(rs_allocation a, char2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char3(rs_allocation a, char3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_char4(rs_allocation a, char4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar(rs_allocation a, uchar val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar2(rs_allocation a, uchar2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar3(rs_allocation a, uchar3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uchar4(rs_allocation a, uchar4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short(rs_allocation a, short val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short2(rs_allocation a, short2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short3(rs_allocation a, short3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_short4(rs_allocation a, short4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort(rs_allocation a, ushort val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort2(rs_allocation a, ushort2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort3(rs_allocation a, ushort3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ushort4(rs_allocation a, ushort4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int(rs_allocation a, int val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int2(rs_allocation a, int2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int3(rs_allocation a, int3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_int4(rs_allocation a, int4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint(rs_allocation a, uint val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint2(rs_allocation a, uint2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint3(rs_allocation a, uint3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_uint4(rs_allocation a, uint4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long(rs_allocation a, long val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long2(rs_allocation a, long2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long3(rs_allocation a, long3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_long4(rs_allocation a, long4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong(rs_allocation a, ulong val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong2(rs_allocation a, ulong2 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong3(rs_allocation a, ulong3 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern void __attribute__((overloadable))
+    rsSetElementAt_ulong4(rs_allocation a, ulong4 val, uint32_t x, uint32_t y, uint32_t z);
+#endif
+
+#endif // RENDERSCRIPT_RS_ALLOCATION_RSH
diff --git a/renderscript/include/rs_atomic.rsh b/renderscript/include/rs_atomic.rsh
index ba847cf..29c294a 100644
--- a/renderscript/include/rs_atomic.rsh
+++ b/renderscript/include/rs_atomic.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,248 +14,243 @@
  * limitations under the License.
  */
 
-/** @file rs_atomic.rsh
- *  \brief Atomic routines
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_atomic.rsh: Atomic routines
  *
+ * To update values shared between multiple threads, use the functions below.
+ * They ensure that the values are atomically updated, i.e. that the memory
+ * reads, the updates, and the memory writes are all done in the right order.
  *
+ * These functions are slower than just doing the non-atomic variants, so use
+ * them only when synchronization is needed.
+ *
+ * Note that in RenderScript, your code is likely to be running in separate
+ * threads even though you did not explicitly create them.  The RenderScript
+ * runtime will very often split the execution of one kernel across multiple
+ * threads.  Updating globals should be done with atomic functions.  If possible,
+ * modify your algorithm to avoid them altogether.
  */
+#ifndef RENDERSCRIPT_RS_ATOMIC_RSH
+#define RENDERSCRIPT_RS_ATOMIC_RSH
 
-#ifndef __RS_ATOMIC_RSH__
-#define __RS_ATOMIC_RSH__
-
+/*
+ * rsAtomicAdd: Thread-safe addition
+ *
+ * Atomically adds a value to the value at addr, i.e. *addr += value.
+ *
+ * Parameters:
+ *   addr Address of the value to modify
+ *   value Amount to add
+ *
+ * Returns: Old value
+ */
 #if (defined(RS_VERSION) && (RS_VERSION >= 14))
-
-/**
- * Atomic add one to the value at addr.
- * Equal to rsAtomicAdd(addr, 1)
- *
- * @param addr Address of value to increment
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicInc(volatile int32_t* addr);
-
-/**
- * Atomic subtract one from the value at addr. Equal to rsAtomicSub(addr, 1)
- *
- * @param addr Address of value to decrement
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicDec(volatile int32_t* addr);
-
-/**
- * Atomic add a value to the value at addr.  addr[0] += value
- *
- * @param addr Address of value to modify
- * @param value Amount to add to the value at addr
- *
- * @return old value
- */
 extern int32_t __attribute__((overloadable))
     rsAtomicAdd(volatile int32_t* addr, int32_t value);
-
-/**
- * Atomic Subtract a value from the value at addr.  addr[0] -= value
- *
- * @param addr Address of value to modify
- * @param value Amount to subtract from the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicSub(volatile int32_t* addr, int32_t value);
-
-/**
- * Atomic Bitwise and a value from the value at addr.  addr[0] &= value
- *
- * @param addr Address of value to modify
- * @param value Amount to and with the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicAnd(volatile int32_t* addr, int32_t value);
-
-/**
- * Atomic Bitwise or a value from the value at addr.  addr[0] |= value
- *
- * @param addr Address of value to modify
- * @param value Amount to or with the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicOr(volatile int32_t* addr, int32_t value);
-
-/**
- * Atomic Bitwise xor a value from the value at addr.  addr[0] ^= value
- *
- * @param addr Address of value to modify
- * @param value Amount to xor with the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicXor(volatile int32_t* addr, int32_t value);
-
-/**
- * Atomic Set the value at addr to the min of addr and value
- * addr[0] = rsMin(addr[0], value)
- *
- * @param addr Address of value to modify
- * @param value comparison value
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicMin(volatile uint32_t* addr, uint32_t value);
-/**
- * Atomic Set the value at addr to the min of addr and value
- * addr[0] = rsMin(addr[0], value)
- *
- * @param addr Address of value to modify
- * @param value comparison value
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicMin(volatile int32_t* addr, int32_t value);
-
-/**
- * Atomic Set the value at addr to the max of addr and value
- * addr[0] = rsMax(addr[0], value)
- *
- * @param addr Address of value to modify
- * @param value comparison value
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicMax(volatile uint32_t* addr, uint32_t value);
-/**
- * Atomic Set the value at addr to the max of addr and value
- * addr[0] = rsMin(addr[0], value)
- *
- * @param addr Address of value to modify
- * @param value comparison value
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicMax(volatile int32_t* addr, int32_t value);
-
-/**
- * Compare-and-set operation with a full memory barrier.
- *
- * If the value at addr matches compareValue then newValue is written.
- *
- * @param addr The address to compare and replace if the compare passes.
- * @param compareValue The value to test addr[0] against.
- * @param newValue The value to write if the test passes.
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicCas(volatile int32_t* addr, int32_t compareValue, int32_t newValue);
-
-/**
- * Compare-and-set operation with a full memory barrier.
- *
- * If the value at addr matches compareValue then newValue is written.
- *
- * @param addr The address to compare and replace if the compare passes.
- * @param compareValue The value to test addr[0] against.
- * @param newValue The value to write if the test passes.
- *
- * @return old value
- */
-extern uint32_t __attribute__((overloadable))
-    rsAtomicCas(volatile uint32_t* addr, uint32_t compareValue, uint32_t newValue);
-
-#endif //defined(RS_VERSION) && (RS_VERSION >= 14)
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 20))   // TODO: api 21
-
-/**
- * Atomic add one to the value at addr.
- * Equal to rsAtomicAdd(addr, 1)
- *
- * @param addr Address of value to increment
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicInc(volatile uint32_t* addr);
-
-/**
- * Atomic subtract one from the value at addr. Equal to rsAtomicSub(addr, 1)
- *
- * @param addr Address of value to decrement
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicDec(volatile uint32_t* addr);
-
-/**
- * Atomic add a value to the value at addr.  addr[0] += value
- *
- * @param addr Address of value to modify
- * @param value Amount to add to the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicAdd(volatile uint32_t* addr, uint32_t value);
-
-/**
- * Atomic Subtract a value from the value at addr.  addr[0] -= value
- *
- * @param addr Address of value to modify
- * @param value Amount to subtract from the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicSub(volatile uint32_t* addr, uint32_t value);
-
-/**
- * Atomic Bitwise and a value from the value at addr.  addr[0] &= value
- *
- * @param addr Address of value to modify
- * @param value Amount to and with the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicAnd(volatile uint32_t* addr, uint32_t value);
-
-/**
- * Atomic Bitwise or a value from the value at addr.  addr[0] |= value
- *
- * @param addr Address of value to modify
- * @param value Amount to or with the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicOr(volatile uint32_t* addr, uint32_t value);
-
-/**
- * Atomic Bitwise xor a value from the value at addr.  addr[0] ^= value
- *
- * @param addr Address of value to modify
- * @param value Amount to xor with the value at addr
- *
- * @return old value
- */
-extern int32_t __attribute__((overloadable))
-    rsAtomicXor(volatile uint32_t* addr, uint32_t value);
-
-#endif //defined(RS_VERSION) && (RS_VERSION >= 21)
-
 #endif
 
+#if (defined(RS_VERSION) && (RS_VERSION >= 20))
+extern int32_t __attribute__((overloadable))
+    rsAtomicAdd(volatile uint32_t* addr, uint32_t value);
+#endif
+
+/*
+ * rsAtomicAnd: Thread-safe bitwise and
+ *
+ * Atomically performs a bitwise and of two values, storing the result back at addr,
+ * i.e. *addr &= value
+ *
+ * Parameters:
+ *   addr Address of the value to modify
+ *   value Value to and with
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicAnd(volatile int32_t* addr, int32_t value);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 20))
+extern int32_t __attribute__((overloadable))
+    rsAtomicAnd(volatile uint32_t* addr, uint32_t value);
+#endif
+
+/*
+ * rsAtomicCas: Thread-safe compare and set
+ *
+ * If the value at addr matches compareValue then the newValue is written at addr,
+ * i.e. if (*addr == compareValue) { *addr = newValue; }
+ *
+ * You can check that the value was written by checking that the value returned
+ * by rsAtomicCas is compareValue.
+ *
+ * Parameters:
+ *   addr The address to compare and replace if the compare passes.
+ *   compareValue The value to test *addr against.
+ *   newValue The value to write if the test passes.
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicCas(volatile int32_t* addr, int32_t compareValue, int32_t newValue);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern uint32_t __attribute__((overloadable))
+    rsAtomicCas(volatile uint32_t* addr, uint32_t compareValue, uint32_t newValue);
+#endif
+
+/*
+ * rsAtomicDec: Thread-safe decrement
+ *
+ * Atomically subtracts one from the value at addr.  Equal to rsAtomicSub(addr, 1)
+ *
+ * Parameters:
+ *   addr Address of the value to decrement
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicDec(volatile int32_t* addr);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 20))
+extern int32_t __attribute__((overloadable))
+    rsAtomicDec(volatile uint32_t* addr);
+#endif
+
+/*
+ * rsAtomicInc: Thread-safe increment
+ *
+ * Atomically adds one to the value at addr.  Equal to rsAtomicAdd(addr, 1)
+ *
+ * Parameters:
+ *   addr Address of the value to increment
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicInc(volatile int32_t* addr);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 20))
+extern int32_t __attribute__((overloadable))
+    rsAtomicInc(volatile uint32_t* addr);
+#endif
+
+/*
+ * rsAtomicMax: Thread-safe maximum
+ *
+ * Atomically sets the value at addr to the maximum of *addr and value, i.e.
+ * *addr = max(*addr, value)
+ *
+ * Parameters:
+ *   addr Address of the value to modify
+ *   value Comparison value
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern uint32_t __attribute__((overloadable))
+    rsAtomicMax(volatile uint32_t* addr, uint32_t value);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicMax(volatile int32_t* addr, int32_t value);
+#endif
+
+/*
+ * rsAtomicMin: Thread-safe minimum
+ *
+ * Atomically sets the value at addr to the minimum of *addr and value, i.e.
+ * *addr = min(*addr, value)
+ *
+ * Parameters:
+ *   addr Address of the value to modify
+ *   value Comparison value
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern uint32_t __attribute__((overloadable))
+    rsAtomicMin(volatile uint32_t* addr, uint32_t value);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicMin(volatile int32_t* addr, int32_t value);
+#endif
+
+/*
+ * rsAtomicOr: Thread-safe bitwise or
+ *
+ * Atomically performs a bitwise or of two values, storing the result at addr,
+ * i.e. *addr |= value
+ *
+ * Parameters:
+ *   addr Address of the value to modify
+ *   value Value to or with
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicOr(volatile int32_t* addr, int32_t value);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 20))
+extern int32_t __attribute__((overloadable))
+    rsAtomicOr(volatile uint32_t* addr, uint32_t value);
+#endif
+
+/*
+ * rsAtomicSub: Thread-safe subtraction
+ *
+ * Atomically subtracts a value from the value at addr, i.e. *addr -= value
+ *
+ * Parameters:
+ *   addr Address of the value to modify
+ *   value Amount to subtract
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicSub(volatile int32_t* addr, int32_t value);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 20))
+extern int32_t __attribute__((overloadable))
+    rsAtomicSub(volatile uint32_t* addr, uint32_t value);
+#endif
+
+/*
+ * rsAtomicXor: Thread-safe bitwise exclusive or
+ *
+ * Atomically performs a bitwise xor of two values, storing the result at addr,
+ * i.e. *addr ^= value
+ *
+ * Parameters:
+ *   addr Address of the value to modify
+ *   value Value to xor with
+ *
+ * Returns: Old value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern int32_t __attribute__((overloadable))
+    rsAtomicXor(volatile int32_t* addr, int32_t value);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 20))
+extern int32_t __attribute__((overloadable))
+    rsAtomicXor(volatile uint32_t* addr, uint32_t value);
+#endif
+
+#endif // RENDERSCRIPT_RS_ATOMIC_RSH
diff --git a/renderscript/include/rs_core.rsh b/renderscript/include/rs_core.rsh
index 3489e44..c86d23e 100644
--- a/renderscript/include/rs_core.rsh
+++ b/renderscript/include/rs_core.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011-2012 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,39 +14,27 @@
  * limitations under the License.
  */
 
- /*! \mainpage notitle
-  *
-  * RenderScript is a high-performance runtime that provides
-  * compute operations at the native level. RenderScript code is compiled on devices
-  * at runtime to allow platform-independence as well.
-  * This reference documentation describes the RenderScript runtime APIs, which you
-  * can utilize to write RenderScript code in C99. The RenderScript compute header
-  * files are automatically included for you.
-  *
-  * To use RenderScript, you need to utilize the RenderScript runtime APIs documented here
-  * as well as the Android framework APIs for RenderScript.
-  * For documentation on the Android framework APIs, see the <a target="_parent" href=
-  * "http://developer.android.com/reference/android/renderscript/package-summary.html">
-  * android.renderscript</a> package reference.
-  * For more information on how to develop with RenderScript and how the runtime and
-  * Android framework APIs interact, see the <a target="_parent" href=
-  * "http://developer.android.com/guide/topics/renderscript/index.html">RenderScript
-  * developer guide</a> and the <a target="_parent" href=
-  * "http://developer.android.com/resources/samples/RenderScript/index.html">
-  * RenderScript samples</a>.
-  */
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
 
-/** @file rs_core.rsh
- *  \brief todo-jsams
+/*
+ * rs_core.rsh: TODO
  *
- *  todo-jsams
+ * RenderScript is a high-performance runtime that provides
+ * compute operations at the native level. RenderScript code is compiled on devices
+ * at runtime to allow platform-independence as well.
+ * This reference documentation describes the RenderScript runtime APIs, which you
+ * can utilize to write RenderScript code in C99. The RenderScript compute header
+ * files are automatically included for you.
  *
+ * To use RenderScript, you need to utilize the RenderScript runtime APIs documented here
+ * as well as the Android framework APIs for RenderScript.
+ * For documentation on the Android framework APIs, see the android.renderscript package reference.
+ * For more information on how to develop with RenderScript and how the runtime and
+ * Android framework APIs interact, see the RenderScript developer guide
+ * and the RenderScript samples.
  */
-
-#ifndef __RS_CORE_RSH__
-#define __RS_CORE_RSH__
-
-#define _RS_RUNTIME extern
+#ifndef RENDERSCRIPT_RS_CORE_RSH
+#define RENDERSCRIPT_RS_CORE_RSH
 
 #define RS_KERNEL __attribute__((kernel))
 
@@ -63,54 +51,41 @@
 #include "rs_sampler.rsh"
 #include "rs_time.rsh"
 
-/**
- * Send a message back to the client.  Will not block and returns true
- * if the message was sendable and false if the fifo was full.
- * A message ID is required.  Data payload is optional.
- */
-extern bool __attribute__((overloadable))
-    rsSendToClient(int cmdID);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsSendToClient(int cmdID, const void *data, uint len);
-/**
- * Send a message back to the client, blocking until the message is queued.
- * A message ID is required.  Data payload is optional.
- */
-extern void __attribute__((overloadable))
-    rsSendToClientBlocking(int cmdID);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSendToClientBlocking(int cmdID, const void *data, uint len);
-
-
-/**
+/*
+ * rs_for_each_strategy_t: Launch order hint for rsForEach calls
+ *
  * Launch order hint for rsForEach calls.  This provides a hint to the system to
  * determine in which order the root function of the target is called with each
  * cell of the allocation.
  *
  * This is a hint and implementations may not obey the order.
  */
-enum rs_for_each_strategy {
+typedef enum rs_for_each_strategy {
     RS_FOR_EACH_STRATEGY_SERIAL = 0,
     RS_FOR_EACH_STRATEGY_DONT_CARE = 1,
     RS_FOR_EACH_STRATEGY_DST_LINEAR = 2,
-    RS_FOR_EACH_STRATEGY_TILE_SMALL= 3,
+    RS_FOR_EACH_STRATEGY_TILE_SMALL = 3,
     RS_FOR_EACH_STRATEGY_TILE_MEDIUM = 4,
     RS_FOR_EACH_STRATEGY_TILE_LARGE = 5
-};
+} rs_for_each_strategy_t;
 
+/*
+ * rs_kernel_context: Opaque handle to RenderScript kernel invocation context
+ *
+ * TODO
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+typedef const struct rs_kernel_context_t * rs_kernel_context;
+#endif
 
-/**
+/*
+ * rs_script_call_t: Provides extra information to a rsForEach call
+ *
  * Structure to provide extra information to a rsForEach call.  Primarly used to
  * restrict the call to a subset of cells in the allocation.
  */
 typedef struct rs_script_call {
-    enum rs_for_each_strategy strategy;
+    rs_for_each_strategy_t strategy;
     uint32_t xStart;
     uint32_t xEnd;
     uint32_t yStart;
@@ -121,71 +96,291 @@
     uint32_t arrayEnd;
 } rs_script_call_t;
 
-/**
+/*
  * Make a script to script call to launch work. One of the input or output is
  * required to be a valid object. The input and output must be of the same
  * dimensions.
- * API 10-13
  *
- * @param script The target script to call
- * @param input The allocation to source data from
- * @param output the allocation to write date into
- * @param usrData The user definied params to pass to the root script.  May be
- *                NULL.
- * @param sc Extra control infomation used to select a sub-region of the
- *           allocation to be processed or suggest a walking strategy.  May be
- *           NULL.
- *
- *  */
-#if !defined(RS_VERSION) || (RS_VERSION < 14)
-extern void __attribute__((overloadable))
-    rsForEach(rs_script script, rs_allocation input,
-              rs_allocation output, const void * usrData,
-              const rs_script_call_t *sc);
-/**
- * \overload
+ * Parameters:
+ *   script The target script to call
+ *   input The allocation to source data from
+ *   output the allocation to write data into
+ *   usrData The user defined params to pass to the root script.  May be NULL.
+ *   sc Extra control information used to select a sub-region of the allocation to be processed or suggest a walking strategy.  May be NULL.
+ *   usrDataLen The size of the userData structure.  This will be used to perform a shallow copy of the data if necessary.
  */
+#if !defined(RS_VERSION) || (RS_VERSION <= 13)
 extern void __attribute__((overloadable))
-    rsForEach(rs_script script, rs_allocation input,
-              rs_allocation output, const void * usrData);
-#else
+    rsForEach(rs_script script, rs_allocation input, rs_allocation output, const void* usrData,
+              const rs_script_call_t* sc);
+#endif
 
-/**
- * Make a script to script call to launch work. One of the input or output is
- * required to be a valid object. The input and output must be of the same
- * dimensions.
- * API 14+
- *
- * @param script The target script to call
- * @param input The allocation to source data from
- * @param output the allocation to write date into
- * @param usrData The user definied params to pass to the root script.  May be
- *                NULL.
- * @param usrDataLen The size of the userData structure.  This will be used to
- *                   perform a shallow copy of the data if necessary.
- * @param sc Extra control infomation used to select a sub-region of the
- *           allocation to be processed or suggest a walking strategy.  May be
- *           NULL.
- *
- */
+#if !defined(RS_VERSION) || (RS_VERSION <= 13)
 extern void __attribute__((overloadable))
-    rsForEach(rs_script script, rs_allocation input, rs_allocation output,
-              const void * usrData, size_t usrDataLen, const rs_script_call_t *);
-/**
- * \overload
- */
+    rsForEach(rs_script script, rs_allocation input, rs_allocation output, const void* usrData);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14) && (RS_VERSION <= 20))
 extern void __attribute__((overloadable))
-    rsForEach(rs_script script, rs_allocation input, rs_allocation output,
-              const void * usrData, size_t usrDataLen);
-/**
- * \overload
- */
+    rsForEach(rs_script script, rs_allocation input, rs_allocation output, const void* usrData,
+              size_t usrDataLen, const rs_script_call_t* sc);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14) && (RS_VERSION <= 20))
+extern void __attribute__((overloadable))
+    rsForEach(rs_script script, rs_allocation input, rs_allocation output, const void* usrData,
+              size_t usrDataLen);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
 extern void __attribute__((overloadable))
     rsForEach(rs_script script, rs_allocation input, rs_allocation output);
 #endif
 
+/*
+ * Send a message back to the client.  Will not block and returns true
+ * if the message was sendable and false if the fifo was full.
+ * A message ID is required.  Data payload is optional.
+ */
+extern bool __attribute__((overloadable))
+    rsSendToClient(int cmdID);
 
+extern bool __attribute__((overloadable))
+    rsSendToClient(int cmdID, const void* data, uint len);
 
-#undef _RS_RUNTIME
+/*
+ * Send a message back to the client, blocking until the message is queued.
+ * A message ID is required.  Data payload is optional.
+ */
+extern void __attribute__((overloadable))
+    rsSendToClientBlocking(int cmdID);
 
+extern void __attribute__((overloadable))
+    rsSendToClientBlocking(int cmdID, const void* data, uint len);
+
+/*
+ * rsGetArray0: Index in the Array0 dimension for the specified context
+ *
+ * Returns the index in the Array0 dimension of the cell being processed,
+ * as specified by the supplied context.
+ *
+ * This context is created when a kernel is launched and updated at each
+ * iteration.  It contains common characteristics of the allocations being
+ * iterated over and rarely used indexes, like the Array0 index.
+ *
+ * You can access the context by adding a rs_kernel_context argument to your
+ * kernel function.  E.g.
+ * short RS_KERNEL myKernel(short value, uint32_t x, rs_kernel_context context) {
+ *   // The current index in the common x, y, z, w dimensions are accessed by
+ *   // adding these variables as arguments.  For the more rarely used indexes
+ *   // to the other dimensions, extract them from the context:
+ *   uint32_t index_a0 = rsGetArray0(context);
+ *   //...
+ * }
+ *
+ * This function returns 0 if the Array0 dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetArray0(rs_kernel_context ctxt);
 #endif
+
+/*
+ * rsGetArray1: Index in the Array1 dimension for the specified context
+ *
+ * Returns the index in the Array1 dimension of the cell being processed,
+ * as specified by the supplied context.  See rsGetArray0() for an explanation
+ * of the context.
+ *
+ * Returns 0 if the Array1 dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetArray1(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetArray2: Index in the Array2 dimension for the specified context
+ *
+ * Returns the index in the Array2 dimension of the cell being processed,
+ * as specified by the supplied context.  See rsGetArray0() for an explanation
+ * of the context.
+ *
+ * Returns 0 if the Array2 dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetArray2(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetArray3: Index in the Array3 dimension for the specified context
+ *
+ * Returns the index in the Array3 dimension of the cell being processed,
+ * as specified by the supplied context.  See rsGetArray0() for an explanation
+ * of the context.
+ *
+ * Returns 0 if the Array3 dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetArray3(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimArray0: Size of the Array0 dimension for the specified context
+ *
+ * Returns the size of the Array0 dimension for the specified context.
+ * See rsGetDimX() for an explanation of the context.
+ *
+ * Returns 0 if the Array0 dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetDimArray0(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimArray1: Size of the Array1 dimension for the specified context
+ *
+ * Returns the size of the Array1 dimension for the specified context.
+ * See rsGetDimX() for an explanation of the context.
+ *
+ * Returns 0 if the Array1 dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetDimArray1(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimArray2: Size of the Array2 dimension for the specified context
+ *
+ * Returns the size of the Array2 dimension for the specified context.
+ * See rsGetDimX() for an explanation of the context.
+ *
+ * Returns 0 if the Array2 dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetDimArray2(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimArray3: Size of the Array3 dimension for the specified context
+ *
+ * Returns the size of the Array3 dimension for the specified context.
+ * See rsGetDimX() for an explanation of the context.
+ *
+ * Returns 0 if the Array3 dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetDimArray3(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimHasFaces: Presence of more than one face for the specified context
+ *
+ * If the context refers to a cubemap, this function returns true if there's
+ * more than one face present.  In all other cases, it returns false.
+ * See rsGetDimX() for an explanation of the context.
+ *
+ * rsAllocationGetDimFaces() is similar but returns 0 or 1 instead of a bool.
+ *
+ * Returns: Returns true if more than one face is present, false otherwise.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern bool __attribute__((overloadable))
+    rsGetDimHasFaces(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimLod: Number of levels of detail for the specified context
+ *
+ * Returns the number of levels of detail for the specified context.
+ * This is useful for mipmaps.  See rsGetDimX() for an explanation of the context.
+ * Returns 0 if Level of Detail is not used.
+ *
+ * rsAllocationGetDimLOD() is similar but returns 0 or 1 instead of the actual
+ * number of levels.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetDimLod(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimX: Size of the X dimension for the specified context
+ *
+ * Returns the size of the X dimension for the specified context.
+ *
+ * This context is created when a kernel is launched.  It contains common
+ * characteristics of the allocations being iterated over by the kernel in
+ * a very efficient structure.  It also contains rarely used indexes.
+ *
+ * You can access it by adding a rs_kernel_context argument to your kernel
+ * function.  E.g.
+ * int4 RS_KERNEL myKernel(int4 value, rs_kernel_context context) {
+ *   uint32_t size = rsGetDimX(context); //...
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetDimX(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimY: Size of the Y dimension for the specified context
+ *
+ * Returns the size of the Y dimension for the specified context.
+ * See rsGetDimX() for an explanation of the context.
+ *
+ * Returns 0 if the Y dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetDimY(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetDimZ: Size of the Z dimension for the specified context
+ *
+ * Returns the size of the Z dimension for the specified context.
+ * See rsGetDimX() for an explanation of the context.
+ *
+ * Returns 0 if the Z dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetDimZ(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetFace: Coordinate of the Face for the specified context
+ *
+ * Returns the face on which the cell being processed is found, as specified
+ * by the supplied context.  See rsGetArray0() for an explanation of the context.
+ *
+ * Returns RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X if the face dimension is not
+ * present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern rs_allocation_cubemap_face __attribute__((overloadable))
+    rsGetFace(rs_kernel_context ctxt);
+#endif
+
+/*
+ * rsGetLod: Index in the Levels of Detail dimension for the specified context.
+ *
+ * Returns the index in the Levels of Detail dimension of the cell being
+ * processed, as specified by the supplied context.  See rsGetArray0() for
+ * an explanation of the context.
+ *
+ * Returns 0 if the Levels of Detail dimension is not present.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 23))
+extern uint32_t __attribute__((overloadable))
+    rsGetLod(rs_kernel_context ctxt);
+#endif
+
+#endif // RENDERSCRIPT_RS_CORE_RSH
diff --git a/renderscript/include/rs_core_math.rsh b/renderscript/include/rs_core_math.rsh
index 287a1b9..afa3638 100644
--- a/renderscript/include/rs_core_math.rsh
+++ b/renderscript/include/rs_core_math.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,9873 +16,5418 @@
 
 // Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
 
-#ifndef __rs_core_math_rsh__
-#define __rs_core_math_rsh__
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the absolute value of a value.
+ * rs_core_math.rsh: Mathematical functions
  *
- * Supported by API versions 9 and newer.
- */
-extern uchar __attribute__((const, overloadable))abs(char value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
+ * Most mathematical functions can be applied to scalars and vectors.
+ * When applied to vectors, the returned value is a vector of the function
+ * applied to each entry of the input.
  *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))abs(char2 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
+ * For example:
  *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))abs(char3 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
+ * float3 a, b;
+ * // The following call sets
+ * //   a.x to sin(b.x),
+ * //   a.y to sin(b.y), and
+ * //   a.z to sin(b.z).
+ * a = sin(b);
  *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))abs(char4 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
  *
- * Supported by API versions 9 and newer.
- */
-extern ushort __attribute__((const, overloadable))abs(short value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
+ * A few functions, like distance() and length(), instead interpret the input
+ * as a single vector in n-dimensional space.
  *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))abs(short2 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
+ * The precision of the mathematical operations is affected by the pragmas
+ * rs_fp_relaxed and rs_fp_full.
  *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))abs(short3 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
+ * Different precision/speed tradeoffs can be achieved by using three variants
+ * of common math functions.  Functions with a name starting with
+ * native_ may have custom hardware implementations with weaker precision,
+ * half_ may perform internal computations using 16-bit floats, and
+ * fast_ are n-dimensional space computations that may use 16-bit floats.
  *
- * Supported by API versions 9 and newer.
  */
-extern ushort4 __attribute__((const, overloadable))abs(short4 value);
-#endif
+#ifndef RENDERSCRIPT_RS_CORE_MATH_RSH
+#define RENDERSCRIPT_RS_CORE_MATH_RSH
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the absolute value of a value.
+ * abs: Absolute value of an integer
  *
- * Supported by API versions 9 and newer.
- */
-extern uint __attribute__((const, overloadable))abs(int value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
+ * Returns the absolute value of an integer.
  *
- * Supported by API versions 9 and newer.
+ * For floats, use fabs().
  */
-extern uint2 __attribute__((const, overloadable))abs(int2 value);
-#endif
+extern uchar __attribute__((const, overloadable))
+    abs(char v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))abs(int3 value);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    abs(char2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))abs(int4 value);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    abs(char3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acos
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))acos(float v);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    abs(char4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acos
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))acos(float2 v);
-#endif
+extern ushort __attribute__((const, overloadable))
+    abs(short v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acos
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))acos(float3 v);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    abs(short2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acos
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))acos(float4 v);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    abs(short3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acosh
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))acosh(float);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    abs(short4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acosh
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))acosh(float2);
-#endif
+extern uint __attribute__((const, overloadable))
+    abs(int v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acosh
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))acosh(float3);
-#endif
+extern uint2 __attribute__((const, overloadable))
+    abs(int2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acosh
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))acosh(float4);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    abs(int3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acospi
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))acospi(float v);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    abs(int4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * acospi
+ * acos: Inverse cosine
  *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))acospi(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acospi
+ * Returns the inverse cosine, in radians.
  *
- * Supported by API versions 9 and newer.
+ * See also native_acos().
  */
-extern float3 __attribute__((const, overloadable))acospi(float3 v);
-#endif
+extern float __attribute__((const, overloadable))
+    acos(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * acospi
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))acospi(float4 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    acos(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * asin
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))asin(float v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    acos(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * asin
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))asin(float2 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    acos(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * asin
+ * acosh: Inverse hyperbolic cosine
  *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))asin(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * asin
+ * Returns the inverse hyperbolic cosine, in radians.
  *
- * Supported by API versions 9 and newer.
+ * See also native_acosh().
  */
-extern float4 __attribute__((const, overloadable))asin(float4 v);
-#endif
+extern float __attribute__((const, overloadable))
+    acosh(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * asinh
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))asinh(float);
-#endif
+extern float2 __attribute__((const, overloadable))
+    acosh(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * asinh
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))asinh(float2);
-#endif
+extern float3 __attribute__((const, overloadable))
+    acosh(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * asinh
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))asinh(float3);
-#endif
+extern float4 __attribute__((const, overloadable))
+    acosh(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * asinh
+ * acospi: Inverse cosine divided by pi
  *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))asinh(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse sine divided by PI.
+ * Returns the inverse cosine in radians, divided by pi.
  *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))asinpi(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse sine divided by PI.
+ * To get an inverse cosine measured in degrees, use acospi(a) * 180.f.
  *
- * Supported by API versions 9 and newer.
+ * See also native_acospi().
  */
-extern float2 __attribute__((const, overloadable))asinpi(float2 v);
-#endif
+extern float __attribute__((const, overloadable))
+    acospi(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse sine divided by PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))asinpi(float3 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    acospi(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse sine divided by PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))asinpi(float4 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    acospi(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))atan(float v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    acospi(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the inverse tangent.
+ * asin: Inverse sine
  *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))atan(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent.
+ * Returns the inverse sine, in radians.
  *
- * Supported by API versions 9 and newer.
+ * See also native_asin().
  */
-extern float3 __attribute__((const, overloadable))atan(float3 v);
-#endif
+extern float __attribute__((const, overloadable))
+    asin(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))atan(float4 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    asin(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    asin(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    asin(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the inverse tangent of y / x.
+ * asinh: Inverse hyperbolic sine
  *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))atan2(float y, float x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent of y / x.
+ * Returns the inverse hyperbolic sine, in radians.
  *
- * Supported by API versions 9 and newer.
+ * See also native_asinh().
  */
-extern float2 __attribute__((const, overloadable))atan2(float2 y, float2 x);
-#endif
+extern float __attribute__((const, overloadable))
+    asinh(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent of y / x.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))atan2(float3 y, float3 x);
-#endif
+extern float2 __attribute__((const, overloadable))
+    asinh(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent of y / x.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))atan2(float4 y, float4 x);
-#endif
+extern float3 __attribute__((const, overloadable))
+    asinh(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent of y / x, divided by PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))atan2pi(float y, float x);
-#endif
+extern float4 __attribute__((const, overloadable))
+    asinh(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the inverse tangent of y / x, divided by PI.
+ * asinpi: Inverse sine divided by pi
  *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))atan2pi(float2 y, float2 x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent of y / x, divided by PI.
+ * Returns the inverse sine in radians, divided by pi.
  *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))atan2pi(float3 y, float3 x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent of y / x, divided by PI.
+ * To get an inverse sine measured in degrees, use asinpi(a) * 180.f.
  *
- * Supported by API versions 9 and newer.
+ * See also native_asinpi().
  */
-extern float4 __attribute__((const, overloadable))atan2pi(float4 y, float4 x);
-#endif
+extern float __attribute__((const, overloadable))
+    asinpi(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse hyperbolic tangent.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))atanh(float v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    asinpi(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse hyperbolic tangent.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))atanh(float2 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    asinpi(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse hyperbolic tangent.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))atanh(float3 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    asinpi(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the inverse hyperbolic tangent.
+ * atan: Inverse tangent
  *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))atanh(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent divided by PI.
+ * Returns the inverse tangent, in radians.
  *
- * Supported by API versions 9 and newer.
+ * See also native_atan().
  */
-extern float __attribute__((const, overloadable))atanpi(float v);
-#endif
+extern float __attribute__((const, overloadable))
+    atan(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent divided by PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))atanpi(float2 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    atan(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent divided by PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))atanpi(float3 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    atan(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the inverse tangent divided by PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))atanpi(float4 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    atan(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the cube root.
+ * atan2: Inverse tangent of a ratio
  *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))cbrt(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cube root.
+ * Returns the inverse tangent of (numerator / denominator), in radians.
  *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))cbrt(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cube root.
+ * See also native_atan2().
  *
- * Supported by API versions 9 and newer.
+ * Parameters:
+ *   numerator The numerator
+ *   denominator The denominator.  Can be 0.
  */
-extern float3 __attribute__((const, overloadable))cbrt(float3);
-#endif
+extern float __attribute__((const, overloadable))
+    atan2(float numerator, float denominator);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cube root.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))cbrt(float4);
-#endif
+extern float2 __attribute__((const, overloadable))
+    atan2(float2 numerator, float2 denominator);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the smallest integer not less than a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))ceil(float);
-#endif
+extern float3 __attribute__((const, overloadable))
+    atan2(float3 numerator, float3 denominator);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the smallest integer not less than a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))ceil(float2);
-#endif
+extern float4 __attribute__((const, overloadable))
+    atan2(float4 numerator, float4 denominator);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the smallest integer not less than a value.
+ * atan2pi: Inverse tangent of a ratio, divided by pi
  *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))ceil(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the smallest integer not less than a value.
+ * Returns the inverse tangent of (numerator / denominator), in radians, divided by pi.
  *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))ceil(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Clamp a value to a specified high and low bound.
+ * To get an inverse tangent measured in degrees, use atan2pi(n, d) * 180.f.
  *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
+ * See also native_atan2pi().
  *
- * Supported by API versions 9 and newer.
+ * Parameters:
+ *   numerator The numerator
+ *   denominator The denominator.  Can be 0.
  */
-extern float __attribute__((const, overloadable))clamp(float value, float min_value, float max_value);
-#endif
+extern float __attribute__((const, overloadable))
+    atan2pi(float numerator, float denominator);
+
+extern float2 __attribute__((const, overloadable))
+    atan2pi(float2 numerator, float2 denominator);
+
+extern float3 __attribute__((const, overloadable))
+    atan2pi(float3 numerator, float3 denominator);
+
+extern float4 __attribute__((const, overloadable))
+    atan2pi(float4 numerator, float4 denominator);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Clamp a value to a specified high and low bound.
+ * atanh: Inverse hyperbolic tangent
  *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
+ * Returns the inverse hyperbolic tangent, in radians.
  *
- * Supported by API versions 9 and newer.
+ * See also native_atanh().
  */
-extern float2 __attribute__((const, overloadable))clamp(float2 value, float2 min_value, float2 max_value);
-#endif
+extern float __attribute__((const, overloadable))
+    atanh(float v);
+
+extern float2 __attribute__((const, overloadable))
+    atanh(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    atanh(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    atanh(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Clamp a value to a specified high and low bound.
+ * atanpi: Inverse tangent divided by pi
  *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
+ * Returns the inverse tangent in radians, divided by pi.
+ *
+ * To get an inverse tangent measured in degrees, use atanpi(a) * 180.f.
  *
- * Supported by API versions 9 and newer.
+ * See also native_atanpi().
  */
-extern float3 __attribute__((const, overloadable))clamp(float3 value, float3 min_value, float3 max_value);
-#endif
+extern float __attribute__((const, overloadable))
+    atanpi(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
+extern float2 __attribute__((const, overloadable))
+    atanpi(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    atanpi(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    atanpi(float4 v);
+
 /*
- * Clamp a value to a specified high and low bound.
+ * cbrt: Cube root
  *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
+ * Returns the cube root.
  *
- * Supported by API versions 9 and newer.
+ * See also native_cbrt().
  */
-extern float4 __attribute__((const, overloadable))clamp(float4 value, float4 min_value, float4 max_value);
-#endif
+extern float __attribute__((const, overloadable))
+    cbrt(float v);
+
+extern float2 __attribute__((const, overloadable))
+    cbrt(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    cbrt(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    cbrt(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Clamp a value to a specified high and low bound.
+ * ceil: Smallest integer not less than a value
+ *
+ * Returns the smallest integer not less than a value.
  *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
+ * For example, ceil(1.2f) returns 2.f, and ceil(-1.2f) returns -1.f.
  *
- * Supported by API versions 9 and newer.
+ * See also floor().
  */
-extern float2 __attribute__((const, overloadable))clamp(float2 value, float min_value, float max_value);
-#endif
+extern float __attribute__((const, overloadable))
+    ceil(float v);
+
+extern float2 __attribute__((const, overloadable))
+    ceil(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    ceil(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    ceil(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Clamp a value to a specified high and low bound.
+ * clamp: Restrain a value to a range
  *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
+ * Clamps a value to a specified high and low bound.  clamp() returns min_value
+ * if value < min_value, max_value if value > max_value, otherwise value.
  *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))clamp(float3 value, float min_value, float max_value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Clamp a value to a specified high and low bound.
+ * There are two variants of clamp: one where the min and max are scalars applied
+ * to all entries of the value, the other where the min and max are also vectors.
  *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
+ * If min_value is greater than max_value, the results are undefined.
  *
- * Supported by API versions 9 and newer.
+ * Parameters:
+ *   value Value to be clamped.
+ *   min_value Lower bound, a scalar or matching vector.
+ *   max_value High bound, must match the type of min_value.
  */
-extern float4 __attribute__((const, overloadable))clamp(float4 value, float min_value, float max_value);
+extern float __attribute__((const, overloadable))
+    clamp(float value, float min_value, float max_value);
+
+extern float2 __attribute__((const, overloadable))
+    clamp(float2 value, float2 min_value, float2 max_value);
+
+extern float3 __attribute__((const, overloadable))
+    clamp(float3 value, float3 min_value, float3 max_value);
+
+extern float4 __attribute__((const, overloadable))
+    clamp(float4 value, float4 min_value, float4 max_value);
+
+extern float2 __attribute__((const, overloadable))
+    clamp(float2 value, float min_value, float max_value);
+
+extern float3 __attribute__((const, overloadable))
+    clamp(float3 value, float min_value, float max_value);
+
+extern float4 __attribute__((const, overloadable))
+    clamp(float4 value, float min_value, float max_value);
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 19))
+extern char __attribute__((const, overloadable))
+    clamp(char value, char min_value, char max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern char __attribute__((const, overloadable))clamp(char value, char min_value, char max_value);
+extern char2 __attribute__((const, overloadable))
+    clamp(char2 value, char2 min_value, char2 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern char2 __attribute__((const, overloadable))clamp(char2 value, char2 min_value, char2 max_value);
+extern char3 __attribute__((const, overloadable))
+    clamp(char3 value, char3 min_value, char3 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern char3 __attribute__((const, overloadable))clamp(char3 value, char3 min_value, char3 max_value);
+extern char4 __attribute__((const, overloadable))
+    clamp(char4 value, char4 min_value, char4 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern char4 __attribute__((const, overloadable))clamp(char4 value, char4 min_value, char4 max_value);
+extern uchar __attribute__((const, overloadable))
+    clamp(uchar value, uchar min_value, uchar max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uchar __attribute__((const, overloadable))clamp(uchar value, uchar min_value, uchar max_value);
+extern uchar2 __attribute__((const, overloadable))
+    clamp(uchar2 value, uchar2 min_value, uchar2 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))clamp(uchar2 value, uchar2 min_value, uchar2 max_value);
+extern uchar3 __attribute__((const, overloadable))
+    clamp(uchar3 value, uchar3 min_value, uchar3 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))clamp(uchar3 value, uchar3 min_value, uchar3 max_value);
+extern uchar4 __attribute__((const, overloadable))
+    clamp(uchar4 value, uchar4 min_value, uchar4 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))clamp(uchar4 value, uchar4 min_value, uchar4 max_value);
+extern short __attribute__((const, overloadable))
+    clamp(short value, short min_value, short max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern short __attribute__((const, overloadable))clamp(short value, short min_value, short max_value);
+extern short2 __attribute__((const, overloadable))
+    clamp(short2 value, short2 min_value, short2 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern short2 __attribute__((const, overloadable))clamp(short2 value, short2 min_value, short2 max_value);
+extern short3 __attribute__((const, overloadable))
+    clamp(short3 value, short3 min_value, short3 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern short3 __attribute__((const, overloadable))clamp(short3 value, short3 min_value, short3 max_value);
+extern short4 __attribute__((const, overloadable))
+    clamp(short4 value, short4 min_value, short4 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern short4 __attribute__((const, overloadable))clamp(short4 value, short4 min_value, short4 max_value);
+extern ushort __attribute__((const, overloadable))
+    clamp(ushort value, ushort min_value, ushort max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ushort __attribute__((const, overloadable))clamp(ushort value, ushort min_value, ushort max_value);
+extern ushort2 __attribute__((const, overloadable))
+    clamp(ushort2 value, ushort2 min_value, ushort2 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))clamp(ushort2 value, ushort2 min_value, ushort2 max_value);
+extern ushort3 __attribute__((const, overloadable))
+    clamp(ushort3 value, ushort3 min_value, ushort3 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))clamp(ushort3 value, ushort3 min_value, ushort3 max_value);
+extern ushort4 __attribute__((const, overloadable))
+    clamp(ushort4 value, ushort4 min_value, ushort4 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))clamp(ushort4 value, ushort4 min_value, ushort4 max_value);
+extern int __attribute__((const, overloadable))
+    clamp(int value, int min_value, int max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern int __attribute__((const, overloadable))clamp(int value, int min_value, int max_value);
+extern int2 __attribute__((const, overloadable))
+    clamp(int2 value, int2 min_value, int2 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern int2 __attribute__((const, overloadable))clamp(int2 value, int2 min_value, int2 max_value);
+extern int3 __attribute__((const, overloadable))
+    clamp(int3 value, int3 min_value, int3 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern int3 __attribute__((const, overloadable))clamp(int3 value, int3 min_value, int3 max_value);
+extern int4 __attribute__((const, overloadable))
+    clamp(int4 value, int4 min_value, int4 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern int4 __attribute__((const, overloadable))clamp(int4 value, int4 min_value, int4 max_value);
+extern uint __attribute__((const, overloadable))
+    clamp(uint value, uint min_value, uint max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uint __attribute__((const, overloadable))clamp(uint value, uint min_value, uint max_value);
+extern uint2 __attribute__((const, overloadable))
+    clamp(uint2 value, uint2 min_value, uint2 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uint2 __attribute__((const, overloadable))clamp(uint2 value, uint2 min_value, uint2 max_value);
+extern uint3 __attribute__((const, overloadable))
+    clamp(uint3 value, uint3 min_value, uint3 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uint3 __attribute__((const, overloadable))clamp(uint3 value, uint3 min_value, uint3 max_value);
+extern uint4 __attribute__((const, overloadable))
+    clamp(uint4 value, uint4 min_value, uint4 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uint4 __attribute__((const, overloadable))clamp(uint4 value, uint4 min_value, uint4 max_value);
+extern long __attribute__((const, overloadable))
+    clamp(long value, long min_value, long max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern long __attribute__((const, overloadable))clamp(long value, long min_value, long max_value);
+extern long2 __attribute__((const, overloadable))
+    clamp(long2 value, long2 min_value, long2 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern long2 __attribute__((const, overloadable))clamp(long2 value, long2 min_value, long2 max_value);
+extern long3 __attribute__((const, overloadable))
+    clamp(long3 value, long3 min_value, long3 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern long3 __attribute__((const, overloadable))clamp(long3 value, long3 min_value, long3 max_value);
+extern long4 __attribute__((const, overloadable))
+    clamp(long4 value, long4 min_value, long4 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern long4 __attribute__((const, overloadable))clamp(long4 value, long4 min_value, long4 max_value);
+extern ulong __attribute__((const, overloadable))
+    clamp(ulong value, ulong min_value, ulong max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ulong __attribute__((const, overloadable))clamp(ulong value, ulong min_value, ulong max_value);
+extern ulong2 __attribute__((const, overloadable))
+    clamp(ulong2 value, ulong2 min_value, ulong2 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))clamp(ulong2 value, ulong2 min_value, ulong2 max_value);
+extern ulong3 __attribute__((const, overloadable))
+    clamp(ulong3 value, ulong3 min_value, ulong3 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))clamp(ulong3 value, ulong3 min_value, ulong3 max_value);
+extern ulong4 __attribute__((const, overloadable))
+    clamp(ulong4 value, ulong4 min_value, ulong4 max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))clamp(ulong4 value, ulong4 min_value, ulong4 max_value);
+extern char2 __attribute__((const, overloadable))
+    clamp(char2 value, char min_value, char max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern char2 __attribute__((const, overloadable))clamp(char2 value, char min_value, char max_value);
+extern char3 __attribute__((const, overloadable))
+    clamp(char3 value, char min_value, char max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern char3 __attribute__((const, overloadable))clamp(char3 value, char min_value, char max_value);
+extern char4 __attribute__((const, overloadable))
+    clamp(char4 value, char min_value, char max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern char4 __attribute__((const, overloadable))clamp(char4 value, char min_value, char max_value);
+extern uchar2 __attribute__((const, overloadable))
+    clamp(uchar2 value, uchar min_value, uchar max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))clamp(uchar2 value, uchar min_value, uchar max_value);
+extern uchar3 __attribute__((const, overloadable))
+    clamp(uchar3 value, uchar min_value, uchar max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))clamp(uchar3 value, uchar min_value, uchar max_value);
+extern uchar4 __attribute__((const, overloadable))
+    clamp(uchar4 value, uchar min_value, uchar max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))clamp(uchar4 value, uchar min_value, uchar max_value);
+extern short2 __attribute__((const, overloadable))
+    clamp(short2 value, short min_value, short max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern short2 __attribute__((const, overloadable))clamp(short2 value, short min_value, short max_value);
+extern short3 __attribute__((const, overloadable))
+    clamp(short3 value, short min_value, short max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern short3 __attribute__((const, overloadable))clamp(short3 value, short min_value, short max_value);
+extern short4 __attribute__((const, overloadable))
+    clamp(short4 value, short min_value, short max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern short4 __attribute__((const, overloadable))clamp(short4 value, short min_value, short max_value);
+extern ushort2 __attribute__((const, overloadable))
+    clamp(ushort2 value, ushort min_value, ushort max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))clamp(ushort2 value, ushort min_value, ushort max_value);
+extern ushort3 __attribute__((const, overloadable))
+    clamp(ushort3 value, ushort min_value, ushort max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))clamp(ushort3 value, ushort min_value, ushort max_value);
+extern ushort4 __attribute__((const, overloadable))
+    clamp(ushort4 value, ushort min_value, ushort max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))clamp(ushort4 value, ushort min_value, ushort max_value);
+extern int2 __attribute__((const, overloadable))
+    clamp(int2 value, int min_value, int max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern int2 __attribute__((const, overloadable))clamp(int2 value, int min_value, int max_value);
+extern int3 __attribute__((const, overloadable))
+    clamp(int3 value, int min_value, int max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern int3 __attribute__((const, overloadable))clamp(int3 value, int min_value, int max_value);
+extern int4 __attribute__((const, overloadable))
+    clamp(int4 value, int min_value, int max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern int4 __attribute__((const, overloadable))clamp(int4 value, int min_value, int max_value);
+extern uint2 __attribute__((const, overloadable))
+    clamp(uint2 value, uint min_value, uint max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uint2 __attribute__((const, overloadable))clamp(uint2 value, uint min_value, uint max_value);
+extern uint3 __attribute__((const, overloadable))
+    clamp(uint3 value, uint min_value, uint max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uint3 __attribute__((const, overloadable))clamp(uint3 value, uint min_value, uint max_value);
+extern uint4 __attribute__((const, overloadable))
+    clamp(uint4 value, uint min_value, uint max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern uint4 __attribute__((const, overloadable))clamp(uint4 value, uint min_value, uint max_value);
+extern long2 __attribute__((const, overloadable))
+    clamp(long2 value, long min_value, long max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern long2 __attribute__((const, overloadable))clamp(long2 value, long min_value, long max_value);
+extern long3 __attribute__((const, overloadable))
+    clamp(long3 value, long min_value, long max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern long3 __attribute__((const, overloadable))clamp(long3 value, long min_value, long max_value);
+extern long4 __attribute__((const, overloadable))
+    clamp(long4 value, long min_value, long max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern long4 __attribute__((const, overloadable))clamp(long4 value, long min_value, long max_value);
+extern ulong2 __attribute__((const, overloadable))
+    clamp(ulong2 value, ulong min_value, ulong max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))clamp(ulong2 value, ulong min_value, ulong max_value);
+extern ulong3 __attribute__((const, overloadable))
+    clamp(ulong3 value, ulong min_value, ulong max_value);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 19))
-/*
- * Clamp a value to a specified high and low bound.
- *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
- *
- * Supported by API versions 19 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))clamp(ulong3 value, ulong min_value, ulong max_value);
+extern ulong4 __attribute__((const, overloadable))
+    clamp(ulong4 value, ulong min_value, ulong max_value);
 #endif
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 19))
 /*
- * Clamp a value to a specified high and low bound.
+ * clz: Number of leading 0 bits
  *
- * @param amount value to be clamped.  Supports 1,2,3,4 components
- * @param min_value Lower bound, must be scalar or matching vector.
- * @param max_value High bound, must match type of low
+ * Returns the number of leading 0-bits in a value.
  *
- * Supported by API versions 19 and newer.
+ * For example, clz((char)0x03) returns 6.
  */
-extern ulong4 __attribute__((const, overloadable))clamp(ulong4 value, ulong min_value, ulong max_value);
-#endif
+extern char __attribute__((const, overloadable))
+    clz(char value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern char __attribute__((const, overloadable))clz(char value);
-#endif
+extern char2 __attribute__((const, overloadable))
+    clz(char2 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern char2 __attribute__((const, overloadable))clz(char2 value);
-#endif
+extern char3 __attribute__((const, overloadable))
+    clz(char3 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern char3 __attribute__((const, overloadable))clz(char3 value);
-#endif
+extern char4 __attribute__((const, overloadable))
+    clz(char4 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern char4 __attribute__((const, overloadable))clz(char4 value);
-#endif
+extern uchar __attribute__((const, overloadable))
+    clz(uchar value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar __attribute__((const, overloadable))clz(uchar value);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    clz(uchar2 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))clz(uchar2 value);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    clz(uchar3 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))clz(uchar3 value);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    clz(uchar4 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))clz(uchar4 value);
-#endif
+extern short __attribute__((const, overloadable))
+    clz(short value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern short __attribute__((const, overloadable))clz(short value);
-#endif
+extern short2 __attribute__((const, overloadable))
+    clz(short2 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern short2 __attribute__((const, overloadable))clz(short2 value);
-#endif
+extern short3 __attribute__((const, overloadable))
+    clz(short3 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern short3 __attribute__((const, overloadable))clz(short3 value);
-#endif
+extern short4 __attribute__((const, overloadable))
+    clz(short4 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern short4 __attribute__((const, overloadable))clz(short4 value);
-#endif
+extern ushort __attribute__((const, overloadable))
+    clz(ushort value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort __attribute__((const, overloadable))clz(ushort value);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    clz(ushort2 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))clz(ushort2 value);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    clz(ushort3 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))clz(ushort3 value);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    clz(ushort4 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))clz(ushort4 value);
-#endif
+extern int __attribute__((const, overloadable))
+    clz(int value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern int __attribute__((const, overloadable))clz(int value);
-#endif
+extern int2 __attribute__((const, overloadable))
+    clz(int2 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))clz(int2 value);
-#endif
+extern int3 __attribute__((const, overloadable))
+    clz(int3 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))clz(int3 value);
-#endif
+extern int4 __attribute__((const, overloadable))
+    clz(int4 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))clz(int4 value);
-#endif
+extern uint __attribute__((const, overloadable))
+    clz(uint value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uint __attribute__((const, overloadable))clz(uint value);
-#endif
+extern uint2 __attribute__((const, overloadable))
+    clz(uint2 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uint2 __attribute__((const, overloadable))clz(uint2 value);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    clz(uint3 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the number of leading 0-bits in a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))clz(uint3 value);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    clz(uint4 value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the number of leading 0-bits in a value.
+ * convert: Converts numerical vectors
  *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))clz(uint4 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float2 to float2
+ * Component wise conversion from a numerical type to another.
  *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float3 to float3
+ * Conversions of floating point values to integer will truncate.
  *
- * Supported by API versions 9 and newer.
+ * Conversions of numbers too large to fit the destination type yield undefined results.
+ * For example, converting a float that contains 1.0e18 to a short is undefined.
+ * Use clamp() to avoid this.
  */
-extern float3 __attribute__((const, overloadable))convert_float3(float3 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    convert_float2(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float4 to float4
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(float4 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    convert_float3(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char2 to float2
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(char2 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    convert_float4(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char3 to float3
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(char3 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    convert_float2(char2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char4 to float4
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(char4 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    convert_float3(char3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar2 to float2
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(uchar2 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    convert_float4(char4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar3 to float3
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(uchar3 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    convert_float2(uchar2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar4 to float4
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(uchar4 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    convert_float3(uchar3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short2 to float2
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(short2 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    convert_float4(uchar4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short3 to float3
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(short3 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    convert_float2(short2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short4 to float4
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(short4 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    convert_float3(short3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort2 to float2
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(ushort2 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    convert_float4(short4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort3 to float3
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(ushort3 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    convert_float2(ushort2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort4 to float4
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(ushort4 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    convert_float3(ushort3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int2 to float2
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(int2 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    convert_float4(ushort4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int3 to float3
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(int3 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    convert_float2(int2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int4 to float4
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(int4 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    convert_float3(int3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint2 to float2
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(uint2 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    convert_float4(int4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint3 to float3
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(uint3 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    convert_float2(uint2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint4 to float4
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(uint4 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    convert_float3(uint3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float2 to char2
- *
- * Supported by API versions 9 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(float2 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    convert_float4(uint4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float3 to char3
- *
- * Supported by API versions 9 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(float3 v);
-#endif
+extern char2 __attribute__((const, overloadable))
+    convert_char2(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float4 to char4
- *
- * Supported by API versions 9 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(float4 v);
-#endif
+extern char3 __attribute__((const, overloadable))
+    convert_char3(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char2 to char2
- *
- * Supported by API versions 9 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(char2 v);
-#endif
+extern char4 __attribute__((const, overloadable))
+    convert_char4(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char3 to char3
- *
- * Supported by API versions 9 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(char3 v);
-#endif
+extern char2 __attribute__((const, overloadable))
+    convert_char2(char2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char4 to char4
- *
- * Supported by API versions 9 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(char4 v);
-#endif
+extern char3 __attribute__((const, overloadable))
+    convert_char3(char3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar2 to char2
- *
- * Supported by API versions 9 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(uchar2 v);
-#endif
+extern char4 __attribute__((const, overloadable))
+    convert_char4(char4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar3 to char3
- *
- * Supported by API versions 9 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(uchar3 v);
-#endif
+extern char2 __attribute__((const, overloadable))
+    convert_char2(uchar2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar4 to char4
- *
- * Supported by API versions 9 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(uchar4 v);
-#endif
+extern char3 __attribute__((const, overloadable))
+    convert_char3(uchar3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short2 to char2
- *
- * Supported by API versions 9 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(short2 v);
-#endif
+extern char4 __attribute__((const, overloadable))
+    convert_char4(uchar4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short3 to char3
- *
- * Supported by API versions 9 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(short3 v);
-#endif
+extern char2 __attribute__((const, overloadable))
+    convert_char2(short2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short4 to char4
- *
- * Supported by API versions 9 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(short4 v);
-#endif
+extern char3 __attribute__((const, overloadable))
+    convert_char3(short3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort2 to char2
- *
- * Supported by API versions 9 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(ushort2 v);
-#endif
+extern char4 __attribute__((const, overloadable))
+    convert_char4(short4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort3 to char3
- *
- * Supported by API versions 9 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(ushort3 v);
-#endif
+extern char2 __attribute__((const, overloadable))
+    convert_char2(ushort2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort4 to char4
- *
- * Supported by API versions 9 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(ushort4 v);
-#endif
+extern char3 __attribute__((const, overloadable))
+    convert_char3(ushort3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int2 to char2
- *
- * Supported by API versions 9 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(int2 v);
-#endif
+extern char4 __attribute__((const, overloadable))
+    convert_char4(ushort4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int3 to char3
- *
- * Supported by API versions 9 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(int3 v);
-#endif
+extern char2 __attribute__((const, overloadable))
+    convert_char2(int2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int4 to char4
- *
- * Supported by API versions 9 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(int4 v);
-#endif
+extern char3 __attribute__((const, overloadable))
+    convert_char3(int3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint2 to char2
- *
- * Supported by API versions 9 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(uint2 v);
-#endif
+extern char4 __attribute__((const, overloadable))
+    convert_char4(int4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint3 to char3
- *
- * Supported by API versions 9 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(uint3 v);
-#endif
+extern char2 __attribute__((const, overloadable))
+    convert_char2(uint2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint4 to char4
- *
- * Supported by API versions 9 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(uint4 v);
-#endif
+extern char3 __attribute__((const, overloadable))
+    convert_char3(uint3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float2 to uchar2
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(float2 v);
-#endif
+extern char4 __attribute__((const, overloadable))
+    convert_char4(uint4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float3 to uchar3
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(float3 v);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float4 to uchar4
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(float4 v);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char2 to uchar2
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(char2 v);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char3 to uchar3
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(char3 v);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(char2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char4 to uchar4
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(char4 v);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(char3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar2 to uchar2
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(uchar2 v);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(char4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar3 to uchar3
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(uchar3 v);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(uchar2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar4 to uchar4
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(uchar4 v);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(uchar3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short2 to uchar2
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(short2 v);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(uchar4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short3 to uchar3
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(short3 v);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(short2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short4 to uchar4
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(short4 v);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(short3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort2 to uchar2
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(ushort2 v);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(short4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort3 to uchar3
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(ushort3 v);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(ushort2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort4 to uchar4
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(ushort4 v);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(ushort3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int2 to uchar2
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(int2 v);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(ushort4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int3 to uchar3
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(int3 v);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(int2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int4 to uchar4
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(int4 v);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(int3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint2 to uchar2
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(uint2 v);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(int4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint3 to uchar3
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(uint3 v);
-#endif
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(uint2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint4 to uchar4
- *
- * Supported by API versions 9 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(uint4 v);
-#endif
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(uint3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float2 to short2
- *
- * Supported by API versions 9 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(float2 v);
-#endif
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(uint4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float3 to short3
- *
- * Supported by API versions 9 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(float3 v);
-#endif
+extern short2 __attribute__((const, overloadable))
+    convert_short2(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float4 to short4
- *
- * Supported by API versions 9 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(float4 v);
-#endif
+extern short3 __attribute__((const, overloadable))
+    convert_short3(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char2 to short2
- *
- * Supported by API versions 9 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(char2 v);
-#endif
+extern short4 __attribute__((const, overloadable))
+    convert_short4(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char3 to short3
- *
- * Supported by API versions 9 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(char3 v);
-#endif
+extern short2 __attribute__((const, overloadable))
+    convert_short2(char2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char4 to short4
- *
- * Supported by API versions 9 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(char4 v);
-#endif
+extern short3 __attribute__((const, overloadable))
+    convert_short3(char3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar2 to short2
- *
- * Supported by API versions 9 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(uchar2 v);
-#endif
+extern short4 __attribute__((const, overloadable))
+    convert_short4(char4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar3 to short3
- *
- * Supported by API versions 9 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(uchar3 v);
-#endif
+extern short2 __attribute__((const, overloadable))
+    convert_short2(uchar2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar4 to short4
- *
- * Supported by API versions 9 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(uchar4 v);
-#endif
+extern short3 __attribute__((const, overloadable))
+    convert_short3(uchar3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short2 to short2
- *
- * Supported by API versions 9 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(short2 v);
-#endif
+extern short4 __attribute__((const, overloadable))
+    convert_short4(uchar4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short3 to short3
- *
- * Supported by API versions 9 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(short3 v);
-#endif
+extern short2 __attribute__((const, overloadable))
+    convert_short2(short2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short4 to short4
- *
- * Supported by API versions 9 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(short4 v);
-#endif
+extern short3 __attribute__((const, overloadable))
+    convert_short3(short3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort2 to short2
- *
- * Supported by API versions 9 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(ushort2 v);
-#endif
+extern short4 __attribute__((const, overloadable))
+    convert_short4(short4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort3 to short3
- *
- * Supported by API versions 9 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(ushort3 v);
-#endif
+extern short2 __attribute__((const, overloadable))
+    convert_short2(ushort2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort4 to short4
- *
- * Supported by API versions 9 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(ushort4 v);
-#endif
+extern short3 __attribute__((const, overloadable))
+    convert_short3(ushort3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int2 to short2
- *
- * Supported by API versions 9 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(int2 v);
-#endif
+extern short4 __attribute__((const, overloadable))
+    convert_short4(ushort4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int3 to short3
- *
- * Supported by API versions 9 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(int3 v);
-#endif
+extern short2 __attribute__((const, overloadable))
+    convert_short2(int2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int4 to short4
- *
- * Supported by API versions 9 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(int4 v);
-#endif
+extern short3 __attribute__((const, overloadable))
+    convert_short3(int3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint2 to short2
- *
- * Supported by API versions 9 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(uint2 v);
-#endif
+extern short4 __attribute__((const, overloadable))
+    convert_short4(int4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint3 to short3
- *
- * Supported by API versions 9 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(uint3 v);
-#endif
+extern short2 __attribute__((const, overloadable))
+    convert_short2(uint2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint4 to short4
- *
- * Supported by API versions 9 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(uint4 v);
-#endif
+extern short3 __attribute__((const, overloadable))
+    convert_short3(uint3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float2 to ushort2
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(float2 v);
-#endif
+extern short4 __attribute__((const, overloadable))
+    convert_short4(uint4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float3 to ushort3
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(float3 v);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float4 to ushort4
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(float4 v);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char2 to ushort2
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(char2 v);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char3 to ushort3
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(char3 v);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(char2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char4 to ushort4
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(char4 v);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(char3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar2 to ushort2
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(uchar2 v);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(char4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar3 to ushort3
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(uchar3 v);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(uchar2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar4 to ushort4
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(uchar4 v);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(uchar3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short2 to ushort2
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(short2 v);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(uchar4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short3 to ushort3
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(short3 v);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(short2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short4 to ushort4
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(short4 v);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(short3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort2 to ushort2
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(ushort2 v);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(short4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort3 to ushort3
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(ushort3 v);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(ushort2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort4 to ushort4
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(ushort4 v);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(ushort3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int2 to ushort2
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(int2 v);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(ushort4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int3 to ushort3
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(int3 v);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(int2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int4 to ushort4
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(int4 v);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(int3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint2 to ushort2
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(uint2 v);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(int4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint3 to ushort3
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(uint3 v);
-#endif
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(uint2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint4 to ushort4
- *
- * Supported by API versions 9 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(uint4 v);
-#endif
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(uint3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float2 to int2
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(float2 v);
-#endif
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(uint4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float3 to int3
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(float3 v);
-#endif
+extern int2 __attribute__((const, overloadable))
+    convert_int2(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float4 to int4
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(float4 v);
-#endif
+extern int3 __attribute__((const, overloadable))
+    convert_int3(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char2 to int2
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(char2 v);
-#endif
+extern int4 __attribute__((const, overloadable))
+    convert_int4(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char3 to int3
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(char3 v);
-#endif
+extern int2 __attribute__((const, overloadable))
+    convert_int2(char2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char4 to int4
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(char4 v);
-#endif
+extern int3 __attribute__((const, overloadable))
+    convert_int3(char3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar2 to int2
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(uchar2 v);
-#endif
+extern int4 __attribute__((const, overloadable))
+    convert_int4(char4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar3 to int3
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(uchar3 v);
-#endif
+extern int2 __attribute__((const, overloadable))
+    convert_int2(uchar2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar4 to int4
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(uchar4 v);
-#endif
+extern int3 __attribute__((const, overloadable))
+    convert_int3(uchar3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short2 to int2
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(short2 v);
-#endif
+extern int4 __attribute__((const, overloadable))
+    convert_int4(uchar4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short3 to int3
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(short3 v);
-#endif
+extern int2 __attribute__((const, overloadable))
+    convert_int2(short2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short4 to int4
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(short4 v);
-#endif
+extern int3 __attribute__((const, overloadable))
+    convert_int3(short3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort2 to int2
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(ushort2 v);
-#endif
+extern int4 __attribute__((const, overloadable))
+    convert_int4(short4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort3 to int3
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(ushort3 v);
-#endif
+extern int2 __attribute__((const, overloadable))
+    convert_int2(ushort2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort4 to int4
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(ushort4 v);
-#endif
+extern int3 __attribute__((const, overloadable))
+    convert_int3(ushort3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int2 to int2
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(int2 v);
-#endif
+extern int4 __attribute__((const, overloadable))
+    convert_int4(ushort4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int3 to int3
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(int3 v);
-#endif
+extern int2 __attribute__((const, overloadable))
+    convert_int2(int2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int4 to int4
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(int4 v);
-#endif
+extern int3 __attribute__((const, overloadable))
+    convert_int3(int3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint2 to int2
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(uint2 v);
-#endif
+extern int4 __attribute__((const, overloadable))
+    convert_int4(int4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint3 to int3
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(uint3 v);
-#endif
+extern int2 __attribute__((const, overloadable))
+    convert_int2(uint2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint4 to int4
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(uint4 v);
-#endif
+extern int3 __attribute__((const, overloadable))
+    convert_int3(uint3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float2 to uint2
- *
- * Supported by API versions 9 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(float2 v);
-#endif
+extern int4 __attribute__((const, overloadable))
+    convert_int4(uint4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float3 to uint3
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(float3 v);
-#endif
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from float4 to uint4
- *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(float4 v);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char2 to uint2
- *
- * Supported by API versions 9 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(char2 v);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char3 to uint3
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(char3 v);
-#endif
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(char2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from char4 to uint4
- *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(char4 v);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(char3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar2 to uint2
- *
- * Supported by API versions 9 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(uchar2 v);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(char4 v);
+
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(uchar2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar3 to uint3
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(uchar3 v);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(uchar3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uchar4 to uint4
- *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(uchar4 v);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(uchar4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short2 to uint2
- *
- * Supported by API versions 9 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(short2 v);
-#endif
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(short2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short3 to uint3
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(short3 v);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(short3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from short4 to uint4
- *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(short4 v);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(short4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort2 to uint2
- *
- * Supported by API versions 9 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(ushort2 v);
-#endif
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(ushort2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort3 to uint3
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(ushort3 v);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(ushort3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from ushort4 to uint4
- *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(ushort4 v);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(ushort4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int2 to uint2
- *
- * Supported by API versions 9 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(int2 v);
-#endif
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(int2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int3 to uint3
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(int3 v);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(int3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from int4 to uint4
- *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(int4 v);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(int4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint2 to uint2
- *
- * Supported by API versions 9 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(uint2 v);
-#endif
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(uint2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint3 to uint3
- *
- * Supported by API versions 9 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(uint3 v);
-#endif
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(uint3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Component wise conversion from uint4 to uint4
- *
- * Supported by API versions 9 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(uint4 v);
-#endif
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(uint4 v);
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(double2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(double3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(double4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(long2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(long3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(long4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(ulong2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(ulong3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(ulong4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(double2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(double3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(double4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(long2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(long3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(long4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(ulong2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(ulong3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(ulong4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(double2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(double3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(double4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(long2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(long3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(long4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(ulong2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(ulong3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(ulong4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to float2
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(double2 v);
+extern float2 __attribute__((const, overloadable))
+    convert_float2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to float3
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(double3 v);
+extern float3 __attribute__((const, overloadable))
+    convert_float3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to float4
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(double4 v);
+extern float4 __attribute__((const, overloadable))
+    convert_float4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to float2
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(long2 v);
+extern float2 __attribute__((const, overloadable))
+    convert_float2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to float3
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(long3 v);
+extern float3 __attribute__((const, overloadable))
+    convert_float3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to float4
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(long4 v);
+extern float4 __attribute__((const, overloadable))
+    convert_float4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to float2
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))convert_float2(ulong2 v);
+extern float2 __attribute__((const, overloadable))
+    convert_float2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to float3
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))convert_float3(ulong3 v);
+extern float3 __attribute__((const, overloadable))
+    convert_float3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to float4
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))convert_float4(ulong4 v);
+extern float4 __attribute__((const, overloadable))
+    convert_float4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to char2
- *
- * Supported by API versions 21 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(double2 v);
+extern char2 __attribute__((const, overloadable))
+    convert_char2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to char3
- *
- * Supported by API versions 21 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(double3 v);
+extern char3 __attribute__((const, overloadable))
+    convert_char3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to char4
- *
- * Supported by API versions 21 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(double4 v);
+extern char4 __attribute__((const, overloadable))
+    convert_char4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to char2
- *
- * Supported by API versions 21 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(long2 v);
+extern char2 __attribute__((const, overloadable))
+    convert_char2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to char3
- *
- * Supported by API versions 21 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(long3 v);
+extern char3 __attribute__((const, overloadable))
+    convert_char3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to char4
- *
- * Supported by API versions 21 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(long4 v);
+extern char4 __attribute__((const, overloadable))
+    convert_char4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to char2
- *
- * Supported by API versions 21 and newer.
- */
-extern char2 __attribute__((const, overloadable))convert_char2(ulong2 v);
+extern char2 __attribute__((const, overloadable))
+    convert_char2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to char3
- *
- * Supported by API versions 21 and newer.
- */
-extern char3 __attribute__((const, overloadable))convert_char3(ulong3 v);
+extern char3 __attribute__((const, overloadable))
+    convert_char3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to char4
- *
- * Supported by API versions 21 and newer.
- */
-extern char4 __attribute__((const, overloadable))convert_char4(ulong4 v);
+extern char4 __attribute__((const, overloadable))
+    convert_char4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to uchar2
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(double2 v);
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to uchar3
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(double3 v);
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to uchar4
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(double4 v);
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to uchar2
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(long2 v);
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to uchar3
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(long3 v);
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to uchar4
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(long4 v);
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to uchar2
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))convert_uchar2(ulong2 v);
+extern uchar2 __attribute__((const, overloadable))
+    convert_uchar2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to uchar3
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))convert_uchar3(ulong3 v);
+extern uchar3 __attribute__((const, overloadable))
+    convert_uchar3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to uchar4
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))convert_uchar4(ulong4 v);
+extern uchar4 __attribute__((const, overloadable))
+    convert_uchar4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to short2
- *
- * Supported by API versions 21 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(double2 v);
+extern short2 __attribute__((const, overloadable))
+    convert_short2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to short3
- *
- * Supported by API versions 21 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(double3 v);
+extern short3 __attribute__((const, overloadable))
+    convert_short3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to short4
- *
- * Supported by API versions 21 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(double4 v);
+extern short4 __attribute__((const, overloadable))
+    convert_short4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to short2
- *
- * Supported by API versions 21 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(long2 v);
+extern short2 __attribute__((const, overloadable))
+    convert_short2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to short3
- *
- * Supported by API versions 21 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(long3 v);
+extern short3 __attribute__((const, overloadable))
+    convert_short3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to short4
- *
- * Supported by API versions 21 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(long4 v);
+extern short4 __attribute__((const, overloadable))
+    convert_short4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to short2
- *
- * Supported by API versions 21 and newer.
- */
-extern short2 __attribute__((const, overloadable))convert_short2(ulong2 v);
+extern short2 __attribute__((const, overloadable))
+    convert_short2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to short3
- *
- * Supported by API versions 21 and newer.
- */
-extern short3 __attribute__((const, overloadable))convert_short3(ulong3 v);
+extern short3 __attribute__((const, overloadable))
+    convert_short3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to short4
- *
- * Supported by API versions 21 and newer.
- */
-extern short4 __attribute__((const, overloadable))convert_short4(ulong4 v);
+extern short4 __attribute__((const, overloadable))
+    convert_short4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to ushort2
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(double2 v);
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to ushort3
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(double3 v);
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to ushort4
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(double4 v);
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to ushort2
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(long2 v);
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to ushort3
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(long3 v);
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to ushort4
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(long4 v);
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to ushort2
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))convert_ushort2(ulong2 v);
+extern ushort2 __attribute__((const, overloadable))
+    convert_ushort2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to ushort3
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))convert_ushort3(ulong3 v);
+extern ushort3 __attribute__((const, overloadable))
+    convert_ushort3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to ushort4
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))convert_ushort4(ulong4 v);
+extern ushort4 __attribute__((const, overloadable))
+    convert_ushort4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to int2
- *
- * Supported by API versions 21 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(double2 v);
+extern int2 __attribute__((const, overloadable))
+    convert_int2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to int3
- *
- * Supported by API versions 21 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(double3 v);
+extern int3 __attribute__((const, overloadable))
+    convert_int3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to int4
- *
- * Supported by API versions 21 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(double4 v);
+extern int4 __attribute__((const, overloadable))
+    convert_int4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to int2
- *
- * Supported by API versions 21 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(long2 v);
+extern int2 __attribute__((const, overloadable))
+    convert_int2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to int3
- *
- * Supported by API versions 21 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(long3 v);
+extern int3 __attribute__((const, overloadable))
+    convert_int3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to int4
- *
- * Supported by API versions 21 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(long4 v);
+extern int4 __attribute__((const, overloadable))
+    convert_int4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to int2
- *
- * Supported by API versions 21 and newer.
- */
-extern int2 __attribute__((const, overloadable))convert_int2(ulong2 v);
+extern int2 __attribute__((const, overloadable))
+    convert_int2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to int3
- *
- * Supported by API versions 21 and newer.
- */
-extern int3 __attribute__((const, overloadable))convert_int3(ulong3 v);
+extern int3 __attribute__((const, overloadable))
+    convert_int3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to int4
- *
- * Supported by API versions 21 and newer.
- */
-extern int4 __attribute__((const, overloadable))convert_int4(ulong4 v);
+extern int4 __attribute__((const, overloadable))
+    convert_int4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double2 to uint2
- *
- * Supported by API versions 21 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(double2 v);
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(double2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double3 to uint3
- *
- * Supported by API versions 21 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(double3 v);
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(double3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from double4 to uint4
- *
- * Supported by API versions 21 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(double4 v);
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(double4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long2 to uint2
- *
- * Supported by API versions 21 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(long2 v);
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(long2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long3 to uint3
- *
- * Supported by API versions 21 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(long3 v);
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(long3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from long4 to uint4
- *
- * Supported by API versions 21 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(long4 v);
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(long4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong2 to uint2
- *
- * Supported by API versions 21 and newer.
- */
-extern uint2 __attribute__((const, overloadable))convert_uint2(ulong2 v);
+extern uint2 __attribute__((const, overloadable))
+    convert_uint2(ulong2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong3 to uint3
- *
- * Supported by API versions 21 and newer.
- */
-extern uint3 __attribute__((const, overloadable))convert_uint3(ulong3 v);
+extern uint3 __attribute__((const, overloadable))
+    convert_uint3(ulong3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ulong4 to uint4
- *
- * Supported by API versions 21 and newer.
- */
-extern uint4 __attribute__((const, overloadable))convert_uint4(ulong4 v);
+extern uint4 __attribute__((const, overloadable))
+    convert_uint4(ulong4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(float2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(float3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(float3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(float4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(float4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(char2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(char2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(char3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(char3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(char4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(char4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(uchar2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(uchar2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(uchar3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(uchar3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(uchar4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(uchar4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(short2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(short2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(short3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(short3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(short4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(short4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(ushort2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(ushort2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(ushort3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(ushort3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(ushort4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(ushort4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(int2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(int2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(int3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(int3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(int4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(int4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint2 to double2
- *
- * Supported by API versions 21 and newer.
- */
-extern double2 __attribute__((const, overloadable))convert_double2(uint2 v);
+extern double2 __attribute__((const, overloadable))
+    convert_double2(uint2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint3 to double3
- *
- * Supported by API versions 21 and newer.
- */
-extern double3 __attribute__((const, overloadable))convert_double3(uint3 v);
+extern double3 __attribute__((const, overloadable))
+    convert_double3(uint3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint4 to double4
- *
- * Supported by API versions 21 and newer.
- */
-extern double4 __attribute__((const, overloadable))convert_double4(uint4 v);
+extern double4 __attribute__((const, overloadable))
+    convert_double4(uint4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(float2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(float3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(float3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(float4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(float4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(char2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(char2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(char3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(char3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(char4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(char4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(uchar2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(uchar2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(uchar3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(uchar3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(uchar4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(uchar4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(short2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(short2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(short3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(short3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(short4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(short4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(ushort2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(ushort2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(ushort3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(ushort3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(ushort4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(ushort4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(int2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(int2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(int3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(int3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(int4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(int4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint2 to long2
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))convert_long2(uint2 v);
+extern long2 __attribute__((const, overloadable))
+    convert_long2(uint2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint3 to long3
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))convert_long3(uint3 v);
+extern long3 __attribute__((const, overloadable))
+    convert_long3(uint3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint4 to long4
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))convert_long4(uint4 v);
+extern long4 __attribute__((const, overloadable))
+    convert_long4(uint4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(float2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(float3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(float3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from float4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(float4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(float4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(char2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(char2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(char3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(char3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from char4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(char4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(char4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(uchar2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(uchar2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(uchar3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(uchar3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uchar4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(uchar4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(uchar4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(short2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(short2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(short3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(short3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from short4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(short4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(short4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(ushort2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(ushort2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(ushort3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(ushort3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from ushort4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(ushort4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(ushort4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(int2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(int2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(int3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(int3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from int4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(int4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(int4 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint2 to ulong2
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))convert_ulong2(uint2 v);
+extern ulong2 __attribute__((const, overloadable))
+    convert_ulong2(uint2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint3 to ulong3
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))convert_ulong3(uint3 v);
+extern ulong3 __attribute__((const, overloadable))
+    convert_ulong3(uint3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Component wise conversion from uint4 to ulong4
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))convert_ulong4(uint4 v);
+extern ulong4 __attribute__((const, overloadable))
+    convert_ulong4(uint4 v);
 #endif
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Copy the sign bit from y to x.
+ * copysign: Copies the sign of a number to another
  *
- * Supported by API versions 9 and newer.
+ * Copies the sign from sign_value to magnitude_value.
+ *
+ * The value returned is either magnitude_value or -magnitude_value.
+ *
+ * For example, copysign(4.0f, -2.7f) returns -4.0f and copysign(-4.0f, 2.7f) returns 4.0f.
  */
-extern float __attribute__((const, overloadable))copysign(float x, float y);
-#endif
+extern float __attribute__((const, overloadable))
+    copysign(float magnitude_value, float sign_value);
+
+extern float2 __attribute__((const, overloadable))
+    copysign(float2 magnitude_value, float2 sign_value);
+
+extern float3 __attribute__((const, overloadable))
+    copysign(float3 magnitude_value, float3 sign_value);
+
+extern float4 __attribute__((const, overloadable))
+    copysign(float4 magnitude_value, float4 sign_value);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Copy the sign bit from y to x.
+ * cos: Cosine
+ *
+ * Returns the cosine of an angle measured in radians.
  *
- * Supported by API versions 9 and newer.
+ * See also native_cos().
  */
-extern float2 __attribute__((const, overloadable))copysign(float2 x, float2 y);
-#endif
+extern float __attribute__((const, overloadable))
+    cos(float v);
+
+extern float2 __attribute__((const, overloadable))
+    cos(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Copy the sign bit from y to x.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))copysign(float3 x, float3 y);
-#endif
+extern float3 __attribute__((const, overloadable))
+    cos(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Copy the sign bit from y to x.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))copysign(float4 x, float4 y);
-#endif
+extern float4 __attribute__((const, overloadable))
+    cos(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the cosine.
+ * cosh: Hyperbolic cosine
  *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))cos(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cosine.
+ * Returns the hyperbolic cosine of v, where v is measured in radians.
  *
- * Supported by API versions 9 and newer.
+ * See also native_cosh().
  */
-extern float2 __attribute__((const, overloadable))cos(float2);
-#endif
+extern float __attribute__((const, overloadable))
+    cosh(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cosine.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))cos(float3);
-#endif
+extern float2 __attribute__((const, overloadable))
+    cosh(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cosine.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))cos(float4);
-#endif
+extern float3 __attribute__((const, overloadable))
+    cosh(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hypebolic cosine.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))cosh(float);
-#endif
+extern float4 __attribute__((const, overloadable))
+    cosh(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the hypebolic cosine.
+ * cospi: Cosine of a number multiplied by pi
  *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))cosh(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hypebolic cosine.
+ * Returns the cosine of (v * pi), where (v * pi) is measured in radians.
  *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))cosh(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hypebolic cosine.
+ * To get the cosine of a value measured in degrees, call cospi(v / 180.f).
  *
- * Supported by API versions 9 and newer.
+ * See also native_cospi().
  */
-extern float4 __attribute__((const, overloadable))cosh(float4);
-#endif
+extern float __attribute__((const, overloadable))
+    cospi(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cosine of the value * PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))cospi(float);
-#endif
+extern float2 __attribute__((const, overloadable))
+    cospi(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cosine of the value * PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))cospi(float2);
-#endif
+extern float3 __attribute__((const, overloadable))
+    cospi(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the cosine of the value * PI.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))cospi(float3);
-#endif
+extern float4 __attribute__((const, overloadable))
+    cospi(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the cosine of the value * PI.
+ * cross: Cross product of two vectors
  *
- * Supported by API versions 9 and newer.
+ * Computes the cross product of two vectors.
  */
-extern float4 __attribute__((const, overloadable))cospi(float4);
-#endif
+extern float3 __attribute__((const, overloadable))
+    cross(float3 left_vector, float3 right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the cross product of two vectors.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))cross(float3 lhs, float3 rhs);
-#endif
+extern float4 __attribute__((const, overloadable))
+    cross(float4 left_vector, float4 right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Compute the cross product of two vectors.
+ * degrees: Converts radians into degrees
  *
- * Supported by API versions 9 and newer.
+ * Converts from radians to degrees.
  */
-extern float4 __attribute__((const, overloadable))cross(float4 lhs, float4 rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    degrees(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Convert from radians to degrees.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))degrees(float value);
-#endif
+extern float2 __attribute__((const, overloadable))
+    degrees(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Convert from radians to degrees.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))degrees(float2 value);
-#endif
+extern float3 __attribute__((const, overloadable))
+    degrees(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Convert from radians to degrees.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))degrees(float3 value);
-#endif
+extern float4 __attribute__((const, overloadable))
+    degrees(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Convert from radians to degrees.
+ * distance: Distance between two points
  *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))degrees(float4 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
  * Compute the distance between two points.
  *
- * Supported by API versions 9 and newer.
+ * See also fast_distance(), native_distance().
  */
-extern float __attribute__((const, overloadable))distance(float lhs, float rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    distance(float left_vector, float right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the distance between two points.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))distance(float2 lhs, float2 rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    distance(float2 left_vector, float2 right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the distance between two points.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))distance(float3 lhs, float3 rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    distance(float3 left_vector, float3 right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the distance between two points.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))distance(float4 lhs, float4 rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    distance(float4 left_vector, float4 right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Compute the dot product of two vectors.
+ * dot: Dot product of two vectors
  *
- * Supported by API versions 9 and newer.
+ * Computes the dot product of two vectors.
  */
-extern float __attribute__((const, overloadable))dot(float lhs, float rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    dot(float left_vector, float right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the dot product of two vectors.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))dot(float2 lhs, float2 rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    dot(float2 left_vector, float2 right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the dot product of two vectors.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))dot(float3 lhs, float3 rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    dot(float3 left_vector, float3 right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the dot product of two vectors.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))dot(float4 lhs, float4 rhs);
-#endif
+extern float __attribute__((const, overloadable))
+    dot(float4 left_vector, float4 right_vector);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the error function.
+ * erf: Mathematical error function
  *
- * Supported by API versions 9 and newer.
+ * Returns the error function.
  */
-extern float __attribute__((const, overloadable))erf(float);
-#endif
+extern float __attribute__((const, overloadable))
+    erf(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the error function.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))erf(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    erf(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the error function.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))erf(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    erf(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the error function.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))erf(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    erf(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the complementary error function.
+ * erfc: Mathematical complementary error function
  *
- * Supported by API versions 9 and newer.
+ * Returns the complementary error function.
  */
-extern float __attribute__((const, overloadable))erfc(float);
-#endif
+extern float __attribute__((const, overloadable))
+    erfc(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the complementary error function.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))erfc(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    erfc(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the complementary error function.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))erfc(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    erfc(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the complementary error function.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))erfc(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    erfc(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return e ^ value.
+ * exp: e raised to a number
  *
- * Supported by API versions 9 and newer.
+ * Returns e raised to v, i.e. e ^ v.
+ *
+ * See also native_exp().
  */
-extern float __attribute__((const, overloadable))exp(float);
-#endif
+extern float __attribute__((const, overloadable))
+    exp(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return e ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))exp(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    exp(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return e ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))exp(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    exp(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return e ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))exp(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    exp(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return 10 ^ value.
+ * exp10: 10 raised to a number
  *
- * Supported by API versions 9 and newer.
+ * Returns 10 raised to v, i.e. 10.f ^ v.
+ *
+ * See also native_exp10().
  */
-extern float __attribute__((const, overloadable))exp10(float);
-#endif
+extern float __attribute__((const, overloadable))
+    exp10(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return 10 ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))exp10(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    exp10(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return 10 ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))exp10(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    exp10(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return 10 ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))exp10(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    exp10(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return 2 ^ value.
+ * exp2: 2 raised to a number
  *
- * Supported by API versions 9 and newer.
+ * Returns 2 raised to v, i.e. 2.f ^ v.
+ *
+ * See also native_exp2().
  */
-extern float __attribute__((const, overloadable))exp2(float);
-#endif
+extern float __attribute__((const, overloadable))
+    exp2(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return 2 ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))exp2(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    exp2(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return 2 ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))exp2(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    exp2(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return 2 ^ value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))exp2(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    exp2(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return (e ^ value) - 1.
+ * expm1: e raised to a number minus one
  *
- * Supported by API versions 9 and newer.
+ * Returns e raised to v minus 1, i.e. (e ^ v) - 1.
+ *
+ * See also native_expm1().
  */
-extern float __attribute__((const, overloadable))expm1(float);
-#endif
+extern float __attribute__((const, overloadable))
+    expm1(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (e ^ value) - 1.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))expm1(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    expm1(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (e ^ value) - 1.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))expm1(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    expm1(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (e ^ value) - 1.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))expm1(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    expm1(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the absolute value of a value.
+ * fabs: Absolute value of a float
  *
- * Supported by API versions 9 and newer.
+ * Returns the absolute value of the float v.
+ *
+ * For integers, use abs().
  */
-extern float __attribute__((const, overloadable))fabs(float);
-#endif
+extern float __attribute__((const, overloadable))
+    fabs(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))fabs(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    fabs(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the absolute value of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))fabs(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    fabs(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
+extern float4 __attribute__((const, overloadable))
+    fabs(float4 v);
+
 /*
- * Return the absolute value of a value.
+ * fast_distance: Approximate distance between two points
  *
- * Supported by API versions 9 and newer.
+ * Computes the approximate distance between two points.
+ *
+ * The precision is what would be expected from doing the computation using 16 bit floating point values.
+ *
+ * See also distance(), native_distance().
  */
-extern float4 __attribute__((const, overloadable))fabs(float4);
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    fast_distance(float left_vector, float right_vector);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Compute the approximate distance between two points.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))fast_distance(float lhs, float rhs);
+extern float __attribute__((const, overloadable))
+    fast_distance(float2 left_vector, float2 right_vector);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Compute the approximate distance between two points.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))fast_distance(float2 lhs, float2 rhs);
+extern float __attribute__((const, overloadable))
+    fast_distance(float3 left_vector, float3 right_vector);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    fast_distance(float4 left_vector, float4 right_vector);
+#endif
+
 /*
- * Compute the approximate distance between two points.
+ * fast_length: Approximate length of a vector
  *
- * Supported by API versions 17 and newer.
+ * Computes the approximate length of a vector.
+ *
+ * The precision is what would be expected from doing the computation using 16 bit floating point values.
+ *
+ * See also length(), native_length().
  */
-extern float __attribute__((const, overloadable))fast_distance(float3 lhs, float3 rhs);
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    fast_length(float v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Compute the approximate distance between two points.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))fast_distance(float4 lhs, float4 rhs);
+extern float __attribute__((const, overloadable))
+    fast_length(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    fast_length(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    fast_length(float4 v);
+#endif
+
 /*
+ * fast_normalize: Approximate normalized vector
+ *
+ * Approximately normalizes a vector.
+ *
+ * For vectors of size 1, returns -1.f for negative values, 0.f for zero, and 1.f for positive values.
+ *
+ * The precision is what would be expected from doing the computation using 16 bit floating point values.
+ *
+ * See also normalize(), native_normalize().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    fast_normalize(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float2 __attribute__((const, overloadable))
+    fast_normalize(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float3 __attribute__((const, overloadable))
+    fast_normalize(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float4 __attribute__((const, overloadable))
+    fast_normalize(float4 v);
+#endif
+
+/*
+ * fdim: Positive difference between two values
+ *
+ * Returns the positive difference between two values.
+ *
+ * If a > b, returns (a - b); otherwise returns 0.f.
+ */
+extern float __attribute__((const, overloadable))
+    fdim(float a, float b);
+
+extern float2 __attribute__((const, overloadable))
+    fdim(float2 a, float2 b);
+
+extern float3 __attribute__((const, overloadable))
+    fdim(float3 a, float3 b);
+
+extern float4 __attribute__((const, overloadable))
+    fdim(float4 a, float4 b);
+
+/*
+ * floor: Largest integer not greater than a value
+ *
+ * Returns the largest integer not greater than a value.
+ *
+ * For example, floor(1.2f) returns 1.f, and floor(-1.2f) returns -2.f.
+ *
+ * See also ceil().
+ */
+extern float __attribute__((const, overloadable))
+    floor(float v);
+
+extern float2 __attribute__((const, overloadable))
+    floor(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    floor(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    floor(float4 v);
+
+/*
+ * fma: Multiply and add
+ *
+ * Multiply and add.  Returns (multiplicand1 * multiplicand2) + offset.
+ *
+ * This function is similar to mad().  fma() retains full precision of the
+ * multiplied result and rounds only after the addition.  mad() rounds after the
+ * multiplication and the addition.  This extra precision is not guaranteed in
+ * rs_fp_relaxed mode.
+ */
+extern float __attribute__((const, overloadable))
+    fma(float multiplicand1, float multiplicand2, float offset);
+
+extern float2 __attribute__((const, overloadable))
+    fma(float2 multiplicand1, float2 multiplicand2, float2 offset);
+
+extern float3 __attribute__((const, overloadable))
+    fma(float3 multiplicand1, float3 multiplicand2, float3 offset);
+
+extern float4 __attribute__((const, overloadable))
+    fma(float4 multiplicand1, float4 multiplicand2, float4 offset);
+
+/*
+ * fmax: Maximum of two floats
+ *
+ * Returns the maximum of a and b, i.e. (a < b ? b : a).
+ *
+ * The max() function returns identical results but can be applied to more data types.
+ */
+extern float __attribute__((const, overloadable))
+    fmax(float a, float b);
+
+extern float2 __attribute__((const, overloadable))
+    fmax(float2 a, float2 b);
+
+extern float3 __attribute__((const, overloadable))
+    fmax(float3 a, float3 b);
+
+extern float4 __attribute__((const, overloadable))
+    fmax(float4 a, float4 b);
+
+extern float2 __attribute__((const, overloadable))
+    fmax(float2 a, float b);
+
+extern float3 __attribute__((const, overloadable))
+    fmax(float3 a, float b);
+
+extern float4 __attribute__((const, overloadable))
+    fmax(float4 a, float b);
+
+/*
+ * fmin: Minimum of two floats
+ *
+ * Returns the minimum of a and b, i.e. (a > b ? b : a).
+ *
+ * The min() function returns identical results but can be applied to more data types.
+ */
+extern float __attribute__((const, overloadable))
+    fmin(float a, float b);
+
+extern float2 __attribute__((const, overloadable))
+    fmin(float2 a, float2 b);
+
+extern float3 __attribute__((const, overloadable))
+    fmin(float3 a, float3 b);
+
+extern float4 __attribute__((const, overloadable))
+    fmin(float4 a, float4 b);
+
+extern float2 __attribute__((const, overloadable))
+    fmin(float2 a, float b);
+
+extern float3 __attribute__((const, overloadable))
+    fmin(float3 a, float b);
+
+extern float4 __attribute__((const, overloadable))
+    fmin(float4 a, float b);
+
+/*
+ * fmod: Modulo
+ *
+ * Returns the remainder of (numerator / denominator), where the quotient is rounded towards zero.
+ *
+ * The function remainder() is similar but rounds toward the closest integer.
+ * For example, fmod(-3.8f, 2.f) returns -1.8f (-3.8f - -1.f * 2.f)
+ * while remainder(-3.8f, 2.f) returns 0.2f (-3.8f - -2.f * 2.f).
+ */
+extern float __attribute__((const, overloadable))
+    fmod(float numerator, float denominator);
+
+extern float2 __attribute__((const, overloadable))
+    fmod(float2 numerator, float2 denominator);
+
+extern float3 __attribute__((const, overloadable))
+    fmod(float3 numerator, float3 denominator);
+
+extern float4 __attribute__((const, overloadable))
+    fmod(float4 numerator, float4 denominator);
+
+/*
+ * fract: Positive fractional part
+ *
+ * Returns the positive fractional part of v, i.e. v - floor(v).
+ *
+ * For example, fract(1.3f, &val) returns 0.3f and sets val to 1.f.
+ * fract(-1.3f, &val) returns 0.7f and sets val to -2.f.
+ *
+ * Parameters:
+ *   v Input value.
+ *   floor If floor is not null, *floor will be set to the floor of v.
+ */
+extern float __attribute__((overloadable))
+    fract(float v, float* floor);
+
+extern float2 __attribute__((overloadable))
+    fract(float2 v, float2* floor);
+
+extern float3 __attribute__((overloadable))
+    fract(float3 v, float3* floor);
+
+extern float4 __attribute__((overloadable))
+    fract(float4 v, float4* floor);
+
+static inline float __attribute__((const, overloadable))
+    fract(float v) {
+    float ignored_floor;  /* receives floor(v) from the two-argument overload; not used */
+    return fract(v, &ignored_floor);
+}
+
+static inline float2 __attribute__((const, overloadable))
+    fract(float2 v) {
+    float2 ignored_floor;  /* receives floor(v) from the two-argument overload; not used */
+    return fract(v, &ignored_floor);
+}
+
+static inline float3 __attribute__((const, overloadable))
+    fract(float3 v) {
+    float3 ignored_floor;  /* receives floor(v) from the two-argument overload; not used */
+    return fract(v, &ignored_floor);
+}
+
+static inline float4 __attribute__((const, overloadable))
+    fract(float4 v) {
+    float4 ignored_floor;  /* receives floor(v) from the two-argument overload; not used */
+    return fract(v, &ignored_floor);
+}
+
+/*
+ * frexp: Binary mantissa and exponent
+ *
+ * Returns the binary mantissa and exponent of v, i.e. v == mantissa * 2 ^ exponent.
+ *
+ * The mantissa is always between 0.5 (inclusive) and 1.0 (exclusive).
+ *
+ * See ldexp() for the reverse operation.  See also logb() and ilogb().
+ *
+ * Parameters:
+ *   v Input value.
+ *   exponent If exponent is not null, *exponent will be set to the exponent of v.
+ */
+extern float __attribute__((overloadable))
+    frexp(float v, int* exponent);
+
+extern float2 __attribute__((overloadable))
+    frexp(float2 v, int2* exponent);
+
+extern float3 __attribute__((overloadable))
+    frexp(float3 v, int3* exponent);
+
+extern float4 __attribute__((overloadable))
+    frexp(float4 v, int4* exponent);
+
+/*
+ * half_recip: Reciprocal computed to 16 bit precision
+ *
+ * Returns the approximate reciprocal of a value.
+ *
+ * The precision is that of a 16 bit floating point value.
+ *
+ * See also native_recip().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    half_recip(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float2 __attribute__((const, overloadable))
+    half_recip(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float3 __attribute__((const, overloadable))
+    half_recip(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float4 __attribute__((const, overloadable))
+    half_recip(float4 v);
+#endif
+
+/*
+ * half_rsqrt: Reciprocal of a square root computed to 16 bit precision
+ *
+ * Returns the approximate value of (1.f / sqrt(value)).
+ *
+ * The precision is that of a 16 bit floating point value.
+ *
+ * See also rsqrt(), native_rsqrt().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    half_rsqrt(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float2 __attribute__((const, overloadable))
+    half_rsqrt(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float3 __attribute__((const, overloadable))
+    half_rsqrt(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float4 __attribute__((const, overloadable))
+    half_rsqrt(float4 v);
+#endif
+
+/*
+ * half_sqrt: Square root computed to 16 bit precision
+ *
+ * Returns the approximate square root of a value.
+ *
+ * The precision is that of a 16 bit floating point value.
+ *
+ * See also sqrt(), native_sqrt().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float __attribute__((const, overloadable))
+    half_sqrt(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float2 __attribute__((const, overloadable))
+    half_sqrt(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float3 __attribute__((const, overloadable))
+    half_sqrt(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern float4 __attribute__((const, overloadable))
+    half_sqrt(float4 v);
+#endif
+
+/*
+ * hypot: Hypotenuse
+ *
+ * Returns the hypotenuse, i.e. sqrt(a * a + b * b).
+ *
+ * See also native_hypot().
+ */
+extern float __attribute__((const, overloadable))
+    hypot(float a, float b);
+
+extern float2 __attribute__((const, overloadable))
+    hypot(float2 a, float2 b);
+
+extern float3 __attribute__((const, overloadable))
+    hypot(float3 a, float3 b);
+
+extern float4 __attribute__((const, overloadable))
+    hypot(float4 a, float4 b);
+
+/*
+ * ilogb: Base two exponent
+ *
+ * Returns the base two exponent of a value, where the mantissa is between
+ * 1.f (inclusive) and 2.f (exclusive).
+ *
+ * For example, ilogb(8.5f) returns 3.
+ *
+ * Because of the difference in mantissa, this number is one less than
+ * the exponent returned by frexp().
+ *
+ * logb() is similar but returns a float.
+ */
+extern int __attribute__((const, overloadable))
+    ilogb(float v);
+
+extern int2 __attribute__((const, overloadable))
+    ilogb(float2 v);
+
+extern int3 __attribute__((const, overloadable))
+    ilogb(float3 v);
+
+extern int4 __attribute__((const, overloadable))
+    ilogb(float4 v);
+
+/*
+ * ldexp: Creates a floating point from mantissa and exponent
+ *
+ * Returns the floating point created from the mantissa and exponent,
+ * i.e. (mantissa * 2 ^ exponent).
+ *
+ * See frexp() for the reverse operation.
+ *
+ * Parameters:
+ *   mantissa The mantissa
+ *   exponent The exponent, a single component or matching vector.
+ */
+extern float __attribute__((const, overloadable))
+    ldexp(float mantissa, int exponent);
+
+extern float2 __attribute__((const, overloadable))
+    ldexp(float2 mantissa, int2 exponent);
+
+extern float3 __attribute__((const, overloadable))
+    ldexp(float3 mantissa, int3 exponent);
+
+extern float4 __attribute__((const, overloadable))
+    ldexp(float4 mantissa, int4 exponent);
+
+extern float2 __attribute__((const, overloadable))
+    ldexp(float2 mantissa, int exponent);
+
+extern float3 __attribute__((const, overloadable))
+    ldexp(float3 mantissa, int exponent);
+
+extern float4 __attribute__((const, overloadable))
+    ldexp(float4 mantissa, int exponent);
+
+/*
+ * length: Length of a vector
+ *
+ * Computes the length of a vector.
+ *
+ * See also fast_length(), native_length().
+ */
+extern float __attribute__((const, overloadable))
+    length(float v);
+
+extern float __attribute__((const, overloadable))
+    length(float2 v);
+
+extern float __attribute__((const, overloadable))
+    length(float3 v);
+
+extern float __attribute__((const, overloadable))
+    length(float4 v);
+
+/*
+ * lgamma: Natural logarithm of the gamma function
+ *
+ * Returns the natural logarithm of the absolute value of the gamma function,
+ * i.e. log(fabs(tgamma(v))).
+ *
+ * See also tgamma().
+ *
+ * Parameters:
+ *   sign_of_gamma If sign_of_gamma is not null, *sign_of_gamma will be set to -1 if the gamma of v is negative, otherwise to 1.
+ */
+extern float __attribute__((const, overloadable))
+    lgamma(float v);
+
+extern float2 __attribute__((const, overloadable))
+    lgamma(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    lgamma(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    lgamma(float4 v);
+
+extern float __attribute__((overloadable))
+    lgamma(float v, int* sign_of_gamma);
+
+extern float2 __attribute__((overloadable))
+    lgamma(float2 v, int2* sign_of_gamma);
+
+extern float3 __attribute__((overloadable))
+    lgamma(float3 v, int3* sign_of_gamma);
+
+extern float4 __attribute__((overloadable))
+    lgamma(float4 v, int4* sign_of_gamma);
+
+/*
+ * log: Natural logarithm
+ *
+ * Returns the natural logarithm.
+ *
+ * See also native_log().
+ */
+extern float __attribute__((const, overloadable))
+    log(float v);
+
+extern float2 __attribute__((const, overloadable))
+    log(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    log(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    log(float4 v);
+
+/*
+ * log10: Base 10 logarithm
+ *
+ * Returns the base 10 logarithm.
+ *
+ * See also native_log10().
+ */
+extern float __attribute__((const, overloadable))
+    log10(float v);
+
+extern float2 __attribute__((const, overloadable))
+    log10(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    log10(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    log10(float4 v);
+
+/*
+ * log1p: Natural logarithm of a value plus 1
+ *
+ * Returns the natural logarithm of (v + 1.f).
+ *
+ * See also native_log1p().
+ */
+extern float __attribute__((const, overloadable))
+    log1p(float v);
+
+extern float2 __attribute__((const, overloadable))
+    log1p(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    log1p(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    log1p(float4 v);
+
+/*
+ * log2: Base 2 logarithm
+ *
+ * Returns the base 2 logarithm.
+ *
+ * See also native_log2().
+ */
+extern float __attribute__((const, overloadable))
+    log2(float v);
+
+extern float2 __attribute__((const, overloadable))
+    log2(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    log2(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    log2(float4 v);
+
+/*
+ * logb: Base two exponent
+ *
+ * Returns the base two exponent of a value, where the mantissa is between
+ * 1.f (inclusive) and 2.f (exclusive).
+ *
+ * For example, logb(8.5f) returns 3.f.
+ *
+ * Because of the difference in mantissa, this number is one less than
+ * the exponent returned by frexp().
+ *
+ * ilogb() is similar but returns an integer.
+ */
+extern float __attribute__((const, overloadable))
+    logb(float v);
+
+extern float2 __attribute__((const, overloadable))
+    logb(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    logb(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    logb(float4 v);
+
+/*
+ * mad: Multiply and add
+ *
+ * Multiply and add.  Returns (multiplicand1 * multiplicand2) + offset.
+ *
+ * This function is similar to fma().  fma() retains full precision of the
+ * multiplied result and rounds only after the addition.  mad() rounds after the
+ * multiplication and the addition.  In rs_fp_relaxed mode, mad() may not do the
+ * rounding after multiplication.
+ */
+extern float __attribute__((const, overloadable))
+    mad(float multiplicand1, float multiplicand2, float offset);
+
+extern float2 __attribute__((const, overloadable))
+    mad(float2 multiplicand1, float2 multiplicand2, float2 offset);
+
+extern float3 __attribute__((const, overloadable))
+    mad(float3 multiplicand1, float3 multiplicand2, float3 offset);
+
+extern float4 __attribute__((const, overloadable))
+    mad(float4 multiplicand1, float4 multiplicand2, float4 offset);
+
+/*
+ * max: Maximum
+ *
+ * Returns the maximum value of two arguments.
+ */
+extern float __attribute__((const, overloadable))
+    max(float a, float b);
+
+extern float2 __attribute__((const, overloadable))
+    max(float2 a, float2 b);
+
+extern float3 __attribute__((const, overloadable))
+    max(float3 a, float3 b);
+
+extern float4 __attribute__((const, overloadable))
+    max(float4 a, float4 b);
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline char __attribute__((const, overloadable))
+    max(char a, char b) {  /* larger of a and b */
+    return (a <= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uchar __attribute__((const, overloadable))
+    max(uchar a, uchar b) {  /* larger of a and b */
+    return (a <= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline short __attribute__((const, overloadable))
+    max(short a, short b) {  /* larger of a and b */
+    return (a <= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline ushort __attribute__((const, overloadable))
+    max(ushort a, ushort b) {  /* larger of a and b */
+    return (a <= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline int __attribute__((const, overloadable))
+    max(int a, int b) {  /* larger of a and b */
+    return (a <= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uint __attribute__((const, overloadable))
+    max(uint a, uint b) {  /* larger of a and b */
+    return (a <= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline char2 __attribute__((const, overloadable))
+    max(char2 a, char2 b) {
+    char2 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uchar2 __attribute__((const, overloadable))
+    max(uchar2 a, uchar2 b) {
+    uchar2 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline short2 __attribute__((const, overloadable))
+    max(short2 a, short2 b) {
+    short2 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline ushort2 __attribute__((const, overloadable))
+    max(ushort2 a, ushort2 b) {
+    ushort2 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline int2 __attribute__((const, overloadable))
+    max(int2 a, int2 b) {
+    int2 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uint2 __attribute__((const, overloadable))
+    max(uint2 a, uint2 b) {
+    uint2 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline char3 __attribute__((const, overloadable))
+    max(char3 a, char3 b) {
+    char3 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uchar3 __attribute__((const, overloadable))
+    max(uchar3 a, uchar3 b) {
+    uchar3 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline short3 __attribute__((const, overloadable))
+    max(short3 a, short3 b) {
+    short3 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline ushort3 __attribute__((const, overloadable))
+    max(ushort3 a, ushort3 b) {
+    ushort3 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline int3 __attribute__((const, overloadable))
+    max(int3 a, int3 b) {
+    int3 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uint3 __attribute__((const, overloadable))
+    max(uint3 a, uint3 b) {
+    uint3 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline char4 __attribute__((const, overloadable))
+    max(char4 a, char4 b) {
+    char4 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    if (a.w <= b.w) { larger.w = b.w; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uchar4 __attribute__((const, overloadable))
+    max(uchar4 a, uchar4 b) {
+    uchar4 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    if (a.w <= b.w) { larger.w = b.w; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline short4 __attribute__((const, overloadable))
+    max(short4 a, short4 b) {
+    short4 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    if (a.w <= b.w) { larger.w = b.w; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline ushort4 __attribute__((const, overloadable))
+    max(ushort4 a, ushort4 b) {
+    ushort4 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    if (a.w <= b.w) { larger.w = b.w; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline int4 __attribute__((const, overloadable))
+    max(int4 a, int4 b) {
+    int4 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    if (a.w <= b.w) { larger.w = b.w; }
+    return larger;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uint4 __attribute__((const, overloadable))
+    max(uint4 a, uint4 b) {
+    uint4 larger = a;  /* per lane: keep a only where it is strictly greater */
+    if (a.x <= b.x) { larger.x = b.x; }
+    if (a.y <= b.y) { larger.y = b.y; }
+    if (a.z <= b.z) { larger.z = b.z; }
+    if (a.w <= b.w) { larger.w = b.w; }
+    return larger;
+}
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern char __attribute__((const, overloadable))
+    max(char a, char b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern char2 __attribute__((const, overloadable))
+    max(char2 a, char2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern char3 __attribute__((const, overloadable))
+    max(char3 a, char3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern char4 __attribute__((const, overloadable))
+    max(char4 a, char4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uchar __attribute__((const, overloadable))
+    max(uchar a, uchar b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uchar2 __attribute__((const, overloadable))
+    max(uchar2 a, uchar2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uchar3 __attribute__((const, overloadable))
+    max(uchar3 a, uchar3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uchar4 __attribute__((const, overloadable))
+    max(uchar4 a, uchar4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern short __attribute__((const, overloadable))
+    max(short a, short b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern short2 __attribute__((const, overloadable))
+    max(short2 a, short2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern short3 __attribute__((const, overloadable))
+    max(short3 a, short3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern short4 __attribute__((const, overloadable))
+    max(short4 a, short4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ushort __attribute__((const, overloadable))
+    max(ushort a, ushort b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ushort2 __attribute__((const, overloadable))
+    max(ushort2 a, ushort2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ushort3 __attribute__((const, overloadable))
+    max(ushort3 a, ushort3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ushort4 __attribute__((const, overloadable))
+    max(ushort4 a, ushort4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern int __attribute__((const, overloadable))
+    max(int a, int b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern int2 __attribute__((const, overloadable))
+    max(int2 a, int2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern int3 __attribute__((const, overloadable))
+    max(int3 a, int3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern int4 __attribute__((const, overloadable))
+    max(int4 a, int4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uint __attribute__((const, overloadable))
+    max(uint a, uint b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uint2 __attribute__((const, overloadable))
+    max(uint2 a, uint2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uint3 __attribute__((const, overloadable))
+    max(uint3 a, uint3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uint4 __attribute__((const, overloadable))
+    max(uint4 a, uint4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern long __attribute__((const, overloadable))
+    max(long a, long b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern long2 __attribute__((const, overloadable))
+    max(long2 a, long2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern long3 __attribute__((const, overloadable))
+    max(long3 a, long3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern long4 __attribute__((const, overloadable))
+    max(long4 a, long4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ulong __attribute__((const, overloadable))
+    max(ulong a, ulong b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ulong2 __attribute__((const, overloadable))
+    max(ulong2 a, ulong2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ulong3 __attribute__((const, overloadable))
+    max(ulong3 a, ulong3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ulong4 __attribute__((const, overloadable))
+    max(ulong4 a, ulong4 b);
+#endif
+
+/*
+ * min: Minimum
+ *
+ * Returns the minimum value of two arguments.
+ */
+extern float __attribute__((const, overloadable))
+    min(float a, float b);
+
+extern float2 __attribute__((const, overloadable))
+    min(float2 a, float2 b);
+
+extern float3 __attribute__((const, overloadable))
+    min(float3 a, float3 b);
+
+extern float4 __attribute__((const, overloadable))
+    min(float4 a, float4 b);
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline char __attribute__((const, overloadable))
+    min(char a, char b) {  /* smaller of a and b */
+    return (a >= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uchar __attribute__((const, overloadable))
+    min(uchar a, uchar b) {  /* smaller of a and b */
+    return (a >= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline short __attribute__((const, overloadable))
+    min(short a, short b) {  /* smaller of a and b */
+    return (a >= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline ushort __attribute__((const, overloadable))
+    min(ushort a, ushort b) {  /* smaller of a and b */
+    return (a >= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline int __attribute__((const, overloadable))
+    min(int a, int b) {  /* smaller of a and b */
+    return (a >= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uint __attribute__((const, overloadable))
+    min(uint a, uint b) {  /* smaller of a and b */
+    return (a >= b) ? b : a;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline char2 __attribute__((const, overloadable))
+    min(char2 a, char2 b) {
+    char2 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uchar2 __attribute__((const, overloadable))
+    min(uchar2 a, uchar2 b) {
+    uchar2 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline short2 __attribute__((const, overloadable))
+    min(short2 a, short2 b) {
+    short2 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline ushort2 __attribute__((const, overloadable))
+    min(ushort2 a, ushort2 b) {
+    ushort2 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline int2 __attribute__((const, overloadable))
+    min(int2 a, int2 b) {
+    int2 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uint2 __attribute__((const, overloadable))
+    min(uint2 a, uint2 b) {
+    uint2 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline char3 __attribute__((const, overloadable))
+    min(char3 a, char3 b) {
+    char3 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uchar3 __attribute__((const, overloadable))
+    min(uchar3 a, uchar3 b) {
+    uchar3 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline short3 __attribute__((const, overloadable))
+    min(short3 a, short3 b) {
+    short3 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline ushort3 __attribute__((const, overloadable))
+    min(ushort3 a, ushort3 b) {
+    ushort3 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline int3 __attribute__((const, overloadable))
+    min(int3 a, int3 b) {
+    int3 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uint3 __attribute__((const, overloadable))
+    min(uint3 a, uint3 b) {
+    uint3 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline char4 __attribute__((const, overloadable))
+    min(char4 a, char4 b) {
+    char4 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    if (a.w >= b.w) { smaller.w = b.w; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uchar4 __attribute__((const, overloadable))
+    min(uchar4 a, uchar4 b) {
+    uchar4 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    if (a.w >= b.w) { smaller.w = b.w; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline short4 __attribute__((const, overloadable))
+    min(short4 a, short4 b) {
+    short4 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    if (a.w >= b.w) { smaller.w = b.w; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline ushort4 __attribute__((const, overloadable))
+    min(ushort4 a, ushort4 b) {
+    ushort4 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    if (a.w >= b.w) { smaller.w = b.w; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline int4 __attribute__((const, overloadable))
+    min(int4 a, int4 b) {
+    int4 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    if (a.w >= b.w) { smaller.w = b.w; }
+    return smaller;
+}
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+static inline uint4 __attribute__((const, overloadable))
+    min(uint4 a, uint4 b) {
+    uint4 smaller = a;  /* per lane: keep a only where it is strictly less */
+    if (a.x >= b.x) { smaller.x = b.x; }
+    if (a.y >= b.y) { smaller.y = b.y; }
+    if (a.z >= b.z) { smaller.z = b.z; }
+    if (a.w >= b.w) { smaller.w = b.w; }
+    return smaller;
+}
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern char __attribute__((const, overloadable))
+    min(char a, char b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern char2 __attribute__((const, overloadable))
+    min(char2 a, char2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern char3 __attribute__((const, overloadable))
+    min(char3 a, char3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern char4 __attribute__((const, overloadable))
+    min(char4 a, char4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uchar __attribute__((const, overloadable))
+    min(uchar a, uchar b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uchar2 __attribute__((const, overloadable))
+    min(uchar2 a, uchar2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uchar3 __attribute__((const, overloadable))
+    min(uchar3 a, uchar3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uchar4 __attribute__((const, overloadable))
+    min(uchar4 a, uchar4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern short __attribute__((const, overloadable))
+    min(short a, short b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern short2 __attribute__((const, overloadable))
+    min(short2 a, short2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern short3 __attribute__((const, overloadable))
+    min(short3 a, short3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern short4 __attribute__((const, overloadable))
+    min(short4 a, short4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ushort __attribute__((const, overloadable))
+    min(ushort a, ushort b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ushort2 __attribute__((const, overloadable))
+    min(ushort2 a, ushort2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ushort3 __attribute__((const, overloadable))
+    min(ushort3 a, ushort3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ushort4 __attribute__((const, overloadable))
+    min(ushort4 a, ushort4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern int __attribute__((const, overloadable))
+    min(int a, int b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern int2 __attribute__((const, overloadable))
+    min(int2 a, int2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern int3 __attribute__((const, overloadable))
+    min(int3 a, int3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern int4 __attribute__((const, overloadable))
+    min(int4 a, int4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uint __attribute__((const, overloadable))
+    min(uint a, uint b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uint2 __attribute__((const, overloadable))
+    min(uint2 a, uint2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uint3 __attribute__((const, overloadable))
+    min(uint3 a, uint3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern uint4 __attribute__((const, overloadable))
+    min(uint4 a, uint4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern long __attribute__((const, overloadable))
+    min(long a, long b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern long2 __attribute__((const, overloadable))
+    min(long2 a, long2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern long3 __attribute__((const, overloadable))
+    min(long3 a, long3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern long4 __attribute__((const, overloadable))
+    min(long4 a, long4 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ulong __attribute__((const, overloadable))
+    min(ulong a, ulong b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ulong2 __attribute__((const, overloadable))
+    min(ulong2 a, ulong2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ulong3 __attribute__((const, overloadable))
+    min(ulong3 a, ulong3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern ulong4 __attribute__((const, overloadable))
+    min(ulong4 a, ulong4 b);
+#endif
+
+/*
+ * mix: Mixes two values
+ *
+ * Returns start + ((stop - start) * fraction).
+ *
+ * This can be useful for mixing two values.  For example, to create a new color that is 40% color1 and 60% color2, use mix(color1, color2, 0.6f).
+ */
+extern float __attribute__((const, overloadable))
+    mix(float start, float stop, float fraction);
+
+extern float2 __attribute__((const, overloadable))
+    mix(float2 start, float2 stop, float2 fraction);
+
+extern float3 __attribute__((const, overloadable))
+    mix(float3 start, float3 stop, float3 fraction);
+
+extern float4 __attribute__((const, overloadable))
+    mix(float4 start, float4 stop, float4 fraction);
+
+extern float2 __attribute__((const, overloadable))
+    mix(float2 start, float2 stop, float fraction);
+
+extern float3 __attribute__((const, overloadable))
+    mix(float3 start, float3 stop, float fraction);
+
+extern float4 __attribute__((const, overloadable))
+    mix(float4 start, float4 stop, float fraction);
+
+/*
+ * modf: Integral and fractional components
+ *
+ * Returns the integral and fractional components of a number.
+ *
+ * Both components will have the same sign as v.  For example, for an input of -3.72f, *integral_part will be set to -3.f and -.72f will be returned.
+ *
+ * Parameters:
+ *   v Source value
+ *   integral_part *integral_part will be set to the integral portion of the number.
+ *
+ * Returns: The fractional portion of the value.
+ */
+extern float __attribute__((overloadable))
+    modf(float v, float* integral_part);
+
+extern float2 __attribute__((overloadable))
+    modf(float2 v, float2* integral_part);
+
+extern float3 __attribute__((overloadable))
+    modf(float3 v, float3* integral_part);
+
+extern float4 __attribute__((overloadable))
+    modf(float4 v, float4* integral_part);
+
+/*
+ * nan: Not a Number
+ *
+ * Returns a NaN value (Not a Number).
+ *
+ * Parameters:
+ *   v Not used.
+ */
+extern float __attribute__((const, overloadable))
+    nan(uint v);
+
+/*
+ * native_acos: Approximate inverse cosine
+ *
+ * Returns the approximate inverse cosine, in radians.
+ *
+ * This function yields undefined results from input values less than -1 or greater
+ * than 1.
+ *
+ * See also acos().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_acos(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_acos(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_acos(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_acos(float4 v);
+#endif
+
+/*
+ * native_acosh: Approximate inverse hyperbolic cosine
+ *
+ * Returns the approximate inverse hyperbolic cosine, in radians.
+ *
+ * See also acosh().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_acosh(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_acosh(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_acosh(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_acosh(float4 v);
+#endif
+
+/*
+ * native_acospi: Approximate inverse cosine divided by pi
+ *
+ * Returns the approximate inverse cosine in radians, divided by pi.
+ *
+ * To get an inverse cosine measured in degrees, use acospi(a) * 180.f.
+ *
+ * This function yields undefined results from input values less than -1 or greater
+ * than 1.
+ *
+ * See also acospi().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_acospi(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_acospi(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_acospi(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_acospi(float4 v);
+#endif
+
+/*
+ * native_asin: Approximate inverse sine
+ *
+ * Returns the approximate inverse sine, in radians.
+ *
+ * This function yields undefined results from input values less than -1 or greater
+ * than 1.
+ *
+ * See also asin().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_asin(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_asin(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_asin(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_asin(float4 v);
+#endif
+
+/*
+ * native_asinh: Approximate inverse hyperbolic sine
+ *
+ * Returns the approximate inverse hyperbolic sine, in radians.
+ *
+ * See also asinh().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_asinh(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_asinh(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_asinh(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_asinh(float4 v);
+#endif
+
+/*
+ * native_asinpi: Approximate inverse sine divided by pi
+ *
+ * Returns the approximate inverse sine in radians, divided by pi.
+ *
+ * To get an inverse sine measured in degrees, use asinpi(a) * 180.f.
+ *
+ * This function yields undefined results from input values less than -1 or greater
+ * than 1.
+ *
+ * See also asinpi().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_asinpi(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_asinpi(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_asinpi(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_asinpi(float4 v);
+#endif
+
+/*
+ * native_atan: Approximate inverse tangent
+ *
+ * Returns the approximate inverse tangent, in radians.
+ *
+ * See also atan().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_atan(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_atan(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_atan(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_atan(float4 v);
+#endif
+
+/*
+ * native_atan2: Approximate inverse tangent of a ratio
+ *
+ * Returns the approximate inverse tangent of (numerator / denominator), in radians.
+ *
+ * See also atan2().
+ *
+ * Parameters:
+ *   numerator The numerator
+ *   denominator The denominator.  Can be 0.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_atan2(float numerator, float denominator);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_atan2(float2 numerator, float2 denominator);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_atan2(float3 numerator, float3 denominator);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_atan2(float4 numerator, float4 denominator);
+#endif
+
+/*
+ * native_atan2pi: Approximate inverse tangent of a ratio, divided by pi
+ *
+ * Returns the approximate inverse tangent of (numerator / denominator), in radians, divided by pi.
+ *
+ * To get an inverse tangent measured in degrees, use atan2pi(n, d) * 180.f.
+ *
+ * See also atan2pi().
+ *
+ * Parameters:
+ *   numerator The numerator
+ *   denominator The denominator.  Can be 0.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_atan2pi(float numerator, float denominator);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_atan2pi(float2 numerator, float2 denominator);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_atan2pi(float3 numerator, float3 denominator);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_atan2pi(float4 numerator, float4 denominator);
+#endif
+
+/*
+ * native_atanh: Approximate inverse hyperbolic tangent
+ *
+ * Returns the approximate inverse hyperbolic tangent, in radians.
+ *
+ * See also atanh().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_atanh(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_atanh(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_atanh(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_atanh(float4 v);
+#endif
+
+/*
+ * native_atanpi: Approximate inverse tangent divided by pi
+ *
+ * Returns the approximate inverse tangent in radians, divided by pi.
+ *
+ * To get an inverse tangent measured in degrees, use atanpi(a) * 180.f.
+ *
+ * See also atanpi().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_atanpi(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_atanpi(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_atanpi(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_atanpi(float4 v);
+#endif
+
+/*
+ * native_cbrt: Approximate cube root
+ *
+ * Returns the approximate cube root.
+ *
+ * See also cbrt().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_cbrt(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_cbrt(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_cbrt(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_cbrt(float4 v);
+#endif
+
+/*
+ * native_cos: Approximate cosine
+ *
+ * Returns the approximate cosine of an angle measured in radians.
+ *
+ * See also cos().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_cos(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_cos(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_cos(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_cos(float4 v);
+#endif
+
+/*
+ * native_cosh: Approximate hyperbolic cosine
+ *
+ * Returns the approximate hyperbolic cosine.
+ *
+ * See also cosh().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_cosh(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_cosh(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_cosh(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_cosh(float4 v);
+#endif
+
+/*
+ * native_cospi: Approximate cosine of a number multiplied by pi
+ *
+ * Returns the approximate cosine of (v * pi), where (v * pi) is measured in radians.
+ *
+ * To get the cosine of a value measured in degrees, call cospi(v / 180.f).
+ *
+ * See also cospi().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_cospi(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_cospi(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_cospi(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_cospi(float4 v);
+#endif
+
+/*
+ * native_distance: Approximate distance between two points
+ *
+ * Computes the approximate distance between two points.
+ *
+ * See also distance(), fast_distance().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_distance(float left_vector, float right_vector);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_distance(float2 left_vector, float2 right_vector);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_distance(float3 left_vector, float3 right_vector);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_distance(float4 left_vector, float4 right_vector);
+#endif
+
+/*
+ * native_divide: Approximate division
+ *
+ * Computes the approximate division of two values.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_divide(float left_vector, float right_vector);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_divide(float2 left_vector, float2 right_vector);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_divide(float3 left_vector, float3 right_vector);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_divide(float4 left_vector, float4 right_vector);
+#endif
+
+/*
+ * native_exp: Approximate e raised to a number
+ *
+ * Fast approximate exp.
+ *
+ * It is valid for inputs from -86.f to 86.f.  The precision is no worse than what would be expected from using 16 bit floating point values.
+ *
+ * See also exp().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float __attribute__((const, overloadable))
+    native_exp(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float2 __attribute__((const, overloadable))
+    native_exp(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float3 __attribute__((const, overloadable))
+    native_exp(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float4 __attribute__((const, overloadable))
+    native_exp(float4 v);
+#endif
+
+/*
+ * native_exp10: Approximate 10 raised to a number
+ *
+ * Fast approximate exp10.
+ *
+ * It is valid for inputs from -37.f to 37.f.  The precision is no worse than what would be expected from using 16 bit floating point values.
+ *
+ * See also exp10().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float __attribute__((const, overloadable))
+    native_exp10(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float2 __attribute__((const, overloadable))
+    native_exp10(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float3 __attribute__((const, overloadable))
+    native_exp10(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float4 __attribute__((const, overloadable))
+    native_exp10(float4 v);
+#endif
+
+/*
+ * native_exp2: Approximate 2 raised to a number
+ *
+ * Fast approximate exp2.
+ *
+ * It is valid for inputs from -125.f to 125.f.  The precision is no worse than what would be expected from using 16 bit floating point values.
+ *
+ * See also exp2().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float __attribute__((const, overloadable))
+    native_exp2(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float2 __attribute__((const, overloadable))
+    native_exp2(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float3 __attribute__((const, overloadable))
+    native_exp2(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float4 __attribute__((const, overloadable))
+    native_exp2(float4 v);
+#endif
+
+/*
+ * native_expm1: Approximate e raised to a number minus one
+ *
+ * Returns the approximate (e ^ v) - 1.
+ *
+ * See also expm1().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_expm1(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_expm1(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_expm1(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_expm1(float4 v);
+#endif
+
+/*
+ * native_hypot: Approximate hypotenuse
+ *
+ * Returns the approximate native_sqrt(a * a + b * b)
+ *
+ * See also hypot().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_hypot(float a, float b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_hypot(float2 a, float2 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_hypot(float3 a, float3 b);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_hypot(float4 a, float4 b);
+#endif
+
+/*
+ * native_length: Approximate length of a vector
+ *
  * Compute the approximate length of a vector.
  *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))fast_length(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Compute the approximate length of a vector.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))fast_length(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Compute the approximate length of a vector.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))fast_length(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Compute the approximate length of a vector.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))fast_length(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Approximately normalize a vector.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))fast_normalize(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Approximately normalize a vector.
- *
- * Supported by API versions 17 and newer.
- */
-extern float2 __attribute__((const, overloadable))fast_normalize(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Approximately normalize a vector.
- *
- * Supported by API versions 17 and newer.
- */
-extern float3 __attribute__((const, overloadable))fast_normalize(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Approximately normalize a vector.
- *
- * Supported by API versions 17 and newer.
- */
-extern float4 __attribute__((const, overloadable))fast_normalize(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the positive difference between two values.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))fdim(float a, float b);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the positive difference between two values.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))fdim(float2 a, float2 b);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the positive difference between two values.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))fdim(float3 a, float3 b);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the positive difference between two values.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))fdim(float4 a, float4 b);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the smallest integer not greater than a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))floor(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the smallest integer not greater than a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))floor(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the smallest integer not greater than a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))floor(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the smallest integer not greater than a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))floor(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (a * b) + c.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))fma(float a, float b, float c);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (a * b) + c.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))fma(float2 a, float2 b, float2 c);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (a * b) + c.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))fma(float3 a, float3 b, float3 c);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (a * b) + c.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))fma(float4 a, float4 b, float4 c);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x < y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))fmax(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x < y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))fmax(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x < y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))fmax(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x < y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))fmax(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x < y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))fmax(float2 x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x < y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))fmax(float3 x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x < y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))fmax(float4 x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x > y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))fmin(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x > y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))fmin(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x > y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))fmin(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x > y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))fmin(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x > y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))fmin(float2 x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x > y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))fmin(float3 x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x > y ? y : x)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))fmin(float4 x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the remainder from x / y
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))fmod(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the remainder from x / y
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))fmod(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the remainder from x / y
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))fmod(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the remainder from x / y
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))fmod(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return fractional part of v
- *
- * @param floor  floor[0] will be set to the floor of the input value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((overloadable))fract(float v, float* floor);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return fractional part of v
- *
- * @param floor  floor[0] will be set to the floor of the input value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((overloadable))fract(float2 v, float2* floor);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return fractional part of v
- *
- * @param floor  floor[0] will be set to the floor of the input value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((overloadable))fract(float3 v, float3* floor);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return fractional part of v
- *
- * @param floor  floor[0] will be set to the floor of the input value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((overloadable))fract(float4 v, float4* floor);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return fractional part of v
- *
- * Supported by API versions 9 and newer.
- */
-static float __attribute__((const, overloadable))fract(float v) {
- float unused;
- return fract(v, &unused);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return fractional part of v
- *
- * Supported by API versions 9 and newer.
- */
-static float2 __attribute__((const, overloadable))fract(float2 v) {
- float2 unused;
- return fract(v, &unused);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return fractional part of v
- *
- * Supported by API versions 9 and newer.
- */
-static float3 __attribute__((const, overloadable))fract(float3 v) {
- float3 unused;
- return fract(v, &unused);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return fractional part of v
- *
- * Supported by API versions 9 and newer.
- */
-static float4 __attribute__((const, overloadable))fract(float4 v) {
- float4 unused;
- return fract(v, &unused);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the mantissa and place the exponent into iptr[0]
- *
- * @param v Supports float, float2, float3, float4.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((overloadable))frexp(float v, int* iptr);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the mantissa and place the exponent into iptr[0]
- *
- * @param v Supports float, float2, float3, float4.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((overloadable))frexp(float2 v, int2* iptr);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the mantissa and place the exponent into iptr[0]
- *
- * @param v Supports float, float2, float3, float4.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((overloadable))frexp(float3 v, int3* iptr);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the mantissa and place the exponent into iptr[0]
- *
- * @param v Supports float, float2, float3, float4.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((overloadable))frexp(float4 v, int4* iptr);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate reciprocal of a value.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))half_recip(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate reciprocal of a value.
- *
- * Supported by API versions 17 and newer.
- */
-extern float2 __attribute__((const, overloadable))half_recip(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate reciprocal of a value.
- *
- * Supported by API versions 17 and newer.
- */
-extern float3 __attribute__((const, overloadable))half_recip(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate reciprocal of a value.
- *
- * Supported by API versions 17 and newer.
- */
-extern float4 __attribute__((const, overloadable))half_recip(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate value of (1.f / sqrt(value)).
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))half_rsqrt(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate value of (1.f / sqrt(value)).
- *
- * Supported by API versions 17 and newer.
- */
-extern float2 __attribute__((const, overloadable))half_rsqrt(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate value of (1.f / sqrt(value)).
- *
- * Supported by API versions 17 and newer.
- */
-extern float3 __attribute__((const, overloadable))half_rsqrt(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate value of (1.f / sqrt(value)).
- *
- * Supported by API versions 17 and newer.
- */
-extern float4 __attribute__((const, overloadable))half_rsqrt(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate square root of a value.
- *
- * Supported by API versions 17 and newer.
- */
-extern float __attribute__((const, overloadable))half_sqrt(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate square root of a value.
- *
- * Supported by API versions 17 and newer.
- */
-extern float2 __attribute__((const, overloadable))half_sqrt(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate square root of a value.
- *
- * Supported by API versions 17 and newer.
- */
-extern float3 __attribute__((const, overloadable))half_sqrt(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/*
- * Return the approximate square root of a value.
- *
- * Supported by API versions 17 and newer.
- */
-extern float4 __attribute__((const, overloadable))half_sqrt(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return sqrt(x*x + y*y)
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))hypot(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return sqrt(x*x + y*y)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))hypot(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return sqrt(x*x + y*y)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))hypot(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return sqrt(x*x + y*y)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))hypot(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the integer exponent of a value
- *
- * Supported by API versions 9 and newer.
- */
-extern int __attribute__((const, overloadable))ilogb(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the integer exponent of a value
- *
- * Supported by API versions 9 and newer.
- */
-extern int2 __attribute__((const, overloadable))ilogb(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the integer exponent of a value
- *
- * Supported by API versions 9 and newer.
- */
-extern int3 __attribute__((const, overloadable))ilogb(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the integer exponent of a value
- *
- * Supported by API versions 9 and newer.
- */
-extern int4 __attribute__((const, overloadable))ilogb(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x * 2^y)
- *
- * @param x Supports 1,2,3,4 components
- * @param y Supports single component or matching vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))ldexp(float x, int y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x * 2^y)
- *
- * @param x Supports 1,2,3,4 components
- * @param y Supports single component or matching vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))ldexp(float2 x, int2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x * 2^y)
- *
- * @param x Supports 1,2,3,4 components
- * @param y Supports single component or matching vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))ldexp(float3 x, int3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x * 2^y)
- *
- * @param x Supports 1,2,3,4 components
- * @param y Supports single component or matching vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))ldexp(float4 x, int4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x * 2^y)
- *
- * @param x Supports 1,2,3,4 components
- * @param y Supports single component or matching vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))ldexp(float2 x, int y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x * 2^y)
- *
- * @param x Supports 1,2,3,4 components
- * @param y Supports single component or matching vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))ldexp(float3 x, int y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (x * 2^y)
- *
- * @param x Supports 1,2,3,4 components
- * @param y Supports single component or matching vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))ldexp(float4 x, int y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the length of a vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))length(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the length of a vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))length(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the length of a vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))length(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the length of a vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))length(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the log gamma
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))lgamma(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the log gamma
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))lgamma(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the log gamma
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))lgamma(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the log gamma
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))lgamma(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the log gamma and sign
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((overloadable))lgamma(float x, int* y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the log gamma and sign
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((overloadable))lgamma(float2 x, int2* y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the log gamma and sign
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((overloadable))lgamma(float3 x, int3* y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the log gamma and sign
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((overloadable))lgamma(float4 x, int4* y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the natural logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))log(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the natural logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))log(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the natural logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))log(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the natural logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))log(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the base 10 logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))log10(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the base 10 logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))log10(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the base 10 logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))log10(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the base 10 logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))log10(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the natural logarithm of (v + 1.0f)
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))log1p(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the natural logarithm of (v + 1.0f)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))log1p(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the natural logarithm of (v + 1.0f)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))log1p(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the natural logarithm of (v + 1.0f)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))log1p(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the base 2 logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))log2(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the base 2 logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))log2(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the base 2 logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))log2(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the base 2 logarithm.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))log2(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the exponent of the value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))logb(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the exponent of the value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))logb(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the exponent of the value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))logb(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the exponent of the value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))logb(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute (a * b) + c
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))mad(float a, float b, float c);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute (a * b) + c
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))mad(float2 a, float2 b, float2 c);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute (a * b) + c
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))mad(float3 a, float3 b, float3 c);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute (a * b) + c
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))mad(float4 a, float4 b, float4 c);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))max(float, float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))max(float2, float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))max(float3, float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))max(float4, float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static char __attribute__((const, overloadable))max(char v1, char v2) {
- return (v1 > v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uchar __attribute__((const, overloadable))max(uchar v1, uchar v2) {
- return (v1 > v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static short __attribute__((const, overloadable))max(short v1, short v2) {
- return (v1 > v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static ushort __attribute__((const, overloadable))max(ushort v1, ushort v2) {
- return (v1 > v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static int __attribute__((const, overloadable))max(int v1, int v2) {
- return (v1 > v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uint __attribute__((const, overloadable))max(uint v1, uint v2) {
- return (v1 > v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static char2 __attribute__((const, overloadable))max(char2 v1, char2 v2) {
- char2 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uchar2 __attribute__((const, overloadable))max(uchar2 v1, uchar2 v2) {
- uchar2 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static short2 __attribute__((const, overloadable))max(short2 v1, short2 v2) {
- short2 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static ushort2 __attribute__((const, overloadable))max(ushort2 v1, ushort2 v2) {
- ushort2 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static int2 __attribute__((const, overloadable))max(int2 v1, int2 v2) {
- int2 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uint2 __attribute__((const, overloadable))max(uint2 v1, uint2 v2) {
- uint2 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static char3 __attribute__((const, overloadable))max(char3 v1, char3 v2) {
- char3 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uchar3 __attribute__((const, overloadable))max(uchar3 v1, uchar3 v2) {
- uchar3 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static short3 __attribute__((const, overloadable))max(short3 v1, short3 v2) {
- short3 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static ushort3 __attribute__((const, overloadable))max(ushort3 v1, ushort3 v2) {
- ushort3 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static int3 __attribute__((const, overloadable))max(int3 v1, int3 v2) {
- int3 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uint3 __attribute__((const, overloadable))max(uint3 v1, uint3 v2) {
- uint3 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static char4 __attribute__((const, overloadable))max(char4 v1, char4 v2) {
- char4 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- tmp.w = (v1.w > v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uchar4 __attribute__((const, overloadable))max(uchar4 v1, uchar4 v2) {
- uchar4 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- tmp.w = (v1.w > v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static short4 __attribute__((const, overloadable))max(short4 v1, short4 v2) {
- short4 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- tmp.w = (v1.w > v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static ushort4 __attribute__((const, overloadable))max(ushort4 v1, ushort4 v2) {
- ushort4 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- tmp.w = (v1.w > v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static int4 __attribute__((const, overloadable))max(int4 v1, int4 v2) {
- int4 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- tmp.w = (v1.w > v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the maximum value from two arguments
- *
- * Suppored by API versions 9 - 19
+ * See also length(), fast_length().
  */
-static uint4 __attribute__((const, overloadable))max(uint4 v1, uint4 v2) {
- uint4 tmp;
- tmp.x = (v1.x > v2.x ? v1.x : v2.x);
- tmp.y = (v1.y > v2.y ? v1.y : v2.y);
- tmp.z = (v1.z > v2.z ? v1.z : v2.z);
- tmp.w = (v1.w > v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern char __attribute__((const, overloadable))max(char v1, char v2);
+extern float __attribute__((const, overloadable))
+    native_length(float v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern char2 __attribute__((const, overloadable))max(char2 v1, char2 v2);
+extern float __attribute__((const, overloadable))
+    native_length(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern char3 __attribute__((const, overloadable))max(char3 v1, char3 v2);
+extern float __attribute__((const, overloadable))
+    native_length(float3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern char4 __attribute__((const, overloadable))max(char4 v1, char4 v2);
+extern float __attribute__((const, overloadable))
+    native_length(float4 v);
 #endif
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
 /*
- * Return the maximum value from two arguments
+ * native_log: Approximate natural logarithm
  *
- * Supported by API versions 21 and newer.
- */
-extern uchar __attribute__((const, overloadable))max(uchar v1, uchar v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))max(uchar2 v1, uchar2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))max(uchar3 v1, uchar3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))max(uchar4 v1, uchar4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern short __attribute__((const, overloadable))max(short v1, short v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern short2 __attribute__((const, overloadable))max(short2 v1, short2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern short3 __attribute__((const, overloadable))max(short3 v1, short3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern short4 __attribute__((const, overloadable))max(short4 v1, short4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort __attribute__((const, overloadable))max(ushort v1, ushort v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))max(ushort2 v1, ushort2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))max(ushort3 v1, ushort3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))max(ushort4 v1, ushort4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern int __attribute__((const, overloadable))max(int v1, int v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern int2 __attribute__((const, overloadable))max(int2 v1, int2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern int3 __attribute__((const, overloadable))max(int3 v1, int3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern int4 __attribute__((const, overloadable))max(int4 v1, int4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uint __attribute__((const, overloadable))max(uint v1, uint v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uint2 __attribute__((const, overloadable))max(uint2 v1, uint2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uint3 __attribute__((const, overloadable))max(uint3 v1, uint3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uint4 __attribute__((const, overloadable))max(uint4 v1, uint4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern long __attribute__((const, overloadable))max(long v1, long v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))max(long2 v1, long2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))max(long3 v1, long3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))max(long4 v1, long4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong __attribute__((const, overloadable))max(ulong v1, ulong v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))max(ulong2 v1, ulong2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))max(ulong3 v1, ulong3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the maximum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))max(ulong4 v1, ulong4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))min(float, float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))min(float2, float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))min(float3, float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))min(float4, float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static char __attribute__((const, overloadable))min(char v1, char v2) {
- return (v1 < v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uchar __attribute__((const, overloadable))min(uchar v1, uchar v2) {
- return (v1 < v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static short __attribute__((const, overloadable))min(short v1, short v2) {
- return (v1 < v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static ushort __attribute__((const, overloadable))min(ushort v1, ushort v2) {
- return (v1 < v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static int __attribute__((const, overloadable))min(int v1, int v2) {
- return (v1 < v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uint __attribute__((const, overloadable))min(uint v1, uint v2) {
- return (v1 < v2 ? v1 : v2);
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static char2 __attribute__((const, overloadable))min(char2 v1, char2 v2) {
- char2 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uchar2 __attribute__((const, overloadable))min(uchar2 v1, uchar2 v2) {
- uchar2 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static short2 __attribute__((const, overloadable))min(short2 v1, short2 v2) {
- short2 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static ushort2 __attribute__((const, overloadable))min(ushort2 v1, ushort2 v2) {
- ushort2 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static int2 __attribute__((const, overloadable))min(int2 v1, int2 v2) {
- int2 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uint2 __attribute__((const, overloadable))min(uint2 v1, uint2 v2) {
- uint2 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static char3 __attribute__((const, overloadable))min(char3 v1, char3 v2) {
- char3 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uchar3 __attribute__((const, overloadable))min(uchar3 v1, uchar3 v2) {
- uchar3 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static short3 __attribute__((const, overloadable))min(short3 v1, short3 v2) {
- short3 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static ushort3 __attribute__((const, overloadable))min(ushort3 v1, ushort3 v2) {
- ushort3 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static int3 __attribute__((const, overloadable))min(int3 v1, int3 v2) {
- int3 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uint3 __attribute__((const, overloadable))min(uint3 v1, uint3 v2) {
- uint3 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static char4 __attribute__((const, overloadable))min(char4 v1, char4 v2) {
- char4 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- tmp.w = (v1.w < v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uchar4 __attribute__((const, overloadable))min(uchar4 v1, uchar4 v2) {
- uchar4 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- tmp.w = (v1.w < v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static short4 __attribute__((const, overloadable))min(short4 v1, short4 v2) {
- short4 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- tmp.w = (v1.w < v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static ushort4 __attribute__((const, overloadable))min(ushort4 v1, ushort4 v2) {
- ushort4 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- tmp.w = (v1.w < v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static int4 __attribute__((const, overloadable))min(int4 v1, int4 v2) {
- int4 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- tmp.w = (v1.w < v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9) && (RS_VERSION <= 19))
-/*
- * Return the minimum value from two arguments
- *
- * Suppored by API versions 9 - 19
- */
-static uint4 __attribute__((const, overloadable))min(uint4 v1, uint4 v2) {
- uint4 tmp;
- tmp.x = (v1.x < v2.x ? v1.x : v2.x);
- tmp.y = (v1.y < v2.y ? v1.y : v2.y);
- tmp.z = (v1.z < v2.z ? v1.z : v2.z);
- tmp.w = (v1.w < v2.w ? v1.w : v2.w);
- return tmp;
-}
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern char __attribute__((const, overloadable))min(char v1, char v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern char2 __attribute__((const, overloadable))min(char2 v1, char2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern char3 __attribute__((const, overloadable))min(char3 v1, char3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern char4 __attribute__((const, overloadable))min(char4 v1, char4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar __attribute__((const, overloadable))min(uchar v1, uchar v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar2 __attribute__((const, overloadable))min(uchar2 v1, uchar2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar3 __attribute__((const, overloadable))min(uchar3 v1, uchar3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uchar4 __attribute__((const, overloadable))min(uchar4 v1, uchar4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern short __attribute__((const, overloadable))min(short v1, short v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern short2 __attribute__((const, overloadable))min(short2 v1, short2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern short3 __attribute__((const, overloadable))min(short3 v1, short3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern short4 __attribute__((const, overloadable))min(short4 v1, short4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort __attribute__((const, overloadable))min(ushort v1, ushort v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort2 __attribute__((const, overloadable))min(ushort2 v1, ushort2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort3 __attribute__((const, overloadable))min(ushort3 v1, ushort3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ushort4 __attribute__((const, overloadable))min(ushort4 v1, ushort4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern int __attribute__((const, overloadable))min(int v1, int v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern int2 __attribute__((const, overloadable))min(int2 v1, int2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern int3 __attribute__((const, overloadable))min(int3 v1, int3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern int4 __attribute__((const, overloadable))min(int4 v1, int4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uint __attribute__((const, overloadable))min(uint v1, uint v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uint2 __attribute__((const, overloadable))min(uint2 v1, uint2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uint3 __attribute__((const, overloadable))min(uint3 v1, uint3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern uint4 __attribute__((const, overloadable))min(uint4 v1, uint4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern long __attribute__((const, overloadable))min(long v1, long v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern long2 __attribute__((const, overloadable))min(long2 v1, long2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern long3 __attribute__((const, overloadable))min(long3 v1, long3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern long4 __attribute__((const, overloadable))min(long4 v1, long4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong __attribute__((const, overloadable))min(ulong v1, ulong v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong2 __attribute__((const, overloadable))min(ulong2 v1, ulong2 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong3 __attribute__((const, overloadable))min(ulong3 v1, ulong3 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the minimum value from two arguments
- *
- * Supported by API versions 21 and newer.
- */
-extern ulong4 __attribute__((const, overloadable))min(ulong4 v1, ulong4 v2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * return start + ((stop - start) * amount)
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))mix(float start, float stop, float amount);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * return start + ((stop - start) * amount)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))mix(float2 start, float2 stop, float2 amount);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * return start + ((stop - start) * amount)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))mix(float3 start, float3 stop, float3 amount);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * return start + ((stop - start) * amount)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))mix(float4 start, float4 stop, float4 amount);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * return start + ((stop - start) * amount)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))mix(float2 start, float2 stop, float amount);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * return start + ((stop - start) * amount)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))mix(float3 start, float3 stop, float amount);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * return start + ((stop - start) * amount)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))mix(float4 start, float4 stop, float amount);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the integral and fractional components of a number.
- *
- * @param x Source value
- * @param iret iret[0] will be set to the integral portion of the number.
- * @return The floating point portion of the value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((overloadable))modf(float x, float* iret);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the integral and fractional components of a number.
- *
- * @param x Source value
- * @param iret iret[0] will be set to the integral portion of the number.
- * @return The floating point portion of the value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((overloadable))modf(float2 x, float2* iret);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the integral and fractional components of a number.
- *
- * @param x Source value
- * @param iret iret[0] will be set to the integral portion of the number.
- * @return The floating point portion of the value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((overloadable))modf(float3 x, float3* iret);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the integral and fractional components of a number.
- *
- * @param x Source value
- * @param iret iret[0] will be set to the integral portion of the number.
- * @return The floating point portion of the value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((overloadable))modf(float4 x, float4* iret);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * generate a nan
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))nan(uint);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acos
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_acos(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acos
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_acos(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acos
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_acos(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acos
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_acos(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acosh
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_acosh(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acosh
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_acosh(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acosh
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_acosh(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acosh
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_acosh(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acospi
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_acospi(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acospi
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_acospi(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acospi
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_acospi(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * acospi
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_acospi(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * asin
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_asin(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * asin
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_asin(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * asin
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_asin(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * asin
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_asin(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * asinh
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_asinh(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * asinh
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_asinh(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * asinh
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_asinh(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * asinh
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_asinh(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse sine divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_asinpi(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse sine divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_asinpi(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse sine divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_asinpi(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse sine divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_asinpi(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_atan(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_atan(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_atan(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_atan(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent of y / x.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_atan2(float y, float x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent of y / x.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_atan2(float2 y, float2 x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent of y / x.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_atan2(float3 y, float3 x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent of y / x.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_atan2(float4 y, float4 x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent of y / x, divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_atan2pi(float y, float x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent of y / x, divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_atan2pi(float2 y, float2 x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent of y / x, divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_atan2pi(float3 y, float3 x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent of y / x, divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_atan2pi(float4 y, float4 x);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse hyperbolic tangent.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_atanh(float in);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse hyperbolic tangent.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_atanh(float2 in);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse hyperbolic tangent.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_atanh(float3 in);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse hyperbolic tangent.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_atanh(float4 in);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_atanpi(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_atanpi(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_atanpi(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the inverse tangent divided by PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_atanpi(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cube root.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_cbrt(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cube root.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_cbrt(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cube root.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_cbrt(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cube root.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_cbrt(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cosine.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_cos(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cosine.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_cos(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cosine.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_cos(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cosine.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_cos(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hypebolic cosine.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_cosh(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hypebolic cosine.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_cosh(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hypebolic cosine.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_cosh(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hypebolic cosine.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_cosh(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cosine of the value * PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_cospi(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cosine of the value * PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_cospi(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cosine of the value * PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_cospi(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the cosine of the value * PI.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_cospi(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate distance between two points.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_distance(float lhs, float rhs);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate distance between two points.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_distance(float2 lhs, float2 rhs);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate distance between two points.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_distance(float3 lhs, float3 rhs);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate distance between two points.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_distance(float4 lhs, float4 rhs);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate division result of two values.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_divide(float lhs, float rhs);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate division result of two values.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_divide(float2 lhs, float2 rhs);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate division result of two values.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_divide(float3 lhs, float3 rhs);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate division result of two values.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_divide(float4 lhs, float4 rhs);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp
- * valid for inputs -86.f to 86.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float __attribute__((const, overloadable))native_exp(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp
- * valid for inputs -86.f to 86.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_exp(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp
- * valid for inputs -86.f to 86.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_exp(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp
- * valid for inputs -86.f to 86.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_exp(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp10
- * valid for inputs -37.f to 37.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float __attribute__((const, overloadable))native_exp10(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp10
- * valid for inputs -37.f to 37.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_exp10(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp10
- * valid for inputs -37.f to 37.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_exp10(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp10
- * valid for inputs -37.f to 37.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_exp10(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp2
- * valid for inputs -125.f to 125.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float __attribute__((const, overloadable))native_exp2(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp2
- * valid for inputs -125.f to 125.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_exp2(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp2
- * valid for inputs -125.f to 125.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_exp2(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate exp2
- * valid for inputs -125.f to 125.f
- * Max 8192 ulps of error
- *
- * Supported by API versions 18 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_exp2(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return (e ^ value) - 1.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_expm1(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return (e ^ value) - 1.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_expm1(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return (e ^ value) - 1.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_expm1(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return (e ^ value) - 1.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_expm1(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return native_sqrt(x*x + y*y)
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_hypot(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return native_sqrt(x*x + y*y)
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_hypot(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return native_sqrt(x*x + y*y)
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_hypot(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return native_sqrt(x*x + y*y)
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_hypot(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate length of a vector.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_length(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate length of a vector.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_length(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate length of a vector.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_length(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the approximate length of a vector.
+ * Fast approximate log.
  *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_length(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log
  * It is not accurate for values very close to zero.
  *
- * Supported by API versions 18 and newer.
+ * See also log().
  */
-extern float __attribute__((const, overloadable))native_log(float v);
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float __attribute__((const, overloadable))
+    native_log(float v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_log(float2 v);
+extern float2 __attribute__((const, overloadable))
+    native_log(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_log(float3 v);
+extern float3 __attribute__((const, overloadable))
+    native_log(float3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float4 __attribute__((const, overloadable))
+    native_log(float4 v);
+#endif
+
 /*
- * Fast approximate log
+ * native_log10: Approximate base 10 logarithm
+ *
+ * Fast approximate log10.
+ *
  * It is not accurate for values very close to zero.
  *
- * Supported by API versions 18 and newer.
+ * See also log10().
  */
-extern float4 __attribute__((const, overloadable))native_log(float4 v);
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float __attribute__((const, overloadable))
+    native_log10(float v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log10
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float __attribute__((const, overloadable))native_log10(float v);
+extern float2 __attribute__((const, overloadable))
+    native_log10(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log10
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_log10(float2 v);
+extern float3 __attribute__((const, overloadable))
+    native_log10(float3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log10
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_log10(float3 v);
+extern float4 __attribute__((const, overloadable))
+    native_log10(float4 v);
 #endif
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
 /*
- * Fast approximate log10
- * It is not accurate for values very close to zero.
+ * native_log1p: Approximate natural logarithm of a value plus 1
  *
- * Supported by API versions 18 and newer.
+ * Returns the approximate natural logarithm of (v + 1.0f)
+ *
+ * See also log1p().
  */
-extern float4 __attribute__((const, overloadable))native_log10(float4 v);
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_log1p(float v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the natural logarithm of (v + 1.0f)
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_log1p(float);
+extern float2 __attribute__((const, overloadable))
+    native_log1p(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the natural logarithm of (v + 1.0f)
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_log1p(float2);
+extern float3 __attribute__((const, overloadable))
+    native_log1p(float3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_log1p(float4 v);
+#endif
+
 /*
- * Return the natural logarithm of (v + 1.0f)
+ * native_log2: Approximate base 2 logarithm
  *
- * Supported by API versions 21 and newer.
+ * Fast approximate log2.
+ *
+ * It is not accurate for values very close to zero.
+ *
+ * See also log2().
  */
-extern float3 __attribute__((const, overloadable))native_log1p(float3);
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float __attribute__((const, overloadable))
+    native_log2(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float2 __attribute__((const, overloadable))
+    native_log2(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float3 __attribute__((const, overloadable))
+    native_log2(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float4 __attribute__((const, overloadable))
+    native_log2(float4 v);
+#endif
+
+/*
+ * native_normalize: Approximately normalize a vector
+ *
+ * Approximately normalizes a vector.
+ *
+ * See also normalize(), fast_normalize().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_normalize(float v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the natural logarithm of (v + 1.0f)
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_log1p(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log2
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float __attribute__((const, overloadable))native_log2(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log2
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_log2(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log2
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_log2(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate log2
- * It is not accurate for values very close to zero.
- *
- * Supported by API versions 18 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_log2(float4 v);
+extern float2 __attribute__((const, overloadable))
+    native_normalize(float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_normalize(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_normalize(float4 v);
+#endif
+
 /*
+ * native_powr: Approximate positive base raised to an exponent
+ *
+ * Fast approximate (base ^ exponent).
+ *
+ * See also powr().
+ *
+ * Parameters:
+ *   base Must be between 0.f and 256.f.  The function is not accurate for values very close to zero.
+ *   exponent Must be between -15.f and 15.f.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float __attribute__((const, overloadable))
+    native_powr(float base, float exponent);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float2 __attribute__((const, overloadable))
+    native_powr(float2 base, float2 exponent);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float3 __attribute__((const, overloadable))
+    native_powr(float3 base, float3 exponent);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float4 __attribute__((const, overloadable))
+    native_powr(float4 base, float4 exponent);
+#endif
+
+/*
+ * native_recip: Approximate reciprocal
+ *
+ * Returns the approximate reciprocal of a value.
+ *
+ * See also half_recip().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_recip(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_recip(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_recip(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_recip(float4 v);
+#endif
+
+/*
+ * native_rootn: Approximate nth root
+ *
+ * Compute the approximate Nth root of a value.
+ *
+ * See also rootn().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_rootn(float v, int n);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_rootn(float2 v, int2 n);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_rootn(float3 v, int3 n);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_rootn(float4 v, int4 n);
+#endif
+
+/*
+ * native_rsqrt: Approximate reciprocal of a square root
+ *
+ * Returns approximate (1 / sqrt(v)).
+ *
+ * See also rsqrt(), half_rsqrt().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_rsqrt(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_rsqrt(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_rsqrt(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_rsqrt(float4 v);
+#endif
+
+/*
+ * native_sin: Approximate sine
+ *
+ * Returns the approximate sine of an angle measured in radians.
+ *
+ * See also sin().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_sin(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_sin(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_sin(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_sin(float4 v);
+#endif
+
+/*
+ * native_sincos: Approximate sine and cosine
+ *
+ * Returns the approximate sine and cosine of a value.
+ *
+ * See also sincos().
+ *
+ * Parameters:
+ *   v The incoming value in radians.
+ *   cos *cos will be set to the cosine value.
+ *
+ * Returns: sine
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((overloadable))
+    native_sincos(float v, float* cos);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((overloadable))
+    native_sincos(float2 v, float2* cos);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((overloadable))
+    native_sincos(float3 v, float3* cos);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((overloadable))
+    native_sincos(float4 v, float4* cos);
+#endif
+
+/*
+ * native_sinh: Approximate hyperbolic sine
+ *
+ * Returns the approximate hyperbolic sine of a value specified in radians.
+ *
+ * See also sinh().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_sinh(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_sinh(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_sinh(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_sinh(float4 v);
+#endif
+
+/*
+ * native_sinpi: Approximate sine of a number multiplied by pi
+ *
+ * Returns the approximate sine of (v * pi), where (v * pi) is measured in radians.
+ *
+ * To get the sine of a value measured in degrees, call sinpi(v / 180.f).
+ *
+ * See also sinpi().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_sinpi(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_sinpi(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_sinpi(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_sinpi(float4 v);
+#endif
+
+/*
+ * native_sqrt: Approximate square root
+ *
+ * Returns the approximate sqrt(v).
+ *
+ * See also sqrt(), half_sqrt().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_sqrt(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_sqrt(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_sqrt(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_sqrt(float4 v);
+#endif
+
+/*
+ * native_tan: Approximate tangent
+ *
+ * Returns the approximate tangent of an angle measured in radians.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_tan(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_tan(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_tan(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_tan(float4 v);
+#endif
+
+/*
+ * native_tanh: Approximate hyperbolic tangent
+ *
+ * Returns the approximate hyperbolic tangent of a value.
+ *
+ * See also tanh().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_tanh(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_tanh(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_tanh(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_tanh(float4 v);
+#endif
+
+/*
+ * native_tanpi: Approximate tangent of a number multiplied by pi
+ *
+ * Returns the approximate tangent of (v * pi), where (v * pi) is measured in radians.
+ *
+ * To get the tangent of a value measured in degrees, call tanpi(v / 180.f).
+ *
+ * See also tanpi().
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float __attribute__((const, overloadable))
+    native_tanpi(float v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    native_tanpi(float2 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float3 __attribute__((const, overloadable))
+    native_tanpi(float3 v);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float4 __attribute__((const, overloadable))
+    native_tanpi(float4 v);
+#endif
+
+/*
+ * nextafter: Next floating point number
+ *
+ * Returns the next representable floating point number from v towards target.
+ *
+ * In rs_fp_relaxed mode, a denormalized input value may not yield the next
+ * denormalized value, as support of denormalized values is optional in
+ * relaxed mode.
+ */
+extern float __attribute__((const, overloadable))
+    nextafter(float v, float target);
+
+extern float2 __attribute__((const, overloadable))
+    nextafter(float2 v, float2 target);
+
+extern float3 __attribute__((const, overloadable))
+    nextafter(float3 v, float3 target);
+
+extern float4 __attribute__((const, overloadable))
+    nextafter(float4 v, float4 target);
+
+/*
+ * normalize: Normalize a vector
+ *
  * Normalize a vector.
  *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_normalize(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Normalize a vector.
+ * For vectors of size 1, returns -1.f for negative values, 0.f for null values, and 1.f for positive values.
  *
- * Supported by API versions 21 and newer.
+ * See also fast_normalize(), native_normalize().
  */
-extern float2 __attribute__((const, overloadable))native_normalize(float2 v);
-#endif
+extern float __attribute__((const, overloadable))
+    normalize(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    normalize(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    normalize(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    normalize(float4 v);
+
 /*
- * Normalize a vector.
+ * pow: Base raised to an exponent
  *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_normalize(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Normalize a vector.
+ * Returns base raised to the power exponent, i.e. base ^ exponent.
  *
- * Supported by API versions 21 and newer.
+ * pown() and powr() are similar.  pown() takes an integer exponent. powr() assumes the base to be non-negative.
  */
-extern float4 __attribute__((const, overloadable))native_normalize(float4 v);
-#endif
+extern float __attribute__((const, overloadable))
+    pow(float base, float exponent);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float2 __attribute__((const, overloadable))
+    pow(float2 base, float2 exponent);
+
+extern float3 __attribute__((const, overloadable))
+    pow(float3 base, float3 exponent);
+
+extern float4 __attribute__((const, overloadable))
+    pow(float4 base, float4 exponent);
+
 /*
- * Fast approximate v ^ y
- * v must be between 0.f and 256.f
- * y must be between -15.f and 15.f
- * It is not accurate for values of v very close to zero.
+ * pown: Base raised to an integer exponent
  *
- * Supported by API versions 18 and newer.
- */
-extern float __attribute__((const, overloadable))native_powr(float v, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate v ^ y
- * v must be between 0.f and 256.f
- * y must be between -15.f and 15.f
- * It is not accurate for values of v very close to zero.
+ * Returns base raised to the power exponent, i.e. base ^ exponent.
  *
- * Supported by API versions 18 and newer.
+ * pow() and powr() are similar.  They both take a float exponent. powr() also assumes the base to be non-negative.
  */
-extern float2 __attribute__((const, overloadable))native_powr(float2 v, float2 y);
-#endif
+extern float __attribute__((const, overloadable))
+    pown(float base, int exponent);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
+extern float2 __attribute__((const, overloadable))
+    pown(float2 base, int2 exponent);
+
+extern float3 __attribute__((const, overloadable))
+    pown(float3 base, int3 exponent);
+
+extern float4 __attribute__((const, overloadable))
+    pown(float4 base, int4 exponent);
+
 /*
- * Fast approximate v ^ y
- * v must be between 0.f and 256.f
- * y must be between -15.f and 15.f
- * It is not accurate for values of v very close to zero.
+ * powr: Positive base raised to an exponent
  *
- * Supported by API versions 18 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_powr(float3 v, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 18))
-/*
- * Fast approximate v ^ y
- * v must be between 0.f and 256.f
- * y must be between -15.f and 15.f
- * It is not accurate for values of v very close to zero.
+ * Returns base raised to the power exponent, i.e. base ^ exponent.  base must be >= 0.
  *
- * Supported by API versions 18 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_powr(float4 v, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the approximate reciprocal of a value.
+ * pow() and pown() are similar.  They both make no assumptions about the base.  pow() takes a float exponent while pown() takes an integer.
  *
- * Supported by API versions 21 and newer.
+ * See also native_powr().
  */
-extern float __attribute__((const, overloadable))native_recip(float v);
-#endif
+extern float __attribute__((const, overloadable))
+    powr(float base, float exponent);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    powr(float2 base, float2 exponent);
+
+extern float3 __attribute__((const, overloadable))
+    powr(float3 base, float3 exponent);
+
+extern float4 __attribute__((const, overloadable))
+    powr(float4 base, float4 exponent);
+
 /*
- * Return the approximate reciprocal of a value.
+ * radians: Converts degrees into radians
  *
- * Supported by API versions 21 and newer.
+ * Converts from degrees to radians.
  */
-extern float2 __attribute__((const, overloadable))native_recip(float2 v);
-#endif
+extern float __attribute__((const, overloadable))
+    radians(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    radians(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    radians(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    radians(float4 v);
+
 /*
- * Return the approximate reciprocal of a value.
+ * remainder: Remainder of a division
  *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_recip(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the approximate reciprocal of a value.
+ * Returns the remainder of (numerator / denominator), where the quotient is rounded towards the nearest integer.
  *
- * Supported by API versions 21 and newer.
+ * The function fmod() is similar but rounds the quotient toward zero instead of toward the nearest integer.
+ * For example, fmod(-3.8f, 2.f) returns -1.8f (-3.8f - -1.f * 2.f)
+ * while remainder(-3.8f, 2.f) returns 0.2f (-3.8f - -2.f * 2.f).
  */
-extern float4 __attribute__((const, overloadable))native_recip(float4 v);
-#endif
+extern float __attribute__((const, overloadable))
+    remainder(float numerator, float denominator);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    remainder(float2 numerator, float2 denominator);
+
+extern float3 __attribute__((const, overloadable))
+    remainder(float3 numerator, float3 denominator);
+
+extern float4 __attribute__((const, overloadable))
+    remainder(float4 numerator, float4 denominator);
+
 /*
+ * remquo: Remainder and quotient of a division
+ *
+ * Returns the quotient and the remainder of (numerator / denominator).
+ *
+ * Only the sign and lowest three bits of the quotient are guaranteed to be accurate.
+ *
+ * This function is useful for implementing periodic functions.  The low three bits of the quotient give the quadrant and the remainder the distance within the quadrant.  For example, an implementation of sin(x) could call remquo(x, PI / 2.f, &quadrant) to reduce a very large value of x to something within a limited range.
+ *
+ * Example: remquo(-23.5f, 8.f, &quot) sets the lowest three bits of quot to 3 and the sign negative.  It returns 0.5f.
+ *
+ * Parameters:
+ *   numerator The numerator.
+ *   denominator The denominator.
+ *   quotient *quotient will be set to the integer quotient.
+ *
+ * Returns: The remainder.  Only the sign and low three bits of *quotient are guaranteed to be accurate.
+ */
+extern float __attribute__((overloadable))
+    remquo(float numerator, float denominator, int* quotient);
+
+extern float2 __attribute__((overloadable))
+    remquo(float2 numerator, float2 denominator, int2* quotient);
+
+extern float3 __attribute__((overloadable))
+    remquo(float3 numerator, float3 denominator, int3* quotient);
+
+extern float4 __attribute__((overloadable))
+    remquo(float4 numerator, float4 denominator, int4* quotient);
+
+/*
+ * rint: Round to even
+ *
+ * Rounds to the nearest integral value.
+ *
+ * rint() rounds half values to even.  For example, rint(0.5f) returns 0.f and rint(1.5f) returns 2.f.  Similarly, rint(-0.5f) returns -0.f and rint(-1.5f) returns -2.f.
+ *
+ * round() is similar but rounds away from zero.  trunc() truncates the decimal fraction.
+ */
+extern float __attribute__((const, overloadable))
+    rint(float v);
+
+extern float2 __attribute__((const, overloadable))
+    rint(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    rint(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    rint(float4 v);
+
+/*
+ * rootn: Nth root
+ *
  * Compute the Nth root of a value.
  *
- * Supported by API versions 21 and newer.
+ * See also native_rootn().
  */
-extern float __attribute__((const, overloadable))native_rootn(float v, int n);
-#endif
+extern float __attribute__((const, overloadable))
+    rootn(float v, int n);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the Nth root of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_rootn(float2 v, int2 n);
-#endif
+extern float2 __attribute__((const, overloadable))
+    rootn(float2 v, int2 n);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the Nth root of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_rootn(float3 v, int3 n);
-#endif
+extern float3 __attribute__((const, overloadable))
+    rootn(float3 v, int3 n);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Compute the Nth root of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_rootn(float4 v, int4 n);
-#endif
+extern float4 __attribute__((const, overloadable))
+    rootn(float4 v, int4 n);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
 /*
- * Return (1 / sqrt(value)).
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_rsqrt(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return (1 / sqrt(value)).
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_rsqrt(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return (1 / sqrt(value)).
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_rsqrt(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return (1 / sqrt(value)).
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_rsqrt(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sine of a value specified in radians.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_sin(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sine of a value specified in radians.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_sin(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sine of a value specified in radians.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_sin(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sine of a value specified in radians.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_sin(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sine and cosine of a value.
- *
- * @return sine
- * @param v The incoming value in radians
- * @param *cosptr cosptr[0] will be set to the cosine value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((overloadable))native_sincos(float v, float* cosptr);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sine and cosine of a value.
- *
- * @return sine
- * @param v The incoming value in radians
- * @param *cosptr cosptr[0] will be set to the cosine value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((overloadable))native_sincos(float2 v, float2* cosptr);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sine and cosine of a value.
- *
- * @return sine
- * @param v The incoming value in radians
- * @param *cosptr cosptr[0] will be set to the cosine value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((overloadable))native_sincos(float3 v, float3* cosptr);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sine and cosine of a value.
- *
- * @return sine
- * @param v The incoming value in radians
- * @param *cosptr cosptr[0] will be set to the cosine value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((overloadable))native_sincos(float4 v, float4* cosptr);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hyperbolic sine of a value specified in radians.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_sinh(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hyperbolic sine of a value specified in radians.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_sinh(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hyperbolic sine of a value specified in radians.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_sinh(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hyperbolic sine of a value specified in radians.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_sinh(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sin(v * PI).
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_sinpi(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sin(v * PI).
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_sinpi(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sin(v * PI).
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_sinpi(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the sin(v * PI).
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_sinpi(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the aproximate sqrt(v).
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_sqrt(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the aproximate sqrt(v).
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_sqrt(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the aproximate sqrt(v).
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_sqrt(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the aproximate sqrt(v).
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_sqrt(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the tangent of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_tan(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the tangent of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_tan(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the tangent of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_tan(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the tangent of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_tan(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hyperbolic tangent of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_tanh(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hyperbolic tangent of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_tanh(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hyperbolic tangent of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_tanh(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return the hyperbolic tangent of a value.
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_tanh(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return tan(v * PI)
- *
- * Supported by API versions 21 and newer.
- */
-extern float __attribute__((const, overloadable))native_tanpi(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return tan(v * PI)
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))native_tanpi(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return tan(v * PI)
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))native_tanpi(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * Return tan(v * PI)
- *
- * Supported by API versions 21 and newer.
- */
-extern float4 __attribute__((const, overloadable))native_tanpi(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the next floating point number from x towards y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))nextafter(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the next floating point number from x towards y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))nextafter(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the next floating point number from x towards y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))nextafter(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the next floating point number from x towards y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))nextafter(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Normalize a vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))normalize(float v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Normalize a vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))normalize(float2 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Normalize a vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))normalize(float3 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Normalize a vector.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))normalize(float4 v);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))pow(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))pow(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))pow(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))pow(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))pown(float x, int y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))pown(float2 x, int2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))pown(float3 x, int3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))pown(float4 x, int4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- * x must be >= 0
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))powr(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- * x must be >= 0
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))powr(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- * x must be >= 0
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))powr(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return x ^ y.
- * x must be >= 0
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))powr(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Convert from degrees to radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))radians(float value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Convert from degrees to radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))radians(float2 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Convert from degrees to radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))radians(float3 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Convert from degrees to radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))radians(float4 value);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return round x/y to the nearest integer then compute the remainder.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))remainder(float x, float y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return round x/y to the nearest integer then compute the remainder.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))remainder(float2 x, float2 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return round x/y to the nearest integer then compute the remainder.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))remainder(float3 x, float3 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return round x/y to the nearest integer then compute the remainder.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))remainder(float4 x, float4 y);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the quotient and the remainder of b/c.  Only the sign and lowest three bits of the quotient are guaranteed to be accurate.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((overloadable))remquo(float b, float c, int* d);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the quotient and the remainder of b/c.  Only the sign and lowest three bits of the quotient are guaranteed to be accurate.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((overloadable))remquo(float2 b, float2 c, int2* d);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the quotient and the remainder of b/c.  Only the sign and lowest three bits of the quotient are guaranteed to be accurate.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((overloadable))remquo(float3 b, float3 c, int3* d);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the quotient and the remainder of b/c.  Only the sign and lowest three bits of the quotient are guaranteed to be accurate.
+ * round: Round away from zero
  *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((overloadable))remquo(float4 b, float4 c, int4* d);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
  * Round to the nearest integral value.
  *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))rint(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Round to the nearest integral value.
+ * round() rounds half values away from zero.  For example, round(0.5f) returns 1.f and round(1.5f) returns 2.f.  Similarly, round(-0.5f) returns -1.f and round(-1.5f) returns -2.f.
  *
- * Supported by API versions 9 and newer.
+ * rint() is similar but rounds half values toward even.  trunc() truncates the decimal fraction.
  */
-extern float2 __attribute__((const, overloadable))rint(float2);
-#endif
+extern float __attribute__((const, overloadable))
+    round(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
+extern float2 __attribute__((const, overloadable))
+    round(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    round(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    round(float4 v);
+
 /*
- * Round to the nearest integral value.
+ * rsqrt: Reciprocal of a square root
  *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))rint(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Round to the nearest integral value.
+ * Returns (1 / sqrt(v)).
  *
- * Supported by API versions 9 and newer.
+ * See also half_rsqrt(), native_rsqrt().
  */
-extern float4 __attribute__((const, overloadable))rint(float4);
-#endif
+extern float __attribute__((const, overloadable))
+    rsqrt(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
+extern float2 __attribute__((const, overloadable))
+    rsqrt(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    rsqrt(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    rsqrt(float4 v);
+
 /*
- * Compute the Nth root of a value.
+ * sign: Sign of a value
  *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))rootn(float v, int n);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the Nth root of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))rootn(float2 v, int2 n);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the Nth root of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))rootn(float3 v, int3 n);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the Nth root of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))rootn(float4 v, int4 n);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Round to the nearest integral value.  Half values are rounded away from zero.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))round(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Round to the nearest integral value.  Half values are rounded away from zero.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))round(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Round to the nearest integral value.  Half values are rounded away from zero.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))round(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Round to the nearest integral value.  Half values are rounded away from zero.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))round(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (1 / sqrt(value)).
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))rsqrt(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (1 / sqrt(value)).
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))rsqrt(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (1 / sqrt(value)).
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))rsqrt(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return (1 / sqrt(value)).
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))rsqrt(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sign of a value.
+ * Returns the sign of a value.
  *
  * if (v < 0) return -1.f;
  * else if (v > 0) return 1.f;
  * else return 0.f;
- *
- * Supported by API versions 9 and newer.
  */
-extern float __attribute__((const, overloadable))sign(float v);
-#endif
+extern float __attribute__((const, overloadable))
+    sign(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sign of a value.
- *
- * if (v < 0) return -1.f;
- * else if (v > 0) return 1.f;
- * else return 0.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))sign(float2 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    sign(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sign of a value.
- *
- * if (v < 0) return -1.f;
- * else if (v > 0) return 1.f;
- * else return 0.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))sign(float3 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    sign(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sign of a value.
- *
- * if (v < 0) return -1.f;
- * else if (v > 0) return 1.f;
- * else return 0.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))sign(float4 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    sign(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the sine of a value specified in radians.
+ * sin: Sine
  *
- * Supported by API versions 9 and newer.
+ * Returns the sine of an angle measured in radians.
+ *
+ * See also native_sin().
  */
-extern float __attribute__((const, overloadable))sin(float);
-#endif
+extern float __attribute__((const, overloadable))
+    sin(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sine of a value specified in radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))sin(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    sin(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sine of a value specified in radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))sin(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    sin(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sine of a value specified in radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))sin(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    sin(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the sine and cosine of a value.
+ * sincos: Sine and cosine
  *
- * @return sine
- * @param v The incoming value in radians
- * @param *cosptr cosptr[0] will be set to the cosine value.
+ * Returns the sine and cosine of a value.
  *
- * Supported by API versions 9 and newer.
+ * See also native_sincos().
+ *
+ * Parameters:
+ *   v The incoming value in radians
+ *   cos *cos will be set to the cosine value.
+ *
+ * Returns: sine of v
  */
-extern float __attribute__((overloadable))sincos(float v, float* cosptr);
-#endif
+extern float __attribute__((overloadable))
+    sincos(float v, float* cos);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sine and cosine of a value.
- *
- * @return sine
- * @param v The incoming value in radians
- * @param *cosptr cosptr[0] will be set to the cosine value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((overloadable))sincos(float2 v, float2* cosptr);
-#endif
+extern float2 __attribute__((overloadable))
+    sincos(float2 v, float2* cos);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sine and cosine of a value.
- *
- * @return sine
- * @param v The incoming value in radians
- * @param *cosptr cosptr[0] will be set to the cosine value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((overloadable))sincos(float3 v, float3* cosptr);
-#endif
+extern float3 __attribute__((overloadable))
+    sincos(float3 v, float3* cos);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sine and cosine of a value.
- *
- * @return sine
- * @param v The incoming value in radians
- * @param *cosptr cosptr[0] will be set to the cosine value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((overloadable))sincos(float4 v, float4* cosptr);
-#endif
+extern float4 __attribute__((overloadable))
+    sincos(float4 v, float4* cos);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the hyperbolic sine of a value specified in radians.
+ * sinh: Hyperbolic sine
  *
- * Supported by API versions 9 and newer.
+ * Returns the hyperbolic sine of v, where v is measured in radians.
+ *
+ * See also native_sinh().
  */
-extern float __attribute__((const, overloadable))sinh(float);
-#endif
+extern float __attribute__((const, overloadable))
+    sinh(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hyperbolic sine of a value specified in radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))sinh(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    sinh(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hyperbolic sine of a value specified in radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))sinh(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    sinh(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hyperbolic sine of a value specified in radians.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))sinh(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    sinh(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the sin(v * PI).
+ * sinpi: Sine of a number multiplied by pi
  *
- * Supported by API versions 9 and newer.
+ * Returns the sine of (v * pi), where (v * pi) is measured in radians.
+ *
+ * To get the sine of a value measured in degrees, call sinpi(v / 180.f).
+ *
+ * See also native_sinpi().
  */
-extern float __attribute__((const, overloadable))sinpi(float);
-#endif
+extern float __attribute__((const, overloadable))
+    sinpi(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sin(v * PI).
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))sinpi(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    sinpi(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sin(v * PI).
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))sinpi(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    sinpi(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the sin(v * PI).
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))sinpi(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    sinpi(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * Return the square root of a value.
+ * sqrt: Square root
  *
- * Supported by API versions 9 and newer.
+ * Returns the square root of a value.
+ *
+ * See also half_sqrt(), native_sqrt().
  */
-extern float __attribute__((const, overloadable))sqrt(float);
-#endif
+extern float __attribute__((const, overloadable))
+    sqrt(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the square root of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))sqrt(float2);
-#endif
+extern float2 __attribute__((const, overloadable))
+    sqrt(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the square root of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))sqrt(float3);
-#endif
+extern float3 __attribute__((const, overloadable))
+    sqrt(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the square root of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))sqrt(float4);
-#endif
+extern float4 __attribute__((const, overloadable))
+    sqrt(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
 /*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
+ * step: 0 if less than a value, 1 otherwise
  *
- * Supported by API versions 9 and newer.
+ * Returns 0.f if v < edge, 1.f otherwise.
+ *
+ * This can be useful to create conditional computations without using loops and branching instructions.  For example, instead of computing (b[i] < a[i]) ? 0.f : atan2(a[i], b[i]) for the corresponding elements of a vector, you could instead use step(a, b) * atan2(a, b).
  */
-extern float __attribute__((const, overloadable))step(float edge, float v);
-#endif
+extern float __attribute__((const, overloadable))
+    step(float edge, float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))step(float2 edge, float2 v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    step(float2 edge, float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))step(float3 edge, float3 v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    step(float3 edge, float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))step(float4 edge, float4 v);
-#endif
+extern float4 __attribute__((const, overloadable))
+    step(float4 edge, float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))step(float2 edge, float v);
-#endif
+extern float2 __attribute__((const, overloadable))
+    step(float2 edge, float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))step(float3 edge, float v);
-#endif
+extern float3 __attribute__((const, overloadable))
+    step(float3 edge, float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))step(float4 edge, float v);
+extern float4 __attribute__((const, overloadable))
+    step(float4 edge, float v);
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+extern float2 __attribute__((const, overloadable))
+    step(float edge, float2 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
- *
- * Supported by API versions 21 and newer.
- */
-extern float2 __attribute__((const, overloadable))step(float edge, float2 v);
+extern float3 __attribute__((const, overloadable))
+    step(float edge, float3 v);
 #endif
 
 #if (defined(RS_VERSION) && (RS_VERSION >= 21))
-/*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
- *
- * Supported by API versions 21 and newer.
- */
-extern float3 __attribute__((const, overloadable))step(float edge, float3 v);
+extern float4 __attribute__((const, overloadable))
+    step(float edge, float4 v);
 #endif
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
 /*
- * if (v < edge)
- * return 0.f;
- * else
- * return 1.f;
+ * tan: Tangent
  *
- * Supported by API versions 21 and newer.
+ * Returns the tangent of an angle measured in radians.
+ *
+ * See also native_tan().
  */
-extern float4 __attribute__((const, overloadable))step(float edge, float4 v);
-#endif
+extern float __attribute__((const, overloadable))
+    tan(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
+extern float2 __attribute__((const, overloadable))
+    tan(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    tan(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    tan(float4 v);
+
 /*
- * Return the tangent of a value.
+ * tanh: Hyperbolic tangent
  *
- * Supported by API versions 9 and newer.
+ * Returns the hyperbolic tangent of a value.
+ *
+ * See also native_tanh().
  */
-extern float __attribute__((const, overloadable))tan(float);
-#endif
+extern float __attribute__((const, overloadable))
+    tanh(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
+extern float2 __attribute__((const, overloadable))
+    tanh(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    tanh(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    tanh(float4 v);
+
 /*
- * Return the tangent of a value.
+ * tanpi: Tangent of a number multiplied by pi
  *
- * Supported by API versions 9 and newer.
+ * Returns the tangent of (v * pi), where (v * pi) is measured in radians.
+ *
+ * To get the tangent of a value measured in degrees, call tanpi(v / 180.f).
+ *
+ * See also native_tanpi().
  */
-extern float2 __attribute__((const, overloadable))tan(float2);
-#endif
+extern float __attribute__((const, overloadable))
+    tanpi(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
+extern float2 __attribute__((const, overloadable))
+    tanpi(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    tanpi(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    tanpi(float4 v);
+
 /*
- * Return the tangent of a value.
+ * tgamma: Gamma function
  *
- * Supported by API versions 9 and newer.
+ * Returns the gamma function of a value.
+ *
+ * See also lgamma().
  */
-extern float3 __attribute__((const, overloadable))tan(float3);
-#endif
+extern float __attribute__((const, overloadable))
+    tgamma(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
+extern float2 __attribute__((const, overloadable))
+    tgamma(float2 v);
+
+extern float3 __attribute__((const, overloadable))
+    tgamma(float3 v);
+
+extern float4 __attribute__((const, overloadable))
+    tgamma(float4 v);
+
 /*
- * Return the tangent of a value.
+ * trunc: Truncates a floating point
  *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))tan(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hyperbolic tangent of a value.
+ * Rounds to integral using truncation.
  *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))tanh(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hyperbolic tangent of a value.
+ * For example, trunc(1.7f) returns 1.f and trunc(-1.7f) returns -1.f.
  *
- * Supported by API versions 9 and newer.
+ * See rint() and round() for other rounding options.
  */
-extern float2 __attribute__((const, overloadable))tanh(float2);
-#endif
+extern float __attribute__((const, overloadable))
+    trunc(float v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hyperbolic tangent of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))tanh(float3);
-#endif
+extern float2 __attribute__((const, overloadable))
+    trunc(float2 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return the hyperbolic tangent of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))tanh(float4);
-#endif
+extern float3 __attribute__((const, overloadable))
+    trunc(float3 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return tan(v * PI)
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))tanpi(float);
-#endif
+extern float4 __attribute__((const, overloadable))
+    trunc(float4 v);
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return tan(v * PI)
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))tanpi(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return tan(v * PI)
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))tanpi(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Return tan(v * PI)
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))tanpi(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the gamma function of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))tgamma(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the gamma function of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))tgamma(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the gamma function of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))tgamma(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * Compute the gamma function of a value.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))tgamma(float4);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * ound to integral using truncation.
- *
- * Supported by API versions 9 and newer.
- */
-extern float __attribute__((const, overloadable))trunc(float);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * ound to integral using truncation.
- *
- * Supported by API versions 9 and newer.
- */
-extern float2 __attribute__((const, overloadable))trunc(float2);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * ound to integral using truncation.
- *
- * Supported by API versions 9 and newer.
- */
-extern float3 __attribute__((const, overloadable))trunc(float3);
-#endif
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 9))
-/*
- * ound to integral using truncation.
- *
- * Supported by API versions 9 and newer.
- */
-extern float4 __attribute__((const, overloadable))trunc(float4);
-#endif
-
-#endif // __rs_core_math_rsh__
+#endif // RENDERSCRIPT_RS_CORE_MATH_RSH
diff --git a/renderscript/include/rs_debug.rsh b/renderscript/include/rs_debug.rsh
index 7a13c9d..b6a6fb2 100644
--- a/renderscript/include/rs_debug.rsh
+++ b/renderscript/include/rs_debug.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,254 +14,216 @@
  * limitations under the License.
  */
 
-/** @file rs_debug.rsh
- *  \brief Utility debugging routines
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_debug.rsh: Utility debugging routines
  *
- *  Routines intended to be used during application developement.  These should
- *  not be used in shipping applications.  All print a string and value pair to
- *  the standard log.
- *
+ * Routines intended to be used during application development.  These should
+ * not be used in shipping applications.  All print a string and value pair to
+ * the standard log.
  */
-
-#ifndef __RS_DEBUG_RSH__
-#define __RS_DEBUG_RSH__
-
-
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float, float);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float, float, float);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float, float, float, float);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, float4);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, double);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, const rs_matrix4x4 *);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, const rs_matrix3x3 *);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, const rs_matrix2x2 *);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, int);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, uint);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, long);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, unsigned long);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, long long);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, unsigned long long);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, const void *);
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 17))
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, char);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, char2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, char3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, char4);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, unsigned char);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, uchar2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, uchar3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, uchar4);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, short);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, short2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, short3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, short4);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, unsigned short);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, ushort2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, ushort3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, ushort4);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, int2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, int3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, int4);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, uint2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, uint3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, uint4);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, long2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, long3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, long4);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, ulong2);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, ulong3);
-/**
- * Debug function.  Prints a string and value to the log.
- */
-extern void __attribute__((overloadable))
-    rsDebug(const char *, ulong4);
-#endif  // (defined(RS_VERSION) && (RS_VERSION >= 17))
+#ifndef RENDERSCRIPT_RS_DEBUG_RSH
+#define RENDERSCRIPT_RS_DEBUG_RSH
 
 #define RS_DEBUG(a) rsDebug(#a, a)
 #define RS_DEBUG_MARKER rsDebug(__FILE__, __LINE__)
 
+/*
+ * Debug function.  Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, double a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, int a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, uint a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, long a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, ulong a);
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, int2 a);
 #endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, int3 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, int4 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, uint2 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, uint3 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, uint4 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, long2 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, long3 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, long4 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, ulong2 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, ulong3 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, ulong4 a);
+#endif
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, float a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, float2 a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, float3 a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, float4 a);
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, char a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, char2 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, char3 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, char4 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, uchar a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, uchar2 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, uchar3 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, uchar4 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, short a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, short2 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, short3 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, short4 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, ushort a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, ushort2 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, ushort3 a);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, ushort4 a);
+#endif
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, float a, float b);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, float a, float b, float c);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, float a, float b, float c, float d);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, long long a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, unsigned long long a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, const void* a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, const rs_matrix4x4* a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, const rs_matrix3x3* a);
+
+extern void __attribute__((overloadable))
+    rsDebug(const char* message, const rs_matrix2x2* a);
+
+#endif // RENDERSCRIPT_RS_DEBUG_RSH
diff --git a/renderscript/include/rs_element.rsh b/renderscript/include/rs_element.rsh
index 0230f10..91233c2 100644
--- a/renderscript/include/rs_element.rsh
+++ b/renderscript/include/rs_element.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,130 +14,192 @@
  * limitations under the License.
  */
 
-/** @file rs_element.rsh
- *  \brief Element routines
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_element.rsh: Element functions
  *
+ * The term "element" is used a bit ambiguously in RenderScript, as both
+ * the type of an item of an allocation and the instantiation of that type:
  *
+ * rs_element is a handle to a type specification, and
+ *
+ * In functions like rsGetElementAt(), "element" means the instantiation
+ * of the type, i.e. an item of an allocation.
+ *
+ * The functions below let you query the characteristics of the type specification.
+ *
+ * To create complex elements, use the Element.Builder Java class.
+ * For common elements, in Java you can simply use one of the many predefined elements
+ * like F32_2.  You can't create elements from a script.
+ *
+ * An element can be a simple data type as found in C/C++, a handle type,
+ * a structure, or a fixed size vector (of size 2, 3, or 4) of sub-elements.
+ *
+ * Elements can also have a kind, which is semantic information used mostly to
+ * interpret pixel data.
  */
+#ifndef RENDERSCRIPT_RS_ELEMENT_RSH
+#define RENDERSCRIPT_RS_ELEMENT_RSH
 
-#ifndef __RS_ELEMENT_RSH__
-#define __RS_ELEMENT_RSH__
-
-// New API's
+/*
+ * rsElementGetBytesSize: Return the size of an element
+ *
+ * Returns the size in bytes that an instantiation of this element will occupy.
+ */
 #if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern uint32_t __attribute__((overloadable))
+    rsElementGetBytesSize(rs_element e);
+#endif
 
-/**
+/*
+ * rsElementGetDataKind: Return the kind of an element
+ *
+ * Returns the element's data kind.  This is used to interpret pixel data.
+ *
+ * See rs_data_kind.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_data_kind __attribute__((overloadable))
+    rsElementGetDataKind(rs_element e);
+#endif
+
+/*
+ * rsElementGetDataType: Return the data type of an element
+ *
+ * Returns the element's base data type.  This can be a type similar to C/C++ (e.g. RS_TYPE_UNSIGNED_8),
+ * a handle (e.g. RS_TYPE_ALLOCATION and RS_TYPE_ELEMENT), or a more complex numerical type
+ * (e.g. RS_TYPE_UNSIGNED_5_6_5 and RS_TYPE_MATRIX_4X4).
+ *
+ * If the element describes a vector, this function returns the data type of one of its items.
+ *
+ * If the element describes a structure, RS_TYPE_NONE is returned.
+ *
+ * See rs_data_type.
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_data_type __attribute__((overloadable))
+    rsElementGetDataType(rs_element e);
+#endif
+
+/*
+ * rsElementGetSubElement: Return a sub element of a complex element
+ *
+ * If the element represents a structure, this function returns the sub-element at
+ * the specified index.
+ *
+ * If the element is not a structure or the index is greater than or equal to the number
+ * of sub-elements, an invalid handle is returned.
+ *
+ * Parameters:
+ *   e Element to query
+ *   index Index of the sub-element to return
+ *
+ * Returns: Sub-element at the given index
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_element __attribute__((overloadable))
+    rsElementGetSubElement(rs_element e, uint32_t index);
+#endif
+
+/*
+ * rsElementGetSubElementArraySize: Return the array size of a sub element of a complex element
+ *
+ * For complex elements, some sub-elements could be statically
+ * sized arrays. This function returns the array size of the
+ * sub-element at the index.
+ *
+ * Parameters:
+ *   e Element to query
+ *   index Index of the sub-element
+ *
+ * Returns: Array size of the sub-element at the given index
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern uint32_t __attribute__((overloadable))
+    rsElementGetSubElementArraySize(rs_element e, uint32_t index);
+#endif
+
+/*
+ * rsElementGetSubElementCount: Return the number of sub-elements
+ *
  * Elements could be simple, such as an int or a float, or a
- * structure with multiple sub elements, such as a collection of
- * floats, float2, float4. This function returns zero for simple
+ * structure with multiple sub-elements, such as a collection of
+ * floats, float2, float4.  This function returns zero for simple
  * elements or the number of sub-elements otherwise.
  *
- * @param e element to get data from
- * @return number of sub-elements in this element
+ * Parameters:
+ *   e Element to get data from
+ *
+ * Returns: Number of sub-elements in this element
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 extern uint32_t __attribute__((overloadable))
     rsElementGetSubElementCount(rs_element e);
+#endif
 
-/**
- * For complex elements, this function will return the
- * sub-element at index
+/*
+ * rsElementGetSubElementName: Return the name of a sub-element
  *
- * @param e element to get data from
- * @param index index of the sub-element to return
- * @return sub-element in this element at given index
+ * For complex elements, this function returns the name of the sub-element
+ * at the specified index.
+ *
+ * Parameters:
+ *   e Element to get data from
+ *   index Index of the sub-element
+ *   name Array to store the name into
+ *   nameLength Length of the provided name array
+ *
+ * Returns: Number of characters actually written, excluding the null terminator
  */
-extern rs_element __attribute__((overloadable))
-    rsElementGetSubElement(rs_element, uint32_t index);
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern uint32_t __attribute__((overloadable))
+    rsElementGetSubElementName(rs_element e, uint32_t index, char* name, uint32_t nameLength);
+#endif
 
-/**
+/*
+ * rsElementGetSubElementNameLength: Return the length of the name of a sub-element
+ *
  * For complex elements, this function will return the length of
  * sub-element name at index
  *
- * @param e element to get data from
- * @param index index of the sub-element to return
- * @return length of the sub-element name including the null
- *         terminator (size of buffer needed to write the name)
+ * Parameters:
+ *   e Element to get data from
+ *   index Index of the sub-element to return
+ *
+ * Returns: Length of the sub-element name including the null terminator (size of buffer needed to write the name)
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 extern uint32_t __attribute__((overloadable))
     rsElementGetSubElementNameLength(rs_element e, uint32_t index);
+#endif
 
-/**
- * For complex elements, this function will return the
- * sub-element name at index
- *
- * @param e element to get data from
- * @param index index of the sub-element
- * @param name array to store the name into
- * @param nameLength length of the provided name array
- * @return number of characters actually written, excluding the
- *         null terminator
- */
-extern uint32_t __attribute__((overloadable))
-    rsElementGetSubElementName(rs_element e, uint32_t index, char *name, uint32_t nameLength);
-
-/**
- * For complex elements, some sub-elements could be statically
- * sized arrays. This function will return the array size for
- * sub-element at index
- *
- * @param e element to get data from
- * @param index index of the sub-element
- * @return array size of sub-element in this element at given
- *         index
- */
-extern uint32_t __attribute__((overloadable))
-    rsElementGetSubElementArraySize(rs_element e, uint32_t index);
-
-/**
+/*
  * This function specifies the location of a sub-element within
  * the element
  *
- * @param e element to get data from
- * @param index index of the sub-element
- * @return offset in bytes of sub-element in this element at
- *         given index
+ * Parameters:
+ *   e Element to get data from
+ *   index Index of the sub-element
+ *
+ * Returns: Offset in bytes of sub-element in this element at given index
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 extern uint32_t __attribute__((overloadable))
     rsElementGetSubElementOffsetBytes(rs_element e, uint32_t index);
+#endif
 
-/**
- * Returns the size of element in bytes
- *
- * @param e element to get data from
- * @return total size of the element in bytes
- */
-extern uint32_t __attribute__((overloadable))
-    rsElementGetBytesSize(rs_element e);
-
-/**
- * Returns the element's data type
- *
- * @param e element to get data from
- * @return element's data type
- */
-extern rs_data_type __attribute__((overloadable))
-    rsElementGetDataType(rs_element e);
-
-/**
- * Returns the element's data kind
- *
- * @param e element to get data from
- * @return element's data size
- */
-extern rs_data_kind __attribute__((overloadable))
-    rsElementGetDataKind(rs_element e);
-
-/**
+/*
  * Returns the element's vector size
  *
- * @param e element to get data from
- * @return length of the element vector (for float2, float3,
- *         etc.)
+ * Parameters:
+ *   e Element to get data from
+ *
+ * Returns: Length of the element vector (for float2, float3, etc.)
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 extern uint32_t __attribute__((overloadable))
     rsElementGetVectorSize(rs_element e);
+#endif
 
-#endif // (defined(RS_VERSION) && (RS_VERSION >= 16))
-
-#endif // __RS_ELEMENT_RSH__
-
+#endif // RENDERSCRIPT_RS_ELEMENT_RSH
diff --git a/renderscript/include/rs_graphics.rsh b/renderscript/include/rs_graphics.rsh
index 1fcb7ed..1f74518 100644
--- a/renderscript/include/rs_graphics.rsh
+++ b/renderscript/include/rs_graphics.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011-2012 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,403 +14,344 @@
  * limitations under the License.
  */
 
-/** @file rs_graphics.rsh
- *  \brief RenderScript graphics API
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_graphics.rsh: RenderScript graphics API
  *
- *  A set of graphics functions used by RenderScript.
- *
+ * A set of graphics functions used by RenderScript.
  */
-#ifndef __RS_GRAPHICS_RSH__
-#define __RS_GRAPHICS_RSH__
+#ifndef RENDERSCRIPT_RS_GRAPHICS_RSH
+#define RENDERSCRIPT_RS_GRAPHICS_RSH
 
 #ifdef __LP64__
-//#error "RenderScript graphics is deprecated and not supported in 64bit mode."
+// TODO We need to fix some of the builds before enabling this error:
+// #error "RenderScript graphics is deprecated and not supported in 64bit mode."
 #else
-
 #include "rs_mesh.rsh"
 #include "rs_program.rsh"
+#endif
 
-#if (defined(RS_VERSION) && (RS_VERSION >= 14))
-/**
- * Set the color target used for all subsequent rendering calls
- * @param colorTarget
- * @param slot
+/*
+ * Sync the contents of an allocation.
+ *
+ * If the source is specified, sync from memory space specified by source.
+ *
+ * If the source is not specified, sync from its SCRIPT memory space to its HW
+ * memory spaces.
  */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgAllocationSyncAll(rs_allocation alloc);
+#endif
+
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern void __attribute__((overloadable))
+    rsgAllocationSyncAll(rs_allocation alloc, rs_allocation_usage_type source);
+#endif
+#endif
+
+/*
+ * Set the color target used for all subsequent rendering calls
+ */
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
 extern void __attribute__((overloadable))
     rsgBindColorTarget(rs_allocation colorTarget, uint slot);
+#endif
+#endif
 
-/**
- * Clear the previously set color target
- * @param slot
+/*
+ * Bind a new Allocation object to a ProgramFragment or ProgramVertex.
+ * The Allocation must be a valid constant input for the Program.
+ *
+ * Parameters:
+ *   ps program fragment object
+ *   slot index of the constant buffer on the program
+ *   c constants to bind
+ *   pv program vertex object
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgClearColorTarget(uint slot);
+    rsgBindConstant(rs_program_fragment ps, uint slot, rs_allocation c);
+#endif
 
-/**
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgBindConstant(rs_program_vertex pv, uint slot, rs_allocation c);
+#endif
+
+/*
  * Set the depth target used for all subsequent rendering calls
- * @param depthTarget
  */
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
 extern void __attribute__((overloadable))
     rsgBindDepthTarget(rs_allocation depthTarget);
+#endif
+#endif
 
-/**
- * Clear the previously set depth target
- */
-extern void __attribute__((overloadable))
-    rsgClearDepthTarget(void);
-
-/**
- * Clear all color and depth targets and resume rendering into
- * the framebuffer
- */
-extern void __attribute__((overloadable))
-    rsgClearAllRenderTargets(void);
-
-/**
- * Force RenderScript to finish all rendering commands
- */
-extern uint __attribute__((overloadable))
-    rsgFinish(void);
-
-#endif //defined(RS_VERSION) && (RS_VERSION >= 14)
-
-/**
- * Bind a new ProgramFragment to the rendering context.
+/*
+ * Binds the font object to be used for all subsequent font rendering calls
  *
- * @param pf
+ * Parameters:
+ *   font object to bind
  */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgBindFont(rs_font font);
+#endif
+
+/*
+ * Bind a new ProgramFragment to the rendering context.
+ */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
     rsgBindProgramFragment(rs_program_fragment pf);
+#endif
 
-/**
- * Bind a new ProgramStore to the rendering context.
- *
- * @param ps
- */
-extern void __attribute__((overloadable))
-    rsgBindProgramStore(rs_program_store ps);
-
-/**
- * Bind a new ProgramVertex to the rendering context.
- *
- * @param pv
- */
-extern void __attribute__((overloadable))
-    rsgBindProgramVertex(rs_program_vertex pv);
-
-/**
+/*
  * Bind a new ProgramRaster to the rendering context.
- *
- * @param pr
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
     rsgBindProgramRaster(rs_program_raster pr);
+#endif
 
-/**
+/*
+ * Bind a new ProgramStore to the rendering context.
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgBindProgramStore(rs_program_store ps);
+#endif
+
+/*
+ * Bind a new ProgramVertex to the rendering context.
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgBindProgramVertex(rs_program_vertex pv);
+#endif
+
+/*
  * Bind a new Sampler object to a ProgramFragment.  The sampler will
  * operate on the texture bound at the matching slot.
- *
- * @param slot
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgBindSampler(rs_program_fragment, uint slot, rs_sampler);
+    rsgBindSampler(rs_program_fragment fragment, uint slot, rs_sampler sampler);
+#endif
 
-/**
+/*
  * Bind a new Allocation object to a ProgramFragment.  The
  * Allocation must be a valid texture for the Program.  The sampling
  * of the texture will be controled by the Sampler bound at the
  * matching slot.
- *
- * @param slot
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgBindTexture(rs_program_fragment, uint slot, rs_allocation);
+    rsgBindTexture(rs_program_fragment v, uint slot, rs_allocation alloc);
+#endif
 
-/**
- * Load the projection matrix for a currently bound fixed function
- * vertex program. Calling this function with a custom vertex shader
- * would result in an error.
- * @param proj projection matrix
+/*
+ * Clear all color and depth targets and resume rendering into
+ * the framebuffer
  */
-extern void __attribute__((overloadable))
-    rsgProgramVertexLoadProjectionMatrix(const rs_matrix4x4 *proj);
-/**
- * Load the model matrix for a currently bound fixed function
- * vertex program. Calling this function with a custom vertex shader
- * would result in an error.
- * @param model model matrix
- */
-extern void __attribute__((overloadable))
-    rsgProgramVertexLoadModelMatrix(const rs_matrix4x4 *model);
-/**
- * Load the texture matrix for a currently bound fixed function
- * vertex program. Calling this function with a custom vertex shader
- * would result in an error.
- * @param tex texture matrix
- */
-extern void __attribute__((overloadable))
-    rsgProgramVertexLoadTextureMatrix(const rs_matrix4x4 *tex);
-/**
- * Get the projection matrix for a currently bound fixed function
- * vertex program. Calling this function with a custom vertex shader
- * would result in an error.
- * @param proj matrix to store the current projection matrix into
- */
-extern void __attribute__((overloadable))
-    rsgProgramVertexGetProjectionMatrix(rs_matrix4x4 *proj);
-
-/**
- * Set the constant color for a fixed function emulation program.
- *
- * @param pf
- * @param r
- * @param g
- * @param b
- * @param a
- */
-extern void __attribute__((overloadable))
-    rsgProgramFragmentConstantColor(rs_program_fragment pf, float r, float g, float b, float a);
-
-/**
- * Bind a new Allocation object to a ProgramFragment.  The
- * Allocation must be a valid constant input for the Program.
- *
- * @param ps program object
- * @param slot index of the constant buffer on the program
- * @param c constants to bind
- */
-extern void __attribute__((overloadable))
-    rsgBindConstant(rs_program_fragment ps, uint slot, rs_allocation c);
-
-/**
- * Bind a new Allocation object to a ProgramVertex.  The
- * Allocation must be a valid constant input for the Program.
- *
- * @param pv program object
- * @param slot index of the constant buffer on the program
- * @param c constants to bind
- */
-extern void __attribute__((overloadable))
-    rsgBindConstant(rs_program_vertex pv, uint slot, rs_allocation c);
-
-/**
- * Get the width of the current rendering surface.
- *
- * @return uint
- */
-extern uint __attribute__((overloadable))
-    rsgGetWidth(void);
-
-/**
- * Get the height of the current rendering surface.
- *
- * @return uint
- */
-extern uint __attribute__((overloadable))
-    rsgGetHeight(void);
-
-
-/**
- * Sync the contents of an allocation from its SCRIPT memory space to its HW
- * memory spaces.
- *
- * @param alloc
- */
-extern void __attribute__((overloadable))
-    rsgAllocationSyncAll(rs_allocation alloc);
-
+#ifndef __LP64__
 #if (defined(RS_VERSION) && (RS_VERSION >= 14))
-
-/**
- * Sync the contents of an allocation from memory space
- * specified by source.
- *
- * @param alloc
- * @param source
- */
 extern void __attribute__((overloadable))
-    rsgAllocationSyncAll(rs_allocation alloc,
-                         rs_allocation_usage_type source);
+    rsgClearAllRenderTargets(void);
+#endif
+#endif
 
-#endif //defined(RS_VERSION) && (RS_VERSION >= 14)
-
-/**
- * Low performance utility function for drawing a simple rectangle.  Not
- * intended for drawing large quantities of geometry.
- *
- * @param x1
- * @param y1
- * @param x2
- * @param y2
- * @param z
+/*
+ * Clears the rendering surface to the specified color.
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgDrawRect(float x1, float y1, float x2, float y2, float z);
+    rsgClearColor(float r, float g, float b, float a);
+#endif
 
-/**
+/*
+ * Clear the previously set color target
+ */
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern void __attribute__((overloadable))
+    rsgClearColorTarget(uint slot);
+#endif
+#endif
+
+/*
+ * Clears the depth surface to the specified value.
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgClearDepth(float value);
+#endif
+
+/*
+ * Clear the previously set depth target
+ */
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern void __attribute__((overloadable))
+    rsgClearDepthTarget(void);
+#endif
+#endif
+
+/*
+ * Draw a mesh using the current context state.
+ *
+ * If primitiveIndex is specified, draw part of a mesh using the current context state.
+ *
+ * If start and len are also specified, draw specified index range of part of a mesh using the current context state.
+ *
+ * Otherwise the whole mesh is rendered.
+ *
+ * Parameters:
+ *   ism mesh object to render
+ *   primitiveIndex for meshes that contain multiple primitive groups this parameter specifies the index of the group to draw.
+ *   start starting index in the range
+ *   len number of indices to draw
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgDrawMesh(rs_mesh ism);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgDrawMesh(rs_mesh ism, uint primitiveIndex);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgDrawMesh(rs_mesh ism, uint primitiveIndex, uint start, uint len);
+#endif
+
+/*
  * Low performance utility function for drawing a simple quad.  Not intended for
  * drawing large quantities of geometry.
- *
- * @param x1
- * @param y1
- * @param z1
- * @param x2
- * @param y2
- * @param z2
- * @param x3
- * @param y3
- * @param z3
- * @param x4
- * @param y4
- * @param z4
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgDrawQuad(float x1, float y1, float z1,
-                float x2, float y2, float z2,
-                float x3, float y3, float z3,
-                float x4, float y4, float z4);
+    rsgDrawQuad(float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3,
+                float z3, float x4, float y4, float z4);
+#endif
 
-
-/**
+/*
  * Low performance utility function for drawing a textured quad.  Not intended
  * for drawing large quantities of geometry.
- *
- * @param x1
- * @param y1
- * @param z1
- * @param u1
- * @param v1
- * @param x2
- * @param y2
- * @param z2
- * @param u2
- * @param v2
- * @param x3
- * @param y3
- * @param z3
- * @param u3
- * @param v3
- * @param x4
- * @param y4
- * @param z4
- * @param u4
- * @param v4
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgDrawQuadTexCoords(float x1, float y1, float z1, float u1, float v1,
-                         float x2, float y2, float z2, float u2, float v2,
-                         float x3, float y3, float z3, float u3, float v3,
-                         float x4, float y4, float z4, float u4, float v4);
+    rsgDrawQuadTexCoords(float x1, float y1, float z1, float u1, float v1, float x2, float y2,
+                         float z2, float u2, float v2, float x3, float y3, float z3, float u3,
+                         float v3, float x4, float y4, float z4, float u4, float v4);
+#endif
 
+/*
+ * Low performance utility function for drawing a simple rectangle.  Not
+ * intended for drawing large quantities of geometry.
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgDrawRect(float x1, float y1, float x2, float y2, float z);
+#endif
 
-/**
+/*
  * Low performance function for drawing rectangles in screenspace.  This
  * function uses the default passthough ProgramVertex.  Any bound ProgramVertex
  * is ignored.  This function has considerable overhead and should not be used
  * for drawing in shipping applications.
- *
- * @param x
- * @param y
- * @param z
- * @param w
- * @param h
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
     rsgDrawSpriteScreenspace(float x, float y, float z, float w, float h);
+#endif
 
-extern void __attribute__((overloadable))
-    rsgDrawPath(rs_path p);
-
-/**
- * Draw a mesh using the current context state.  The whole mesh is
- * rendered.
- *
- * @param ism
- */
-extern void __attribute__((overloadable))
-    rsgDrawMesh(rs_mesh ism);
-/**
- * Draw part of a mesh using the current context state.
- * @param ism mesh object to render
- * @param primitiveIndex for meshes that contain multiple primitive groups
- *        this parameter specifies the index of the group to draw.
- */
-extern void __attribute__((overloadable))
-    rsgDrawMesh(rs_mesh ism, uint primitiveIndex);
-/**
- * Draw specified index range of part of a mesh using the current context state.
- * @param ism mesh object to render
- * @param primitiveIndex for meshes that contain multiple primitive groups
- *        this parameter specifies the index of the group to draw.
- * @param start starting index in the range
- * @param len number of indices to draw
- */
-extern void __attribute__((overloadable))
-    rsgDrawMesh(rs_mesh ism, uint primitiveIndex, uint start, uint len);
-
-/**
- * Clears the rendering surface to the specified color.
- *
- * @param r
- * @param g
- * @param b
- * @param a
- */
-extern void __attribute__((overloadable))
-    rsgClearColor(float r, float g, float b, float a);
-
-/**
- * Clears the depth suface to the specified value.
- */
-extern void __attribute__((overloadable))
-    rsgClearDepth(float value);
-/**
+/*
  * Draws text given a string and location
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgDrawText(const char *, int x, int y);
-/**
- * \overload
+    rsgDrawText(const char* text, int x, int y);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgDrawText(rs_allocation alloc, int x, int y);
+#endif
+
+/*
+ * Force RenderScript to finish all rendering commands
  */
-extern void __attribute__((overloadable))
-    rsgDrawText(rs_allocation, int x, int y);
-/**
- * Binds the font object to be used for all subsequent font rendering calls
- * @param font object to bind
- */
-extern void __attribute__((overloadable))
-    rsgBindFont(rs_font font);
-/**
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern uint __attribute__((overloadable))
+    rsgFinish(void);
+#endif
+#endif
+
+/*
  * Sets the font color for all subsequent rendering calls
- * @param r red component
- * @param g green component
- * @param b blue component
- * @param a alpha component
+ *
+ * Parameters:
+ *   r red component
+ *   g green component
+ *   b blue component
+ *   a alpha component
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
     rsgFontColor(float r, float g, float b, float a);
-/**
+#endif
+
+/*
+ * Get the height of the current rendering surface.
+ */
+#ifndef __LP64__
+extern uint __attribute__((overloadable))
+    rsgGetHeight(void);
+#endif
+
+/*
+ * Get the width of the current rendering surface.
+ */
+#ifndef __LP64__
+extern uint __attribute__((overloadable))
+    rsgGetWidth(void);
+#endif
+
+/*
  * Returns the bounding box of the text relative to (0, 0)
  * Any of left, right, top, bottom could be NULL
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgMeasureText(const char *, int *left, int *right, int *top, int *bottom);
-/**
- * \overload
- */
+    rsgMeasureText(const char* text, int* left, int* right, int* top, int* bottom);
+#endif
+
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgMeasureText(rs_allocation, int *left, int *right, int *top, int *bottom);
-/**
+    rsgMeasureText(rs_allocation alloc, int* left, int* right, int* top, int* bottom);
+#endif
+
+/*
  * Computes an axis aligned bounding box of a mesh object
  */
+#ifndef __LP64__
 extern void __attribute__((overloadable))
-    rsgMeshComputeBoundingBox(rs_mesh mesh, float *minX, float *minY, float *minZ,
-                                                float *maxX, float *maxY, float *maxZ);
-/**
- * \overload
- */
-__inline__ static void __attribute__((overloadable, always_inline))
-rsgMeshComputeBoundingBox(rs_mesh mesh, float3 *bBoxMin, float3 *bBoxMax) {
+    rsgMeshComputeBoundingBox(rs_mesh mesh, float* minX, float* minY, float* min, float* maxX,
+                              float* maxY, float* maxZ);
+#endif
+
+#ifndef __LP64__
+static inline void __attribute__((always_inline, overloadable))
+    rsgMeshComputeBoundingBox(rs_mesh mesh, float3* bBoxMin, float3* bBoxMax) {
     float x1, y1, z1, x2, y2, z2;
     rsgMeshComputeBoundingBox(mesh, &x1, &y1, &z1, &x2, &y2, &z2);
     bBoxMin->x = x1;
@@ -420,7 +361,66 @@
     bBoxMax->y = y2;
     bBoxMax->z = z2;
 }
-
-#endif //__LP64__
 #endif
 
+/*
+ * Set the constant color for a fixed function emulation program.
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgProgramFragmentConstantColor(rs_program_fragment pf, float r, float g, float b, float a);
+#endif
+
+/*
+ * Get the projection matrix for a currently bound fixed function
+ * vertex program. Calling this function with a custom vertex shader
+ * would result in an error.
+ *
+ * Parameters:
+ *   proj matrix to store the current projection matrix into
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgProgramVertexGetProjectionMatrix(rs_matrix4x4* proj);
+#endif
+
+/*
+ * Load the model matrix for a currently bound fixed function
+ * vertex program. Calling this function with a custom vertex shader
+ * would result in an error.
+ *
+ * Parameters:
+ *   model model matrix
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgProgramVertexLoadModelMatrix(const rs_matrix4x4* model);
+#endif
+
+/*
+ * Load the projection matrix for a currently bound fixed function
+ * vertex program. Calling this function with a custom vertex shader
+ * would result in an error.
+ *
+ * Parameters:
+ *   proj projection matrix
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgProgramVertexLoadProjectionMatrix(const rs_matrix4x4* proj);
+#endif
+
+/*
+ * Load the texture matrix for a currently bound fixed function
+ * vertex program. Calling this function with a custom vertex shader
+ * would result in an error.
+ *
+ * Parameters:
+ *   tex texture matrix
+ */
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsgProgramVertexLoadTextureMatrix(const rs_matrix4x4* tex);
+#endif
+
+#endif // RENDERSCRIPT_RS_GRAPHICS_RSH
diff --git a/renderscript/include/rs_math.rsh b/renderscript/include/rs_math.rsh
index 4d3124c..b6b6ee0 100644
--- a/renderscript/include/rs_math.rsh
+++ b/renderscript/include/rs_math.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,95 +14,61 @@
  * limitations under the License.
  */
 
-/** @file rs_math.rsh
- *  \brief todo-jsams
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_math.rsh: TODO Add documentation
  *
- *  todo-jsams
+ * TODO Add documentation
+ */
+#ifndef RENDERSCRIPT_RS_MATH_RSH
+#define RENDERSCRIPT_RS_MATH_RSH
+
+/*
+ * rsClamp: Restrain a value to a range
  *
- */
-
-#ifndef __RS_MATH_RSH__
-#define __RS_MATH_RSH__
-
-
-/**
- * Return a random value between 0 (or min_value) and max_malue.
- */
-extern int __attribute__((overloadable))
-    rsRand(int max_value);
-/**
- * \overload
- */
-extern int __attribute__((overloadable))
-    rsRand(int min_value, int max_value);
-/**
- * \overload
- */
-extern float __attribute__((overloadable))
-    rsRand(float max_value);
-/**
- * \overload
- */
-extern float __attribute__((overloadable))
-    rsRand(float min_value, float max_value);
-
-/**
- * Returns the fractional part of a float
- */
-extern float __attribute__((const, overloadable))
-    rsFrac(float);
-
-
-/////////////////////////////////////////////////////
-// int ops
-/////////////////////////////////////////////////////
-
-/**
- * Clamp the value amount between low and high.
+ * Clamp a value between low and high.
  *
- * @param amount  The value to clamp
- * @param low
- * @param high
+ * Deprecated.  Use clamp() instead.
+ *
+ * Parameters:
+ *   amount The value to clamp
+ *   low Lower bound
+ *   high Upper bound
  */
-_RS_RUNTIME uint __attribute__((const, overloadable, always_inline)) rsClamp(uint amount, uint low, uint high);
+extern char __attribute__((const, always_inline, overloadable))
+    rsClamp(char amount, char low, char high);
 
-/**
- * \overload
- */
-_RS_RUNTIME int __attribute__((const, overloadable, always_inline)) rsClamp(int amount, int low, int high);
-/**
- * \overload
- */
-_RS_RUNTIME ushort __attribute__((const, overloadable, always_inline)) rsClamp(ushort amount, ushort low, ushort high);
-/**
- * \overload
- */
-_RS_RUNTIME short __attribute__((const, overloadable, always_inline)) rsClamp(short amount, short low, short high);
-/**
- * \overload
- */
-_RS_RUNTIME uchar __attribute__((const, overloadable, always_inline)) rsClamp(uchar amount, uchar low, uchar high);
-/**
- * \overload
- */
-_RS_RUNTIME char __attribute__((const, overloadable, always_inline)) rsClamp(char amount, char low, char high);
+extern uchar __attribute__((const, always_inline, overloadable))
+    rsClamp(uchar amount, uchar low, uchar high);
 
+extern short __attribute__((const, always_inline, overloadable))
+    rsClamp(short amount, short low, short high);
 
-/**
+extern ushort __attribute__((const, always_inline, overloadable))
+    rsClamp(ushort amount, ushort low, ushort high);
+
+extern int __attribute__((const, always_inline, overloadable))
+    rsClamp(int amount, int low, int high);
+
+extern uint __attribute__((const, always_inline, overloadable))
+    rsClamp(uint amount, uint low, uint high);
+
+/*
  * Computes 6 frustum planes from the view projection matrix
- * @param viewProj matrix to extract planes from
- * @param left plane
- * @param right plane
- * @param top plane
- * @param bottom plane
- * @param near plane
- * @param far plane
+ *
+ * Parameters:
+ *   viewProj matrix to extract planes from
+ *   left left plane
+ *   right right plane
+ *   top top plane
+ *   bottom bottom plane
+ *   near near plane
+ *   far far plane
  */
-__inline__ static void __attribute__((overloadable, always_inline))
-rsExtractFrustumPlanes(const rs_matrix4x4 *viewProj,
-                         float4 *left, float4 *right,
-                         float4 *top, float4 *bottom,
-                         float4 *near, float4 *far) {
+static inline void __attribute__((always_inline, overloadable))
+    rsExtractFrustumPlanes(const rs_matrix4x4* viewProj, float4* left, float4* right, float4* top,
+                           float4* bottom, float4* near, float4* far) {
     // x y z w = a b c d in the plane equation
     left->x = viewProj->m[3] + viewProj->m[0];
     left->y = viewProj->m[7] + viewProj->m[4];
@@ -148,22 +114,27 @@
     *far /= len;
 }
 
-/**
- * Checks if a sphere is withing the 6 frustum planes
- * @param sphere float4 representing the sphere
- * @param left plane
- * @param right plane
- * @param top plane
- * @param bottom plane
- * @param near plane
- * @param far plane
+/*
+ * Returns the fractional part of a float
  */
-__inline__ static bool __attribute__((overloadable, always_inline))
-rsIsSphereInFrustum(float4 *sphere,
-                      float4 *left, float4 *right,
-                      float4 *top, float4 *bottom,
-                      float4 *near, float4 *far) {
+extern float __attribute__((const, overloadable))
+    rsFrac(float v);
 
+/*
+ * Checks if a sphere is within the 6 frustum planes
+ *
+ * Parameters:
+ *   sphere float4 representing the sphere
+ *   left left plane
+ *   right right plane
+ *   top top plane
+ *   bottom bottom plane
+ *   near near plane
+ *   far far plane
+ */
+static inline bool __attribute__((always_inline, overloadable))
+    rsIsSphereInFrustum(float4* sphere, float4* left, float4* right, float4* top, float4* bottom,
+                        float4* near, float4* far) {
     float distToCenter = dot(left->xyz, sphere->xyz) + left->w;
     if (distToCenter < -sphere->w) {
         return false;
@@ -191,61 +162,52 @@
     return true;
 }
 
-
-/**
- * Pack floating point (0-1) RGB values into a uchar4.  The alpha component is
- * set to 255 (1.0).
+/*
+ * Pack floating point (0-1) RGB values into a uchar4.
  *
- * @param r
- * @param g
- * @param b
- *
- * @return uchar4
+ * For the float3 variant and the variant that only specifies r, g, b,
+ * the alpha component is set to 255 (1.0).
  */
-_RS_RUNTIME uchar4 __attribute__((const, overloadable)) rsPackColorTo8888(float r, float g, float b);
+extern uchar4 __attribute__((const, overloadable))
+    rsPackColorTo8888(float r, float g, float b);
 
-/**
- * Pack floating point (0-1) RGBA values into a uchar4.
- *
- * @param r
- * @param g
- * @param b
- * @param a
- *
- * @return uchar4
+extern uchar4 __attribute__((const, overloadable))
+    rsPackColorTo8888(float r, float g, float b, float a);
+
+extern uchar4 __attribute__((const, overloadable))
+    rsPackColorTo8888(float3 color);
+
+extern uchar4 __attribute__((const, overloadable))
+    rsPackColorTo8888(float4 color);
+
+/*
+ * Return a random value between 0 (or min_value) and max_value.
  */
-_RS_RUNTIME uchar4 __attribute__((const, overloadable)) rsPackColorTo8888(float r, float g, float b, float a);
+extern int __attribute__((overloadable))
+    rsRand(int max_value);
 
-/**
- * Pack floating point (0-1) RGB values into a uchar4.  The alpha component is
- * set to 255 (1.0).
- *
- * @param color
- *
- * @return uchar4
- */
-_RS_RUNTIME uchar4 __attribute__((const, overloadable)) rsPackColorTo8888(float3 color);
+extern int __attribute__((overloadable))
+    rsRand(int min_value, int max_value);
 
-/**
- * Pack floating point (0-1) RGBA values into a uchar4.
- *
- * @param color
- *
- * @return uchar4
- */
-_RS_RUNTIME uchar4 __attribute__((const, overloadable)) rsPackColorTo8888(float4 color);
+extern float __attribute__((overloadable))
+    rsRand(float max_value);
 
-/**
+extern float __attribute__((overloadable))
+    rsRand(float min_value, float max_value);
+
+/*
  * Unpack a uchar4 color to float4.  The resulting float range will be (0-1).
- *
- * @param c
- *
- * @return float4
  */
-_RS_RUNTIME float4 __attribute__((const)) rsUnpackColor8888(uchar4 c);
+extern float4 __attribute__((const))
+    rsUnpackColor8888(uchar4 c);
 
-_RS_RUNTIME uchar4 __attribute__((const, overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v);
-_RS_RUNTIME float4 __attribute__((const, overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v);
+/*
+ * Convert from YUV to RGBA.
+ */
+extern float4 __attribute__((const, overloadable))
+    rsYuvToRGBA_float4(uchar y, uchar u, uchar v);
 
+extern uchar4 __attribute__((const, overloadable))
+    rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v);
 
-#endif
+#endif // RENDERSCRIPT_RS_MATH_RSH
diff --git a/renderscript/include/rs_matrix.rsh b/renderscript/include/rs_matrix.rsh
index 34b9532..3ed35a4 100644
--- a/renderscript/include/rs_matrix.rsh
+++ b/renderscript/include/rs_matrix.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,519 +14,482 @@
  * limitations under the License.
  */
 
-/** @file rs_matrix.rsh
- *  \brief Matrix functions.
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_matrix.rsh: Matrix functions
  *
  * These functions let you manipulate square matrices of rank 2x2, 3x3, and 4x4.
  * They are particularly useful for graphical transformations and are
  * compatible with OpenGL.
  *
- * A few general notes:
+ * We use a zero-based index for rows and columns.  E.g. the last element of
+ * a rs_matrix4x4 is found at (3, 3).
  *
- * \li We use a zero-based index for rows and columns.  E.g. the last element of
- * a \ref rs_matrix4x4 is found at (3, 3).
+ * RenderScript uses column-major matrices and column-based vectors.
+ * Transforming a vector is done by postmultiplying the vector,
+ * e.g. (matrix * vector), as provided by rsMatrixMultiply().
  *
- * \li RenderScript uses column-based vectors.  Transforming a vector is done by
- * postmultiplying the vector, e.g. <em>(matrix * vector)</em>, as provided by
- * \ref rsMatrixMultiply.
- *
- * \li To create a transformation matrix that performs two transformations at
+ * To create a transformation matrix that performs two transformations at
  * once, multiply the two source matrices, with the first transformation as the
  * right argument.  E.g. to create a transformation matrix that applies the
- * transformation \e s1 followed by \e s2, call
- * </c>rsMatrixLoadMultiply(&combined, &s2, &s1)</c>.
- * This derives from <em>s2 * (s1 * v)</em>, which is <em>(s2 * s1) * v</em>.
+ * transformation s1 followed by s2, call rsMatrixLoadMultiply(&combined, &s2, &s1).
+ * This derives from s2 * (s1 * v), which is (s2 * s1) * v.
  *
- * \li We have two style of functions to create transformation matrices:
- * rsMatrixLoad<em>Transformation</em> and rsMatrix<em>Transformation</em>.  The
+ * We have two styles of functions to create transformation matrices:
+ * rsMatrixLoadTransformation and rsMatrixTransformation.  The
  * former style simply stores the transformation matrix in the first argument.
  * The latter modifies a pre-existing transformation matrix so that the new
- * transformation happens first.  E.g. if you call \ref rsMatrixTranslate
+ * transformation happens first.  E.g. if you call rsMatrixTranslate()
  * on a matrix that already does a scaling, the resulting matrix when applied
  * to a vector will first do the translation then the scaling.
- *
  */
+#ifndef RENDERSCRIPT_RS_MATRIX_RSH
+#define RENDERSCRIPT_RS_MATRIX_RSH
 
-#ifndef __RS_MATRIX_RSH__
-#define __RS_MATRIX_RSH__
-
-/**
- * Set an element of a matrix.
+/*
+ * rsMatrixGet: Get one element
  *
- * @param m The matrix that will be modified.
- * @param col The zero-based column of the element to be set.
- * @param row The zero-based row of the element to be set.
- * @param v The value to set.
- *
- * \warning The order of the column and row parameters may be
- * unexpected.
- *
- * @return void
- */
-_RS_RUNTIME void __attribute__((overloadable))
-rsMatrixSet(rs_matrix4x4 *m, uint32_t col, uint32_t row, float v);
-/**
- * \overload
- */
-_RS_RUNTIME void __attribute__((overloadable))
-rsMatrixSet(rs_matrix3x3 *m, uint32_t col, uint32_t row, float v);
-/**
- * \overload
- */
-_RS_RUNTIME void __attribute__((overloadable))
-rsMatrixSet(rs_matrix2x2 *m, uint32_t col, uint32_t row, float v);
-
-/**
  * Returns one element of a matrix.
  *
- * @param m The matrix to extract the element from.
- * @param col The zero-based column of the element to be extracted.
- * @param row The zero-based row of the element to extracted.
+ * Warning: The order of the column and row parameters may be unexpected.
  *
- * \warning The order of the column and row parameters may be
- * unexpected.
- *
- * @return float
+ * Parameters:
+ *   m The matrix to extract the element from.
+ *   col The zero-based column of the element to be extracted.
+ *   row The zero-based row of the element to be extracted.
  */
-_RS_RUNTIME float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix4x4 *m, uint32_t col, uint32_t row);
-/**
- * \overload
- */
-_RS_RUNTIME float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix3x3 *m, uint32_t col, uint32_t row);
-/**
- * \overload
- */
-_RS_RUNTIME float __attribute__((overloadable))
-rsMatrixGet(const rs_matrix2x2 *m, uint32_t col, uint32_t row);
+extern float __attribute__((overloadable))
+    rsMatrixGet(const rs_matrix4x4* m, uint32_t col, uint32_t row);
 
-/**
- * Set the elements of a matrix to the identity matrix.
- *
- * @param m The matrix to set.
- */
-extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix4x4 *m);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix3x3 *m);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoadIdentity(rs_matrix2x2 *m);
+extern float __attribute__((overloadable))
+    rsMatrixGet(const rs_matrix3x3* m, uint32_t col, uint32_t row);
 
-/**
- * Set the elements of a matrix from an array of floats.
+extern float __attribute__((overloadable))
+    rsMatrixGet(const rs_matrix2x2* m, uint32_t col, uint32_t row);
+
+/*
+ * rsMatrixInverse: Inverts a matrix in place
  *
- * The array of floats should be in row-major order, i.e. the element a
- * <em>row 0, column 0</em> should be first, followed by the element at
- * <em>row 0, column 1</em>, etc.
+ * Returns true if the matrix was successfully inverted.
  *
- * @param m The matrix to set.
- * @param v The array of values to set the matrix to. These arrays should be
- * 4, 9, or 16 floats long, depending on the matrix size.
+ * Parameters:
+ *   m The matrix to invert.
  */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const float *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix3x3 *m, const float *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix2x2 *m, const float *v);
-/**
- * Set the elements of a matrix from another matrix.
+extern bool __attribute__((overloadable))
+    rsMatrixInverse(rs_matrix4x4* m);
+
+/*
+ * rsMatrixInverseTranspose: Inverts and transposes a matrix in place
  *
- * If the source matrix is smaller than the destination, the rest of the
+ * The matrix is first inverted then transposed.
+ * Returns true if the matrix was successfully inverted.
+ *
+ * Parameters:
+ *   m The matrix to modify.
+ */
+extern bool __attribute__((overloadable))
+    rsMatrixInverseTranspose(rs_matrix4x4* m);
+
+/*
+ * rsMatrixLoad: Load or copy a matrix
+ *
+ * Set the elements of a matrix from an array of floats or from another matrix.
+ *
+ * If loading from an array, the floats should be in row-major order, i.e. the element at
+ * row 0, column 0 should be first, followed by the element at
+ * row 0, column 1, etc.
+ *
+ * If loading from a matrix and the source is smaller than the destination, the rest of the
  * destination is filled with elements of the identity matrix.  E.g.
  * loading a rs_matrix2x2 into a rs_matrix4x4 will give:
  *
- * \htmlonly<table>
- * <tr><td>m00</td><td>m01</td><td>0.0</td><td>0.0</td></tr>
- * <tr><td>m10</td><td>m11</td><td>0.0</td><td>0.0</td></tr>
- * <tr><td>0.0</td><td>0.0</td><td>1.0</td><td>0.0</td></tr>
- * <tr><td>0.0</td><td>0.0</td><td>0.0</td><td>1.0</td></tr>
- * </table>\endhtmlonly
+ * m00 m01 0.0 0.0
+ * m10 m11 0.0 0.0
+ * 0.0 0.0 1.0 0.0
+ * 0.0 0.0 0.0 1.0
  *
- * @param m The matrix to set.
- * @param v The source matrix.
+ *
+ * Parameters:
+ *   destination The matrix to set.
+ *   array The array of values to set the matrix to. These arrays should be 4, 9, or 16 floats long, depending on the matrix size.
+ *   source The source matrix.
  */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix4x4 *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix3x3 *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix2x2 *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix3x3 *m, const rs_matrix3x3 *v);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixLoad(rs_matrix2x2 *m, const rs_matrix2x2 *v);
+extern void __attribute__((overloadable))
+    rsMatrixLoad(rs_matrix4x4* destination, const float* array);
 
-/**
- * Load a rotation matrix.
+extern void __attribute__((overloadable))
+    rsMatrixLoad(rs_matrix3x3* destination, const float* array);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoad(rs_matrix2x2* destination, const float* array);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoad(rs_matrix4x4* destination, const rs_matrix4x4* source);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoad(rs_matrix3x3* destination, const rs_matrix3x3* source);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoad(rs_matrix2x2* destination, const rs_matrix2x2* source);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoad(rs_matrix4x4* destination, const rs_matrix3x3* source);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoad(rs_matrix4x4* destination, const rs_matrix2x2* source);
+
+/*
+ * rsMatrixLoadFrustum: Load a frustum projection matrix
+ *
+ * Constructs a frustum projection matrix, transforming the box
+ * identified by the six clipping planes left, right, bottom, top,
+ * near, far.
+ *
+ * To apply this projection to a vector, multiply the vector by the
+ * created matrix using rsMatrixMultiply().
+ *
+ * Parameters:
+ *   m The matrix to set.
+ */
+extern void __attribute__((overloadable))
+    rsMatrixLoadFrustum(rs_matrix4x4* m, float left, float right, float bottom, float top,
+                        float near, float far);
+
+/*
+ * rsMatrixLoadIdentity: Load identity matrix
+ *
+ * Set the elements of a matrix to the identity matrix.
+ *
+ * Parameters:
+ *   m The matrix to set.
+ */
+extern void __attribute__((overloadable))
+    rsMatrixLoadIdentity(rs_matrix4x4* m);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoadIdentity(rs_matrix3x3* m);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoadIdentity(rs_matrix2x2* m);
+
+/*
+ * rsMatrixLoadMultiply: Multiply two matrices
+ *
+ * Sets m to the matrix product of lhs * rhs.
+ *
+ * To combine two 4x4 transformation matrices, multiply the second transformation matrix
+ * by the first transformation matrix.  E.g. to create a transformation matrix that applies
+ * the transformation s1 followed by s2, call
+ * rsMatrixLoadMultiply(&combined, &s2, &s1).
+ *
+ * Warning: Prior to version 21, storing the result back into the right matrix is not supported and
+ * will result in undefined behavior.  Use rsMatrixMultiply instead.   E.g. instead of doing
+ * rsMatrixLoadMultiply (&m2r, &m2r, &m2l), use rsMatrixMultiply (&m2r, &m2l).
+ * rsMatrixLoadMultiply (&m2l, &m2r, &m2l) works as expected.
+ *
+ * Parameters:
+ *   m The matrix to set.
+ *   lhs The left matrix of the product.
+ *   rhs The right matrix of the product.
+ */
+extern void __attribute__((overloadable))
+    rsMatrixLoadMultiply(rs_matrix4x4* m, const rs_matrix4x4* lhs, const rs_matrix4x4* rhs);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoadMultiply(rs_matrix3x3* m, const rs_matrix3x3* lhs, const rs_matrix3x3* rhs);
+
+extern void __attribute__((overloadable))
+    rsMatrixLoadMultiply(rs_matrix2x2* m, const rs_matrix2x2* lhs, const rs_matrix2x2* rhs);
+
+/*
+ * rsMatrixLoadOrtho: Load an orthographic projection matrix
+ *
+ * Constructs an orthographic projection matrix, transforming the box
+ * identified by the six clipping planes left, right, bottom, top,
+ * near, far into a unit cube with a corner at
+ * (-1, -1, -1) and the opposite at (1, 1, 1).
+ *
+ * To apply this projection to a vector, multiply the vector by the
+ * created matrix using rsMatrixMultiply().
+ *
+ * See https://en.wikipedia.org/wiki/Orthographic_projection .
+ *
+ * Parameters:
+ *   m The matrix to set.
+ */
+extern void __attribute__((overloadable))
+    rsMatrixLoadOrtho(rs_matrix4x4* m, float left, float right, float bottom, float top, float near,
+                      float far);
+
+/*
+ * rsMatrixLoadPerspective: Load a perspective projection matrix
+ *
+ * Constructs a perspective projection matrix, assuming a symmetrical field of view.
+ *
+ * To apply this projection to a vector, multiply the vector by the
+ * created matrix using rsMatrixMultiply().
+ *
+ * Parameters:
+ *   m The matrix to set.
+ *   fovy Field of view, in degrees along the Y axis.
+ *   aspect Ratio of x / y.
+ *   near The near clipping plane.
+ *   far The far clipping plane.
+ */
+extern void __attribute__((overloadable))
+    rsMatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far);
+
+/*
+ * rsMatrixLoadRotate: Load a rotation matrix
  *
  * This function creates a rotation matrix.  The axis of rotation is the
- * <em>(x, y, z)</em> vector.
+ * (x, y, z) vector.
  *
  * To rotate a vector, multiply the vector by the created matrix
- * using \ref rsMatrixMultiply.
+ * using rsMatrixMultiply().
  *
  * See http://en.wikipedia.org/wiki/Rotation_matrix .
  *
- * @param m The matrix to set.
- * @param rot How much rotation to do, in degrees.
- * @param x The x component of the vector that is the axis of rotation.
- * @param y The y component of the vector that is the axis of rotation.
- * @param z The z component of the vector that is the axis of rotation.
+ * Parameters:
+ *   m The matrix to set.
+ *   rot How much rotation to do, in degrees.
+ *   x The x component of the vector that is the axis of rotation.
+ *   y The y component of the vector that is the axis of rotation.
+ *   z The z component of the vector that is the axis of rotation.
  */
 extern void __attribute__((overloadable))
-rsMatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z);
+    rsMatrixLoadRotate(rs_matrix4x4* m, float rot, float x, float y, float z);
 
-/**
- * Load a scale matrix.
+/*
+ * rsMatrixLoadScale: Load a scaling matrix
  *
  * This function creates a scaling matrix, where each component of a
  * vector is multiplied by a number.  This number can be negative.
  *
  * To scale a vector, multiply the vector by the created matrix
- * using \ref rsMatrixMultiply.
+ * using rsMatrixMultiply().
  *
- * @param m The matrix to set.
- * @param x The multiple to scale the x components by.
- * @param y The multiple to scale the y components by.
- * @param z The multiple to scale the z components by.
+ * Parameters:
+ *   m The matrix to set.
+ *   x The multiple to scale the x components by.
+ *   y The multiple to scale the y components by.
+ *   z The multiple to scale the z components by.
  */
 extern void __attribute__((overloadable))
-rsMatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z);
+    rsMatrixLoadScale(rs_matrix4x4* m, float x, float y, float z);
 
-/**
- * Load a translation matrix.
+/*
+ * rsMatrixLoadTranslate: Load a translation matrix
  *
  * This function creates a translation matrix, where a
  * number is added to each element of a vector.
  *
  * To translate a vector, multiply the vector by the created matrix
- * using \ref rsMatrixMultiply.
+ * using rsMatrixMultiply().
  *
- * @param m The matrix to set.
- * @param x The number to add to each x component.
- * @param y The number to add to each y component.
- * @param z The number to add to each z component.
+ * Parameters:
+ *   m The matrix to set.
+ *   x The number to add to each x component.
+ *   y The number to add to each y component.
+ *   z The number to add to each z component.
  */
 extern void __attribute__((overloadable))
-rsMatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z);
+    rsMatrixLoadTranslate(rs_matrix4x4* m, float x, float y, float z);
 
-/**
- * Multiply two matrices.
+/*
+ * rsMatrixMultiply: Multiply a matrix by a vector or another matrix
  *
- * Sets \e m to the matrix product of <em>lhs * rhs</em>.
- *
- * To combine two 4x4 transformaton matrices, multiply the second transformation matrix
- * by the first transformation matrix.  E.g. to create a transformation matrix that applies
- * the transformation \e s1 followed by \e s2, call
- * </c>rsMatrixLoadMultiply(&combined, &s2, &s1)</c>.
- *
- * \warning Prior to version 21, storing the result back into right matrix is not supported and
- * will result in undefined behavior.  Use rsMatrixMulitply instead.   E.g. instead of doing
- * rsMatrixLoadMultiply (&m2r, &m2r, &m2l), use rsMatrixMultiply (&m2r, &m2l).
- * rsMatrixLoadMultiply (&m2l, &m2r, &m2l) works as expected.
- *
- * @param m The matrix to set.
- * @param lhs The left matrix of the product.
- * @param rhs The right matrix of the product.
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs);
-
-/**
- * Multiply a matrix into another one.
- *
- * Sets \e m to the matrix product <em>m * rhs</em>.
+ * For the matrix by matrix variant, sets m to the matrix product m * rhs.
  *
  * When combining two 4x4 transformation matrices using this function, the resulting
- * matrix will correspond to performing the \e rhs transformation first followed by
- * the original \e m transformation.
+ * matrix will correspond to performing the rhs transformation first followed by
+ * the original m transformation.
  *
- * @param m The left matrix of the product and the matrix to be set.
- * @param rhs The right matrix of the product.
+ * For the matrix by vector variant, returns the post-multiplication of the vector
+ * by the matrix, i.e. m * in.
+ *
+ * When multiplying a float3 to a rs_matrix4x4, the vector is expanded with (1).
+ *
+ * When multiplying a float2 to a rs_matrix4x4, the vector is expanded with (0, 1).
+ *
+ * When multiplying a float2 to a rs_matrix3x3, the vector is expanded with (0).
+ *
+ * Starting with API 14, this function takes a const matrix as the first argument.
+ *
+ * Parameters:
+ *   m The left matrix of the product and the matrix to be set.
+ *   rhs The right matrix of the product.
  */
 extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *rhs);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *rhs);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *rhs);
+    rsMatrixMultiply(rs_matrix4x4* m, const rs_matrix4x4* rhs);
 
-/**
- * Multiply the matrix \e m with a rotation matrix.
+extern void __attribute__((overloadable))
+    rsMatrixMultiply(rs_matrix3x3* m, const rs_matrix3x3* rhs);
+
+extern void __attribute__((overloadable))
+    rsMatrixMultiply(rs_matrix2x2* m, const rs_matrix2x2* rhs);
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 13)
+extern float4 __attribute__((overloadable))
+    rsMatrixMultiply(rs_matrix4x4* m, float4 in);
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 13)
+extern float4 __attribute__((overloadable))
+    rsMatrixMultiply(rs_matrix4x4* m, float3 in);
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 13)
+extern float4 __attribute__((overloadable))
+    rsMatrixMultiply(rs_matrix4x4* m, float2 in);
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 13)
+extern float3 __attribute__((overloadable))
+    rsMatrixMultiply(rs_matrix3x3* m, float3 in);
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 13)
+extern float3 __attribute__((overloadable))
+    rsMatrixMultiply(rs_matrix3x3* m, float2 in);
+#endif
+
+#if !defined(RS_VERSION) || (RS_VERSION <= 13)
+extern float2 __attribute__((overloadable))
+    rsMatrixMultiply(rs_matrix2x2* m, float2 in);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern float4 __attribute__((overloadable))
+    rsMatrixMultiply(const rs_matrix4x4* m, float4 in);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern float4 __attribute__((overloadable))
+    rsMatrixMultiply(const rs_matrix4x4* m, float3 in);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern float4 __attribute__((overloadable))
+    rsMatrixMultiply(const rs_matrix4x4* m, float2 in);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern float3 __attribute__((overloadable))
+    rsMatrixMultiply(const rs_matrix3x3* m, float3 in);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern float3 __attribute__((overloadable))
+    rsMatrixMultiply(const rs_matrix3x3* m, float2 in);
+#endif
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
+extern float2 __attribute__((overloadable))
+    rsMatrixMultiply(const rs_matrix2x2* m, float2 in);
+#endif
+
+/*
+ * rsMatrixRotate: Apply a rotation to a transformation matrix
+ *
+ * Multiply the matrix m with a rotation matrix.
  *
  * This function modifies a transformation matrix to first do a rotation.
- * The axis of rotation is the <em>(x, y, z)</em> vector.
+ * The axis of rotation is the (x, y, z) vector.
  *
  * To apply this combined transformation to a vector, multiply
- * the vector by the created matrix using \ref rsMatrixMultiply.
+ * the vector by the created matrix using rsMatrixMultiply().
  *
- * @param m The matrix to modify.
- * @param rot How much rotation to do, in degrees.
- * @param x The x component of the vector that is the axis of rotation.
- * @param y The y component of the vector that is the axis of rotation.
- * @param z The z component of the vector that is the axis of rotation.
+ * Parameters:
+ *   m The matrix to modify.
+ *   rot How much rotation to do, in degrees.
+ *   x The x component of the vector that is the axis of rotation.
+ *   y The y component of the vector that is the axis of rotation.
+ *   z The z component of the vector that is the axis of rotation.
  */
 extern void __attribute__((overloadable))
-rsMatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z);
+    rsMatrixRotate(rs_matrix4x4* m, float rot, float x, float y, float z);
 
-/**
- * Multiply the matrix \e m with a scaling matrix.
+/*
+ * rsMatrixScale: Apply a scaling to a transformation matrix
+ *
+ * Multiply the matrix m with a scaling matrix.
  *
  * This function modifies a transformation matrix to first do a scaling.
  * When scaling, each component of a vector is multiplied by a number.
  * This number can be negative.
  *
  * To apply this combined transformation to a vector, multiply
- * the vector by the created matrix using \ref rsMatrixMultiply.
+ * the vector by the created matrix using rsMatrixMultiply().
  *
- * @param m The matrix to modify.
- * @param x The multiple to scale the x components by.
- * @param y The multiple to scale the y components by.
- * @param z The multiple to scale the z components by.
+ * Parameters:
+ *   m The matrix to modify.
+ *   x The multiple to scale the x components by.
+ *   y The multiple to scale the y components by.
+ *   z The multiple to scale the z components by.
  */
 extern void __attribute__((overloadable))
-rsMatrixScale(rs_matrix4x4 *m, float x, float y, float z);
+    rsMatrixScale(rs_matrix4x4* m, float x, float y, float z);
 
-/**
- * Multiply the matrix \e m with a translation matrix.
+/*
+ * rsMatrixSet: Set one element
+ *
+ * Set an element of a matrix.
+ *
+ * Warning: The order of the column and row parameters may be unexpected.
+ *
+ * Parameters:
+ *   m The matrix that will be modified.
+ *   col The zero-based column of the element to be set.
+ *   row The zero-based row of the element to be set.
+ *   v The value to set.
+ */
+extern void __attribute__((overloadable))
+    rsMatrixSet(rs_matrix4x4* m, uint32_t col, uint32_t row, float v);
+
+extern void __attribute__((overloadable))
+    rsMatrixSet(rs_matrix3x3* m, uint32_t col, uint32_t row, float v);
+
+extern void __attribute__((overloadable))
+    rsMatrixSet(rs_matrix2x2* m, uint32_t col, uint32_t row, float v);
+
+/*
+ * rsMatrixTranslate: Apply a translation to a transformation matrix
+ *
+ * Multiply the matrix m with a translation matrix.
  *
  * This function modifies a transformation matrix to first
  * do a translation.  When translating, a number is added
  * to each component of a vector.
  *
  * To apply this combined transformation to a vector, multiply
- * the vector by the created matrix using \ref rsMatrixMultiply.
+ * the vector by the created matrix using rsMatrixMultiply().
  *
- * @param m The matrix to modify.
- * @param x The number to add to each x component.
- * @param y The number to add to each y component.
- * @param z The number to add to each z component.
+ * Parameters:
+ *   m The matrix to modify.
+ *   x The number to add to each x component.
+ *   y The number to add to each y component.
+ *   z The number to add to each z component.
  */
 extern void __attribute__((overloadable))
-rsMatrixTranslate(rs_matrix4x4 *m, float x, float y, float z);
+    rsMatrixTranslate(rs_matrix4x4* m, float x, float y, float z);
 
-/**
- * Load an orthographic projection matrix.
+/*
+ * rsMatrixTranspose: Transpose a matrix in place
  *
- * Constructs an orthographic projection matrix, transforming the box
- * identified by the six clipping planes <em>left, right, bottom, top,
- * near, far</em> into a unit cube with a corner at
- * <em>(-1, -1, -1)</em> and the opposite at <em>(1, 1, 1)</em>.
- *
- * To apply this projection to a vector, multiply the vector by the
- * created matrix using \ref rsMatrixMultiply.
- *
- * See https://en.wikipedia.org/wiki/Orthographic_projection .
- *
- * @param m The matrix to set.
- * @param left
- * @param right
- * @param bottom
- * @param top
- * @param near
- * @param far
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadOrtho(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far);
-
-/**
- * Load a frustum projection matrix.
- *
- * Constructs a frustum projection matrix, transforming the box
- * identified by the six clipping planes <em>left, right, bottom, top,
- * near, far</em>.
- *
- * To apply this projection to a vector, multiply the vector by the
- * created matrix using \ref rsMatrixMultiply.
- *
- * @param m The matrix to set.
- * @param left
- * @param right
- * @param bottom
- * @param top
- * @param near
- * @param far
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadFrustum(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far);
-
-/**
- * Load a perspective projection matrix.
- *
- * Constructs a perspective projection matrix, assuming a symmetrical field of view.
- *
- * To apply this projection to a vector, multiply the vector by the
- * created matrix using \ref rsMatrixMultiply.
- *
- * @param m The matrix to set.
- * @param fovy Field of view, in degrees along the Y axis.
- * @param aspect Ratio of x / y.
- * @param near The near clipping plane.
- * @param far The far clipping plane.
- */
-extern void __attribute__((overloadable))
-rsMatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far);
-
-#if !defined(RS_VERSION) || (RS_VERSION < 14)
-/**
- * Multiply a vector by a matrix.
- *
- * Returns the post-multiplication of the vector by the matrix, ie. <em>m * in</em>.
- *
- * When multiplying a \e float3 to a \e rs_matrix4x4, the vector is expanded with (1).
- *
- * When multiplying a \e float2 to a \e rs_matrix4x4, the vector is expanded with (0, 1).
- *
- * When multiplying a \e float2 to a \e rs_matrix3x3, the vector is expanded with (0).
- *
- * This function is available in API version 10-13.  Starting with API 14,
- * the function takes a const matrix as the first argument.
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float4 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float3 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix4x4 *m, float2 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float3 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, float3 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float3 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix3x3 *m, float2 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float2 __attribute__((overloadable))
-rsMatrixMultiply(rs_matrix2x2 *m, float2 in);
-#else
-/**
- * Multiply a vector by a matrix.
- *
- * Returns the post-multiplication of the vector of the matrix, i.e. <em>m * in</em>.
- *
- * When multiplying a \e float3 to a \e rs_matrix4x4, the vector is expanded with (1).
- *
- * When multiplying a \e float2 to a \e rs_matrix4x4, the vector is expanded with (0, 1).
- *
- * When multiplying a \e float2 to a \e rs_matrix3x3, the vector is expanded with (0).
- *
- * This function is available starting with API version 14.
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float4 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float3 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float4 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix4x4 *m, float2 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float3 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix3x3 *m, float3 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float3 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix3x3 *m, float2 in);
-
-/**
- * \overload
- */
-_RS_RUNTIME float2 __attribute__((overloadable))
-rsMatrixMultiply(const rs_matrix2x2 *m, float2 in);
-#endif
-
-
-/**
- * Inverts a matrix in place.
- *
- * Returns true if the matrix was successfully inverted.
- *
- * @param m The matrix to invert.
- */
-extern bool __attribute__((overloadable)) rsMatrixInverse(rs_matrix4x4 *m);
-
-/**
- * Inverts and transpose a matrix in place.
- *
- * The matrix is first inverted then transposed.
- * Returns true if the matrix was successfully inverted.
- *
- * @param m The matrix to modify.
- */
-extern bool __attribute__((overloadable)) rsMatrixInverseTranspose(rs_matrix4x4 *m);
-
-/**
  * Transpose the matrix m in place.
  *
- * @param m The matrix to transpose.
+ * Parameters:
+ *   m The matrix to transpose.
  */
-extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix4x4 *m);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix3x3 *m);
-/**
- * \overload
- */
-extern void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix2x2 *m);
+extern void __attribute__((overloadable))
+    rsMatrixTranspose(rs_matrix4x4* m);
 
+extern void __attribute__((overloadable))
+    rsMatrixTranspose(rs_matrix3x3* m);
 
-#endif
+extern void __attribute__((overloadable))
+    rsMatrixTranspose(rs_matrix2x2* m);
+
+#endif // RENDERSCRIPT_RS_MATRIX_RSH
diff --git a/renderscript/include/rs_mesh.rsh b/renderscript/include/rs_mesh.rsh
index 0ecd786..c404a5f 100644
--- a/renderscript/include/rs_mesh.rsh
+++ b/renderscript/include/rs_mesh.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,75 +14,86 @@
  * limitations under the License.
  */
 
-/** @file rs_mesh.rsh
- *  \brief Mesh routines
- *
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_mesh.rsh: Mesh routines
  *
  */
+#ifndef RENDERSCRIPT_RS_MESH_RSH
+#define RENDERSCRIPT_RS_MESH_RSH
 
-#ifndef __RS_MESH_RSH__
-#define __RS_MESH_RSH__
-
-// New API's
-#if (defined(RS_VERSION) && (RS_VERSION >= 16))
-
-/**
- * Returns the number of allocations in the mesh that contain
- * vertex data
- *
- * @param m mesh to get data from
- * @return number of allocations in the mesh that contain vertex
- *         data
- */
-extern uint32_t __attribute__((overloadable))
-    rsgMeshGetVertexAllocationCount(rs_mesh m);
-
-/**
- * Meshes could have multiple index sets, this function returns
- * the number.
- *
- * @param m mesh to get data from
- * @return number of primitive groups in the mesh. This would
- *         include simple primitives as well as allocations
- *         containing index data
- */
-extern uint32_t __attribute__((overloadable))
-    rsgMeshGetPrimitiveCount(rs_mesh m);
-
-/**
- * Returns an allocation that is part of the mesh and contains
- * vertex data, e.g. positions, normals, texcoords
- *
- * @param m mesh to get data from
- * @param index index of the vertex allocation
- * @return allocation containing vertex data
- */
-extern rs_allocation __attribute__((overloadable))
-    rsgMeshGetVertexAllocation(rs_mesh m, uint32_t index);
-
-/**
+/*
  * Returns an allocation containing index data or a null
  * allocation if only the primitive is specified
  *
- * @param m mesh to get data from
- * @param index index of the index allocation
- * @return allocation containing index data
+ * Parameters:
+ *   m mesh to get data from
+ *   index index of the index allocation
+ *
+ * Returns: allocation containing index data
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 extern rs_allocation __attribute__((overloadable))
     rsgMeshGetIndexAllocation(rs_mesh m, uint32_t index);
+#endif
 
-/**
+/*
  * Returns the primitive describing how a part of the mesh is
  * rendered
  *
- * @param m mesh to get data from
- * @param index index of the primitive
- * @return primitive describing how the mesh is rendered
+ * Parameters:
+ *   m mesh to get data from
+ *   index index of the primitive
+ *
+ * Returns: primitive describing how the mesh is rendered
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 extern rs_primitive __attribute__((overloadable))
     rsgMeshGetPrimitive(rs_mesh m, uint32_t index);
+#endif
 
-#endif // (defined(RS_VERSION) && (RS_VERSION >= 16))
+/*
+ * Meshes could have multiple index sets; this function returns
+ * the number of them.
+ *
+ * Parameters:
+ *   m mesh to get data from
+ *
+ * Returns: number of primitive groups in the mesh. This would include simple primitives as well as allocations containing index data
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern uint32_t __attribute__((overloadable))
+    rsgMeshGetPrimitiveCount(rs_mesh m);
+#endif
 
-#endif // __RS_MESH_RSH__
+/*
+ * Returns an allocation that is part of the mesh and contains
+ * vertex data, e.g. positions, normals, texcoords
+ *
+ * Parameters:
+ *   m mesh to get data from
+ *   index index of the vertex allocation
+ *
+ * Returns: allocation containing vertex data
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_allocation __attribute__((overloadable))
+    rsgMeshGetVertexAllocation(rs_mesh m, uint32_t index);
+#endif
 
+/*
+ * Returns the number of allocations in the mesh that contain
+ * vertex data
+ *
+ * Parameters:
+ *   m mesh to get data from
+ *
+ * Returns: number of allocations in the mesh that contain vertex data
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern uint32_t __attribute__((overloadable))
+    rsgMeshGetVertexAllocationCount(rs_mesh m);
+#endif
+
+#endif // RENDERSCRIPT_RS_MESH_RSH
diff --git a/renderscript/include/rs_object.rsh b/renderscript/include/rs_object.rsh
index ed6423b..c7205e3 100644
--- a/renderscript/include/rs_object.rsh
+++ b/renderscript/include/rs_object.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,216 +14,160 @@
  * limitations under the License.
  */
 
-/** @file rs_object.rsh
- *  \brief Object routines
- *
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_object.rsh: Object routines
  *
  */
+#ifndef RENDERSCRIPT_RS_OBJECT_RSH
+#define RENDERSCRIPT_RS_OBJECT_RSH
 
-#ifndef __RS_OBJECT_RSH__
-#define __RS_OBJECT_RSH__
-
-
-/**
- * Copy reference to the specified object.
+/*
+ * rsClearObject: For internal use.
  *
- * @param dst
- * @param src
  */
 extern void __attribute__((overloadable))
-    rsSetObject(rs_element *dst, rs_element src);
-/**
- * \overload
- */
+    rsClearObject(rs_element* dst);
+
 extern void __attribute__((overloadable))
-    rsSetObject(rs_type *dst, rs_type src);
-/**
- * \overload
- */
+    rsClearObject(rs_type* dst);
+
 extern void __attribute__((overloadable))
-    rsSetObject(rs_allocation *dst, rs_allocation src);
-/**
- * \overload
- */
+    rsClearObject(rs_allocation* dst);
+
 extern void __attribute__((overloadable))
-    rsSetObject(rs_sampler *dst, rs_sampler src);
-/**
- * \overload
- */
+    rsClearObject(rs_sampler* dst);
+
 extern void __attribute__((overloadable))
-    rsSetObject(rs_script *dst, rs_script src);
+    rsClearObject(rs_script* dst);
 
 #ifndef __LP64__
-/**
- * \overload
- */
 extern void __attribute__((overloadable))
-    rsSetObject(rs_path *dst, rs_path src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_mesh *dst, rs_mesh src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_program_fragment *dst, rs_program_fragment src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_program_vertex *dst, rs_program_vertex src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_program_raster *dst, rs_program_raster src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_program_store *dst, rs_program_store src);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsSetObject(rs_font *dst, rs_font src);
-#endif //__LP64__
-
-/**
- * Sets the object to NULL.
- *
- * @return bool
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_element *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_type *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_allocation *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_sampler *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_script *dst);
-
-
-#ifndef __LP64__
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_path *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_mesh *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_program_fragment *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_program_vertex *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_program_raster *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_program_store *dst);
-/**
- * \overload
- */
-extern void __attribute__((overloadable))
-    rsClearObject(rs_font *dst);
-#endif //__LP64__
-
-
-/**
- * Tests if the object is valid.  Returns true if the object is valid, false if
- * it is NULL.
- *
- * @return bool
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_element);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_type);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_allocation);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_sampler);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_script);
-
-#ifndef __LP64__
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_path);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_mesh);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_program_fragment);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_program_vertex);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_program_raster);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_program_store);
-/**
- * \overload
- */
-extern bool __attribute__((overloadable))
-    rsIsObject(rs_font);
-#endif //__LP64__
-
+    rsClearObject(rs_mesh* dst);
 #endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_fragment* dst);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_vertex* dst);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_raster* dst);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_store* dst);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsClearObject(rs_font* dst);
+#endif
+
+/*
+ * rsIsObject: For internal use.
+ *
+ */
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_element v);
+
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_type v);
+
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_allocation v);
+
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_sampler v);
+
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_script v);
+
+#ifndef __LP64__
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_mesh v);
+#endif
+
+#ifndef __LP64__
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_fragment v);
+#endif
+
+#ifndef __LP64__
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_vertex v);
+#endif
+
+#ifndef __LP64__
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_raster v);
+#endif
+
+#ifndef __LP64__
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_store v);
+#endif
+
+#ifndef __LP64__
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_font v);
+#endif
+
+/*
+ * rsSetObject: For internal use.
+ *
+ */
+extern void __attribute__((overloadable))
+    rsSetObject(rs_element* dst, rs_element src);
+
+extern void __attribute__((overloadable))
+    rsSetObject(rs_type* dst, rs_type src);
+
+extern void __attribute__((overloadable))
+    rsSetObject(rs_allocation* dst, rs_allocation src);
+
+extern void __attribute__((overloadable))
+    rsSetObject(rs_sampler* dst, rs_sampler src);
+
+extern void __attribute__((overloadable))
+    rsSetObject(rs_script* dst, rs_script src);
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsSetObject(rs_mesh* dst, rs_mesh src);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_fragment* dst, rs_program_fragment src);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_vertex* dst, rs_program_vertex src);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_raster* dst, rs_program_raster src);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_store* dst, rs_program_store src);
+#endif
+
+#ifndef __LP64__
+extern void __attribute__((overloadable))
+    rsSetObject(rs_font* dst, rs_font src);
+#endif
+
+#endif // RENDERSCRIPT_RS_OBJECT_RSH
diff --git a/renderscript/include/rs_program.rsh b/renderscript/include/rs_program.rsh
index 299aae6..78a9848 100644
--- a/renderscript/include/rs_program.rsh
+++ b/renderscript/include/rs_program.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,105 +14,134 @@
  * limitations under the License.
  */
 
-/** @file rs_program.rsh
- *  \brief Program object routines
- *
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_program.rsh: Program object routines
  *
  */
+#ifndef RENDERSCRIPT_RS_PROGRAM_RSH
+#define RENDERSCRIPT_RS_PROGRAM_RSH
 
-#ifndef __RS_PROGRAM_RSH__
-#define __RS_PROGRAM_RSH__
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 16))
-
-/**
- * Get program store depth function
- *
- * @param ps program store to query
- */
-extern rs_depth_func __attribute__((overloadable))
-    rsgProgramStoreGetDepthFunc(rs_program_store ps);
-
-/**
- * Get program store depth mask
- *
- * @param ps program store to query
- */
-extern bool __attribute__((overloadable))
-    rsgProgramStoreIsDepthMaskEnabled(rs_program_store ps);
-/**
- * Get program store red component color mask
- *
- * @param ps program store to query
- */
-extern bool __attribute__((overloadable))
-    rsgProgramStoreIsColorMaskRedEnabled(rs_program_store ps);
-
-/**
- * Get program store green component color mask
- *
- * @param ps program store to query
- */
-extern bool __attribute__((overloadable))
-    rsgProgramStoreIsColorMaskGreenEnabled(rs_program_store ps);
-
-/**
- * Get program store blur component color mask
- *
- * @param ps program store to query
- */
-extern bool __attribute__((overloadable))
-    rsgProgramStoreIsColorMaskBlueEnabled(rs_program_store ps);
-
-/**
- * Get program store alpha component color mask
- *
- * @param ps program store to query
- */
-extern bool __attribute__((overloadable))
-    rsgProgramStoreIsColorMaskAlphaEnabled(rs_program_store ps);
-
-/**
- * Get program store blend source function
- *
- * @param ps program store to query
- */
-extern rs_blend_src_func __attribute__((overloadable))
-        rsgProgramStoreGetBlendSrcFunc(rs_program_store ps);
-
-/**
- * Get program store blend destination function
- *
- * @param ps program store to query
- */
-extern rs_blend_dst_func __attribute__((overloadable))
-    rsgProgramStoreGetBlendDstFunc(rs_program_store ps);
-
-/**
- * Get program store dither state
- *
- * @param ps program store to query
- */
-extern bool __attribute__((overloadable))
-    rsgProgramStoreIsDitherEnabled(rs_program_store ps);
-
-/**
- * Get program raster point sprite state
- *
- * @param pr program raster to query
- */
-extern bool __attribute__((overloadable))
-    rsgProgramRasterIsPointSpriteEnabled(rs_program_raster pr);
-
-/**
+/*
  * Get program raster cull mode
  *
- * @param pr program raster to query
+ * Parameters:
+ *   pr program raster to query
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 extern rs_cull_mode __attribute__((overloadable))
     rsgProgramRasterGetCullMode(rs_program_raster pr);
+#endif
 
-#endif // (defined(RS_VERSION) && (RS_VERSION >= 16))
+/*
+ * Get program raster point sprite state
+ *
+ * Parameters:
+ *   pr program raster to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern bool __attribute__((overloadable))
+    rsgProgramRasterIsPointSpriteEnabled(rs_program_raster pr);
+#endif
 
-#endif // __RS_PROGRAM_RSH__
+/*
+ * Get program store blend destination function
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_blend_dst_func __attribute__((overloadable))
+    rsgProgramStoreGetBlendDstFunc(rs_program_store ps);
+#endif
 
+/*
+ * Get program store blend source function
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_blend_src_func __attribute__((overloadable))
+    rsgProgramStoreGetBlendSrcFunc(rs_program_store ps);
+#endif
+
+/*
+ * Get program store depth function
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_depth_func __attribute__((overloadable))
+    rsgProgramStoreGetDepthFunc(rs_program_store ps);
+#endif
+
+/*
+ * Get program store alpha component color mask
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern bool __attribute__((overloadable))
+    rsgProgramStoreIsColorMaskAlphaEnabled(rs_program_store ps);
+#endif
+
+/*
+ * Get program store blue component color mask
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern bool __attribute__((overloadable))
+    rsgProgramStoreIsColorMaskBlueEnabled(rs_program_store ps);
+#endif
+
+/*
+ * Get program store green component color mask
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern bool __attribute__((overloadable))
+    rsgProgramStoreIsColorMaskGreenEnabled(rs_program_store ps);
+#endif
+
+/*
+ * Get program store red component color mask
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern bool __attribute__((overloadable))
+    rsgProgramStoreIsColorMaskRedEnabled(rs_program_store ps);
+#endif
+
+/*
+ * Get program store depth mask
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern bool __attribute__((overloadable))
+    rsgProgramStoreIsDepthMaskEnabled(rs_program_store ps);
+#endif
+
+/*
+ * Get program store dither state
+ *
+ * Parameters:
+ *   ps program store to query
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern bool __attribute__((overloadable))
+    rsgProgramStoreIsDitherEnabled(rs_program_store ps);
+#endif
+
+#endif // RENDERSCRIPT_RS_PROGRAM_RSH
diff --git a/renderscript/include/rs_quaternion.rsh b/renderscript/include/rs_quaternion.rsh
index 4e08d2f..c6ece96 100644
--- a/renderscript/include/rs_quaternion.rsh
+++ b/renderscript/include/rs_quaternion.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,80 +14,101 @@
  * limitations under the License.
  */
 
-/** @file rs_quaternion.rsh
- *  \brief Quaternion routines
- *
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_quaternion.rsh: Quaternion routines
  *
  */
+#ifndef RENDERSCRIPT_RS_QUATERNION_RSH
+#define RENDERSCRIPT_RS_QUATERNION_RSH
 
-#ifndef __RS_QUATERNION_RSH__
-#define __RS_QUATERNION_RSH__
-
-
-/**
- * Set the quaternion components
- * @param w component
- * @param x component
- * @param y component
- * @param z component
- */
-static void __attribute__((overloadable))
-rsQuaternionSet(rs_quaternion *q, float w, float x, float y, float z) {
-    q->w = w;
-    q->x = x;
-    q->y = y;
-    q->z = z;
-}
-
-/**
- * Set the quaternion from another quaternion
- * @param q destination quaternion
- * @param rhs source quaternion
- */
-static void __attribute__((overloadable))
-rsQuaternionSet(rs_quaternion *q, const rs_quaternion *rhs) {
-    q->w = rhs->w;
-    q->x = rhs->x;
-    q->y = rhs->y;
-    q->z = rhs->z;
-}
-
-/**
- * Multiply quaternion by a scalar
- * @param q quaternion to multiply
- * @param s scalar
- */
-static void __attribute__((overloadable))
-rsQuaternionMultiply(rs_quaternion *q, float s) {
-    q->w *= s;
-    q->x *= s;
-    q->y *= s;
-    q->z *= s;
-}
-
-/**
+/*
  * Add two quaternions
- * @param q destination quaternion to add to
- * @param rsh right hand side quaternion to add
+ *
+ * Parameters:
+ *   q destination quaternion to add to; receives the component-wise sum
+ *   rhs right hand side quaternion to add; not modified
  */
-static void
-rsQuaternionAdd(rs_quaternion *q, const rs_quaternion *rhs) {
-    q->w *= rhs->w;
-    q->x *= rhs->x;
-    q->y *= rhs->y;
-    q->z *= rhs->z;
+static inline void __attribute__((overloadable))
+    rsQuaternionAdd(rs_quaternion* q, const rs_quaternion* rhs) {
+    q->w += rhs->w;
+    q->x += rhs->x;
+    q->y += rhs->y;
+    q->z += rhs->z;
 }
 
-/**
- * Loads a quaternion that represents a rotation about an arbitrary unit vector
- * @param q quaternion to set
- * @param rot angle to rotate by
- * @param x component of a vector
- * @param y component of a vector
- * @param x component of a vector
+/*
+ * Conjugates the quaternion
+ *
+ * Parameters:
+ *   q quaternion to conjugate
  */
-static void
-rsQuaternionLoadRotateUnit(rs_quaternion *q, float rot, float x, float y, float z) {
+static inline void __attribute__((overloadable))
+    rsQuaternionConjugate(rs_quaternion* q) {
+    q->x = -q->x;
+    q->y = -q->y;
+    q->z = -q->z;
+}
+
+/*
+ * Dot product of two quaternions
+ *
+ * Parameters:
+ *   q0 first quaternion
+ *   q1 second quaternion
+ *
+ * Returns: dot product between q0 and q1
+ */
+static inline float __attribute__((overloadable))
+    rsQuaternionDot(const rs_quaternion* q0, const rs_quaternion* q1) {
+    return q0->w*q1->w + q0->x*q1->x + q0->y*q1->y + q0->z*q1->z;
+}
+
+/*
+ * Computes rotation matrix from the normalized quaternion
+ *
+ * Parameters:
+ *   m resulting matrix
+ *   q normalized quaternion
+ */
+static inline void __attribute__((overloadable))
+    rsQuaternionGetMatrixUnit(rs_matrix4x4* m, const rs_quaternion* q) {
+    float xx = q->x * q->x;
+    float xy = q->x * q->y;
+    float xz = q->x * q->z;
+    float xw = q->x * q->w;
+    float yy = q->y * q->y;
+    float yz = q->y * q->z;
+    float yw = q->y * q->w;
+    float zz = q->z * q->z;
+    float zw = q->z * q->w;
+
+    m->m[0]  = 1.0f - 2.0f * ( yy + zz );
+    m->m[4]  =        2.0f * ( xy - zw );
+    m->m[8]  =        2.0f * ( xz + yw );
+    m->m[1]  =        2.0f * ( xy + zw );
+    m->m[5]  = 1.0f - 2.0f * ( xx + zz );
+    m->m[9]  =        2.0f * ( yz - xw );
+    m->m[2]  =        2.0f * ( xz - yw );
+    m->m[6]  =        2.0f * ( yz + xw );
+    m->m[10] = 1.0f - 2.0f * ( xx + yy );
+    m->m[3]  = m->m[7] = m->m[11] = m->m[12] = m->m[13] = m->m[14] = 0.0f;
+    m->m[15] = 1.0f;
+}
+
+/*
+ * Loads a quaternion that represents a rotation about an arbitrary unit vector
+ *
+ * Parameters:
+ *   q quaternion to set
+ *   rot angle to rotate by
+ *   x component of a vector
+ *   y component of a vector
+ *   z component of a vector
+ */
+static inline void __attribute__((overloadable))
+    rsQuaternionLoadRotateUnit(rs_quaternion* q, float rot, float x, float y, float z) {
     rot *= (float)(M_PI / 180.0f) * 0.5f;
     float c = cos(rot);
     float s = sin(rot);
@@ -98,17 +119,46 @@
     q->z = z * s;
 }
 
-/**
+/*
+ * Set the quaternion from components or from another quaternion.
+ *
+ * Parameters:
+ *   q destination quaternion
+ *   w component
+ *   x component
+ *   y component
+ *   z component
+ *   rhs source quaternion
+ */
+static inline void __attribute__((overloadable))
+    rsQuaternionSet(rs_quaternion* q, float w, float x, float y, float z) {
+    q->w = w;
+    q->x = x;
+    q->y = y;
+    q->z = z;
+}
+
+static inline void __attribute__((overloadable))
+    rsQuaternionSet(rs_quaternion* q, const rs_quaternion* rhs) {
+    q->w = rhs->w;
+    q->x = rhs->x;
+    q->y = rhs->y;
+    q->z = rhs->z;
+}
+
+/*
  * Loads a quaternion that represents a rotation about an arbitrary vector
  * (doesn't have to be unit)
- * @param q quaternion to set
- * @param rot angle to rotate by
- * @param x component of a vector
- * @param y component of a vector
- * @param x component of a vector
+ *
+ * Parameters:
+ *   q quaternion to set
+ *   rot angle to rotate by
+ *   x component of a vector
+ *   y component of a vector
+ *   z component of a vector
  */
-static void
-rsQuaternionLoadRotate(rs_quaternion *q, float rot, float x, float y, float z) {
+static inline void __attribute__((overloadable))
+    rsQuaternionLoadRotate(rs_quaternion* q, float rot, float x, float y, float z) {
     const float len = x*x + y*y + z*z;
     if (len != 1) {
         const float recipLen = 1.f / sqrt(len);
@@ -119,48 +169,42 @@
     rsQuaternionLoadRotateUnit(q, rot, x, y, z);
 }
 
-/**
- * Conjugates the quaternion
- * @param q quaternion to conjugate
- */
-static void
-rsQuaternionConjugate(rs_quaternion *q) {
-    q->x = -q->x;
-    q->y = -q->y;
-    q->z = -q->z;
-}
-
-/**
- * Dot product of two quaternions
- * @param q0 first quaternion
- * @param q1 second quaternion
- * @return dot product between q0 and q1
- */
-static float
-rsQuaternionDot(const rs_quaternion *q0, const rs_quaternion *q1) {
-    return q0->w*q1->w + q0->x*q1->x + q0->y*q1->y + q0->z*q1->z;
-}
-
-/**
+/*
  * Normalizes the quaternion
- * @param q quaternion to normalize
+ *
+ * Parameters:
+ *   q quaternion to normalize
  */
-static void
-rsQuaternionNormalize(rs_quaternion *q) {
+static inline void __attribute__((overloadable))
+    rsQuaternionNormalize(rs_quaternion* q) {
     const float len = rsQuaternionDot(q, q);
     if (len != 1) {
         const float recipLen = 1.f / sqrt(len);
-        rsQuaternionMultiply(q, recipLen);
+        q->w *= recipLen;
+        q->x *= recipLen;
+        q->y *= recipLen;
+        q->z *= recipLen;
     }
 }
 
-/**
- * Multiply quaternion by another quaternion
- * @param q destination quaternion
- * @param rhs right hand side quaternion to multiply by
+/*
+ * Multiply quaternion by a scalar or another quaternion
+ *
+ * Parameters:
+ *   q destination quaternion
+ *   s scalar
+ *   rhs right hand side quaternion to multiply by
  */
-static void __attribute__((overloadable))
-rsQuaternionMultiply(rs_quaternion *q, const rs_quaternion *rhs) {
+static inline void __attribute__((overloadable))
+    rsQuaternionMultiply(rs_quaternion* q, float s) {
+    q->w *= s;
+    q->x *= s;
+    q->y *= s;
+    q->z *= s;
+}
+
+static inline void __attribute__((overloadable))
+    rsQuaternionMultiply(rs_quaternion* q, const rs_quaternion* rhs) {
     rs_quaternion qtmp;
     rsQuaternionSet(&qtmp, q);
 
@@ -171,15 +215,17 @@
     rsQuaternionNormalize(q);
 }
 
-/**
+/*
  * Performs spherical linear interpolation between two quaternions
- * @param q result quaternion from interpolation
- * @param q0 first param
- * @param q1 second param
- * @param t how much to interpolate by
+ *
+ * Parameters:
+ *   q result quaternion from interpolation
+ *   q0 first param
+ *   q1 second param
+ *   t how much to interpolate by
  */
-static void
-rsQuaternionSlerp(rs_quaternion *q, const rs_quaternion *q0, const rs_quaternion *q1, float t) {
+static inline void __attribute__((overloadable))
+    rsQuaternionSlerp(rs_quaternion* q, const rs_quaternion* q0, const rs_quaternion* q1, float t) {
     if (t <= 0.0f) {
         rsQuaternionSet(q, q0);
         return;
@@ -220,34 +266,4 @@
                         tempq0.y*scale + tempq1.y*invScale, tempq0.z*scale + tempq1.z*invScale);
 }
 
-/**
- * Computes rotation matrix from the normalized quaternion
- * @param m resulting matrix
- * @param p normalized quaternion
- */
-static void rsQuaternionGetMatrixUnit(rs_matrix4x4 *m, const rs_quaternion *q) {
-    float xx = q->x * q->x;
-    float xy = q->x * q->y;
-    float xz = q->x * q->z;
-    float xw = q->x * q->w;
-    float yy = q->y * q->y;
-    float yz = q->y * q->z;
-    float yw = q->y * q->w;
-    float zz = q->z * q->z;
-    float zw = q->z * q->w;
-
-    m->m[0]  = 1.0f - 2.0f * ( yy + zz );
-    m->m[4]  =        2.0f * ( xy - zw );
-    m->m[8]  =        2.0f * ( xz + yw );
-    m->m[1]  =        2.0f * ( xy + zw );
-    m->m[5]  = 1.0f - 2.0f * ( xx + zz );
-    m->m[9]  =        2.0f * ( yz - xw );
-    m->m[2]  =        2.0f * ( xz - yw );
-    m->m[6]  =        2.0f * ( yz + xw );
-    m->m[10] = 1.0f - 2.0f * ( xx + yy );
-    m->m[3]  = m->m[7] = m->m[11] = m->m[12] = m->m[13] = m->m[14] = 0.0f;
-    m->m[15] = 1.0f;
-}
-
-#endif
-
+#endif // RENDERSCRIPT_RS_QUATERNION_RSH
diff --git a/renderscript/include/rs_sampler.rsh b/renderscript/include/rs_sampler.rsh
index 2ff426c..4b1b778 100644
--- a/renderscript/include/rs_sampler.rsh
+++ b/renderscript/include/rs_sampler.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,64 +14,78 @@
  * limitations under the License.
  */
 
-/** @file rs_sampler.rsh
- *  \brief Sampler routines
- *
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_sampler.rsh: Sampler routines
  *
  */
+#ifndef RENDERSCRIPT_RS_SAMPLER_RSH
+#define RENDERSCRIPT_RS_SAMPLER_RSH
 
-#ifndef __RS_SAMPLER_RSH__
-#define __RS_SAMPLER_RSH__
-
-// New API's
+/*
+ * Get sampler anisotropy
+ *
+ * Parameters:
+ *   s sampler to query
+ *
+ * Returns: anisotropy
+ */
 #if (defined(RS_VERSION) && (RS_VERSION >= 16))
-
-/**
- * Get sampler minification value
- *
- * @param s sampler to query
- * @return minification value
- */
-extern rs_sampler_value __attribute__((overloadable))
-    rsSamplerGetMinification(rs_sampler s);
-
-/**
- * Get sampler magnification value
- *
- * @param s sampler to query
- * @return magnification value
- */
-extern rs_sampler_value __attribute__((overloadable))
-    rsSamplerGetMagnification(rs_sampler s);
-
-/**
- * Get sampler wrap S value
- *
- * @param s sampler to query
- * @return wrap S value
- */
-extern rs_sampler_value __attribute__((overloadable))
-    rsSamplerGetWrapS(rs_sampler s);
-
-/**
- * Get sampler wrap T value
- *
- * @param s sampler to query
- * @return wrap T value
- */
-extern rs_sampler_value __attribute__((overloadable))
-    rsSamplerGetWrapT(rs_sampler s);
-
-/**
-  Get sampler anisotropy
- *
- * @param s sampler to query
- * @return anisotropy
- */
 extern float __attribute__((overloadable))
     rsSamplerGetAnisotropy(rs_sampler s);
+#endif
 
-#endif // (defined(RS_VERSION) && (RS_VERSION >= 16))
+/*
+ * Get sampler magnification value
+ *
+ * Parameters:
+ *   s sampler to query
+ *
+ * Returns: magnification value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_sampler_value __attribute__((overloadable))
+    rsSamplerGetMagnification(rs_sampler s);
+#endif
 
-#endif // __RS_SAMPLER_RSH__
+/*
+ * Get sampler minification value
+ *
+ * Parameters:
+ *   s sampler to query
+ *
+ * Returns: minification value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_sampler_value __attribute__((overloadable))
+    rsSamplerGetMinification(rs_sampler s);
+#endif
 
+/*
+ * Get sampler wrap S value
+ *
+ * Parameters:
+ *   s sampler to query
+ *
+ * Returns: wrap S value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_sampler_value __attribute__((overloadable))
+    rsSamplerGetWrapS(rs_sampler s);
+#endif
+
+/*
+ * Get sampler wrap T value
+ *
+ * Parameters:
+ *   s sampler to query
+ *
+ * Returns: wrap T value
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
+extern rs_sampler_value __attribute__((overloadable))
+    rsSamplerGetWrapT(rs_sampler s);
+#endif
+
+#endif // RENDERSCRIPT_RS_SAMPLER_RSH
diff --git a/renderscript/include/rs_time.rsh b/renderscript/include/rs_time.rsh
index abcb88b..3a4acf2 100644
--- a/renderscript/include/rs_time.rsh
+++ b/renderscript/include/rs_time.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,102 +14,97 @@
  * limitations under the License.
  */
 
-/** @file rs_time.rsh
- *  \brief RenderScript time routines
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_time.rsh: RenderScript time routines
  *
- *  This file contains RenderScript functions relating to time and date
- *  manipulation.
+ * This file contains RenderScript functions relating to time and date manipulation.
  */
+#ifndef RENDERSCRIPT_RS_TIME_RSH
+#define RENDERSCRIPT_RS_TIME_RSH
 
-#ifndef __RS_TIME_RSH__
-#define __RS_TIME_RSH__
-
-/**
+/*
+ * rs_time_t: Seconds since January 1, 1970
+ *
  * Calendar time interpreted as seconds elapsed since the Epoch (00:00:00 on
  * January 1, 1970, Coordinated Universal Time (UTC)).
  */
 #ifndef __LP64__
 typedef int rs_time_t;
-#else
+#endif
+
+#ifdef __LP64__
 typedef long rs_time_t;
 #endif
 
-/**
- * Data structure for broken-down time components.
+/*
+ * rs_tm: Date and time structure
  *
- * tm_sec   - Seconds after the minute. This ranges from 0 to 59, but possibly
- *            up to 60 for leap seconds.
- * tm_min   - Minutes after the hour. This ranges from 0 to 59.
- * tm_hour  - Hours past midnight. This ranges from 0 to 23.
- * tm_mday  - Day of the month. This ranges from 1 to 31.
- * tm_mon   - Months since January. This ranges from 0 to 11.
- * tm_year  - Years since 1900.
- * tm_wday  - Days since Sunday. This ranges from 0 to 6.
- * tm_yday  - Days since January 1. This ranges from 0 to 365.
- * tm_isdst - Flag to indicate whether daylight saving time is in effect. The
- *            value is positive if it is in effect, zero if it is not, and
- *            negative if the information is not available.
+ * Data structure for broken-down time components.
  */
 typedef struct {
-    int tm_sec;     ///< seconds
-    int tm_min;     ///< minutes
-    int tm_hour;    ///< hours
-    int tm_mday;    ///< day of the month
-    int tm_mon;     ///< month
-    int tm_year;    ///< year
-    int tm_wday;    ///< day of the week
-    int tm_yday;    ///< day of the year
-    int tm_isdst;   ///< daylight savings time
+    int tm_sec; // Seconds after the minute. This ranges from 0 to 59, but possibly up to 60 for leap seconds.
+    int tm_min; // Minutes after the hour. This ranges from 0 to 59.
+    int tm_hour; // Hours past midnight. This ranges from 0 to 23.
+    int tm_mday; // Day of the month. This ranges from 1 to 31.
+    int tm_mon; // Months since January. This ranges from 0 to 11.
+    int tm_year; // Years since 1900.
+    int tm_wday; // Days since Sunday. This ranges from 0 to 6.
+    int tm_yday; // Days since January 1. This ranges from 0 to 365.
+    int tm_isdst; // Flag to indicate whether daylight saving time is in effect. The value is positive if it is in effect, zero if it is not, and negative if the information is not available.
 } rs_tm;
 
-/**
- * Returns the number of seconds since the Epoch (00:00:00 UTC, January 1,
- * 1970). If @p timer is non-NULL, the result is also stored in the memory
- * pointed to by this variable. If an error occurs, a value of -1 is returned.
- *
- * @param timer Location to also store the returned calendar time.
- *
- * @return Seconds since the Epoch.
- */
-extern rs_time_t __attribute__((overloadable))
-    rsTime(rs_time_t *timer);
-
-/**
- * Converts the time specified by @p timer into broken-down time and stores it
- * in @p local. This function also returns a pointer to @p local. If @p local
- * is NULL, this function does nothing and returns NULL.
- *
- * @param local Broken-down time.
- * @param timer Input time as calendar time.
- *
- * @return Pointer to broken-down time (same as input @p local).
- */
-extern rs_tm * __attribute__((overloadable))
-    rsLocaltime(rs_tm *local, const rs_time_t *timer);
-
-/**
- * Returns the current system clock (uptime) in milliseconds.
- *
- * @return Uptime in milliseconds.
- */
-extern int64_t __attribute__((overloadable))
-    rsUptimeMillis(void);
-
-/**
- * Returns the current system clock (uptime) in nanoseconds.
- *
- * @return Uptime in nanoseconds.
- */
-extern int64_t __attribute__((overloadable))
-    rsUptimeNanos(void);
-
-/**
+/*
  * Returns the time in seconds since this function was last called in this
  * script.
  *
- * @return Time in seconds.
+ * Returns: Time in seconds.
  */
 extern float __attribute__((overloadable))
     rsGetDt(void);
 
-#endif
+/*
+ * Converts the time specified by timer into broken-down time and stores it
+ * in local. This function also returns a pointer to local. If local
+ * is NULL, this function does nothing and returns NULL.
+ *
+ * Parameters:
+ *   local Broken-down time.
+ *   timer Input time as calendar time.
+ *
+ * Returns: Pointer to broken-down time (same as input local).
+ */
+extern rs_tm* __attribute__((overloadable))
+    rsLocaltime(rs_tm* local, const rs_time_t* timer);
+
+/*
+ * Returns the number of seconds since the Epoch (00:00:00 UTC, January 1,
+ * 1970). If timer is non-NULL, the result is also stored in the memory
+ * pointed to by this variable. If an error occurs, a value of -1 is returned.
+ *
+ * Parameters:
+ *   timer Location to also store the returned calendar time.
+ *
+ * Returns: Seconds since the Epoch.
+ */
+extern rs_time_t __attribute__((overloadable))
+    rsTime(rs_time_t* timer);
+
+/*
+ * Returns the current system clock (uptime) in milliseconds.
+ *
+ * Returns: Uptime in milliseconds.
+ */
+extern int64_t __attribute__((overloadable))
+    rsUptimeMillis(void);
+
+/*
+ * Returns the current system clock (uptime) in nanoseconds.
+ *
+ * Returns: Uptime in nanoseconds.
+ */
+extern int64_t __attribute__((overloadable))
+    rsUptimeNanos(void);
+
+#endif // RENDERSCRIPT_RS_TIME_RSH
diff --git a/renderscript/include/rs_types.rsh b/renderscript/include/rs_types.rsh
index f1fc60b..b174933 100644
--- a/renderscript/include/rs_types.rsh
+++ b/renderscript/include/rs_types.rsh
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,362 +14,614 @@
  * limitations under the License.
  */
 
-/** @file rs_types.rsh
+// Don't edit this file!  It is auto-generated by frameworks/rs/api/gen_runtime.
+
+/*
+ * rs_types.rsh: Standard RenderScript types
  *
- *  Define the standard RenderScript types
- *
- *  Integers
+ *  Integers:
  *  8 bit: char, int8_t
  *  16 bit: short, int16_t
  *  32 bit: int, in32_t
  *  64 bit: long, long long, int64_t
  *
- *  Unsigned Integers
+ *  Unsigned integers:
  *  8 bit: uchar, uint8_t
  *  16 bit: ushort, uint16_t
  *  32 bit: uint, uint32_t
  *  64 bit: ulong, uint64_t
  *
- *  Floating point
+ *  Floating point:
  *  32 bit: float
  *  64 bit: double
  *
  *  Vectors of length 2, 3, and 4 are supported for all the types above.
- *
  */
-
-#ifndef __RS_TYPES_RSH__
-#define __RS_TYPES_RSH__
-
-/* Constants */
-#define M_E         2.718281828459045235360287471352662498f     /* e */
-#define M_LOG2E     1.442695040888963407359924681001892137f     /* log_2 e */
-#define M_LOG10E    0.434294481903251827651128918916605082f     /* log_10 e */
-#define M_LN2       0.693147180559945309417232121458176568f     /* log_e 2 */
-#define M_LN10      2.302585092994045684017991454684364208f     /* log_e 10 */
-#define M_PI        3.141592653589793238462643383279502884f     /* pi */
-#define M_PI_2      1.570796326794896619231321691639751442f     /* pi/2 */
-#define M_PI_4      0.785398163397448309615660845819875721f     /* pi/4 */
-#define M_1_PI      0.318309886183790671537767526745028724f     /* 1/pi */
-#define M_2_PIl     0.636619772367581343075535053490057448f     /* 2/pi */
-#define M_2_SQRTPI  1.128379167095512573896158903121545172f     /* 2/sqrt(pi) */
-#define M_SQRT2     1.414213562373095048801688724209698079f     /* sqrt(2) */
-#define M_SQRT1_2   0.707106781186547524400844362104849039f     /* 1/sqrt(2) */
+#ifndef RENDERSCRIPT_RS_TYPES_RSH
+#define RENDERSCRIPT_RS_TYPES_RSH
 
 #include "stdbool.h"
-/**
+
+#define RS_PACKED __attribute__((packed, aligned(4)))
+#define NULL ((void *)0)
+
+// Opaque handle to a RenderScript object. Do not use this directly.
+#ifndef __LP64__
+#define _RS_HANDLE \
+struct {\
+  const int* const p;\
+} __attribute__((packed, aligned(4)))
+#else
+#define _RS_HANDLE \
+struct {\
+  const long* const p;\
+  const long* const r;\
+  const long* const v1;\
+  const long* const v2;\
+}
+#endif
+
+/*
+ * M_1_PI: 1 / pi, as a 32 bit float
+ *
+ * The inverse of pi, as a 32 bit float.
+ */
+#define M_1_PI 0.318309886183790671537767526745028724f
+
+/*
+ * M_2_PI: 2 / pi, as a 32 bit float
+ *
+ * 2 divided by pi, as a 32 bit float.
+ */
+#define M_2_PI 0.636619772367581343075535053490057448f
+
+/*
+ * M_2_PIl: Deprecated.  Use M_2_PI instead.
+ *
+ */
+#define M_2_PIl 0.636619772367581343075535053490057448f
+
+/*
+ * M_2_SQRTPI: 2 / sqrt(pi), as a 32 bit float
+ *
+ * 2 divided by the square root of pi, as a 32 bit float.
+ */
+#define M_2_SQRTPI 1.128379167095512573896158903121545172f
+
+/*
+ * M_E: e, as a 32 bit float
+ *
+ * The number e, the base of the natural logarithm, as a 32 bit float.
+ */
+#define M_E 2.718281828459045235360287471352662498f
+
+/*
+ * M_LN10: log_e(10), as a 32 bit float
+ *
+ * The natural logarithm of 10, as a 32 bit float.
+ */
+#define M_LN10 2.302585092994045684017991454684364208f
+
+/*
+ * M_LN2: log_e(2), as a 32 bit float
+ *
+ * The natural logarithm of 2, as a 32 bit float.
+ */
+#define M_LN2 0.693147180559945309417232121458176568f
+
+/*
+ * M_LOG10E: log_10(e), as a 32 bit float
+ *
+ * The logarithm base 10 of e, as a 32 bit float.
+ */
+#define M_LOG10E 0.434294481903251827651128918916605082f
+
+/*
+ * M_LOG2E: log_2(e), as a 32 bit float
+ *
+ * The logarithm base 2 of e, as a 32 bit float.
+ */
+#define M_LOG2E 1.442695040888963407359924681001892137f
+
+/*
+ * M_PI: pi, as a 32 bit float
+ *
+ * The constant pi, as a 32 bit float.
+ */
+#define M_PI 3.141592653589793238462643383279502884f
+
+/*
+ * M_PI_2: pi / 2, as a 32 bit float
+ *
+ * Pi divided by 2, as a 32 bit float.
+ */
+#define M_PI_2 1.570796326794896619231321691639751442f
+
+/*
+ * M_PI_4: pi / 4, as a 32 bit float
+ *
+ * Pi divided by 4, as a 32 bit float.
+ */
+#define M_PI_4 0.785398163397448309615660845819875721f
+
+/*
+ * M_SQRT1_2: 1 / sqrt(2), as a 32 bit float
+ *
+ * The inverse of the square root of 2, as a 32 bit float.
+ */
+#define M_SQRT1_2 0.707106781186547524400844362104849039f
+
+/*
+ * M_SQRT2: sqrt(2), as a 32 bit float
+ *
+ * The square root of 2, as a 32 bit float.
+ */
+#define M_SQRT2 1.414213562373095048801688724209698079f
+
+/*
+ * int8_t: 8 bit signed integer
+ *
  * 8 bit integer type
  */
 typedef char int8_t;
-/**
+
+/*
+ * int16_t: 16 bit signed integer
+ *
  * 16 bit integer type
  */
 typedef short int16_t;
-/**
+
+/*
+ * int32_t: 32 bit signed integer
+ *
  * 32 bit integer type
  */
 typedef int int32_t;
-/**
+
+/*
+ * int64_t: 64 bit signed integer
+ *
  * 64 bit integer type
  */
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-    typedef long int64_t;
-#else
-    typedef long long int64_t;
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+typedef long long int64_t;
 #endif
-/**
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+typedef long int64_t;
+#endif
+
+/*
+ * uint8_t: 8 bit unsigned integer
+ *
  * 8 bit unsigned integer type
  */
 typedef unsigned char uint8_t;
-/**
+
+/*
+ * uint16_t: 16 bit unsigned integer
+ *
  * 16 bit unsigned integer type
  */
 typedef unsigned short uint16_t;
-/**
+
+/*
+ * uint32_t: 32 bit unsigned integer
+ *
  * 32 bit unsigned integer type
  */
 typedef unsigned int uint32_t;
-/**
+
+/*
+ * uint64_t: 64 bit unsigned integer
+ *
  * 64 bit unsigned integer type
  */
-#if (defined(RS_VERSION) && (RS_VERSION >= 21))
-    typedef unsigned long uint64_t;
-#else
-    typedef unsigned long long uint64_t;
+#if !defined(RS_VERSION) || (RS_VERSION <= 20)
+typedef unsigned long long uint64_t;
 #endif
-/**
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 21))
+typedef unsigned long uint64_t;
+#endif
+
+/*
+ * uchar: 8 bit unsigned integer
+ *
  * 8 bit unsigned integer type
  */
 typedef uint8_t uchar;
-/**
+
+/*
+ * ushort: 16 bit unsigned integer
+ *
  * 16 bit unsigned integer type
  */
 typedef uint16_t ushort;
-/**
+
+/*
+ * uint: 32 bit unsigned integer
+ *
  * 32 bit unsigned integer type
  */
 typedef uint32_t uint;
-/**
+
+/*
+ * ulong: 64 bit unsigned integer
+ *
  * Typedef for unsigned long (use for 64-bit unsigned integers)
  */
 typedef uint64_t ulong;
-/**
+
+/*
+ * size_t: Unsigned size type
+ *
  * Typedef for size_t
  */
+#ifdef __LP64__
+typedef uint64_t size_t;
+#endif
+
 #ifndef __LP64__
 typedef uint32_t size_t;
-typedef int32_t ssize_t;
-#else
-typedef uint64_t size_t;
+#endif
+
+/*
+ * ssize_t: Signed size type
+ *
+ * Typedef for ssize_t
+ */
+#ifdef __LP64__
 typedef int64_t ssize_t;
 #endif
 
 #ifndef __LP64__
-#define RS_BASE_OBJ typedef struct { const int* const p; } __attribute__((packed, aligned(4)))
-#else
-#define RS_BASE_OBJ typedef struct { const long* const p; const long* const r; const long* const v1; const long* const v2; }
+typedef int32_t ssize_t;
 #endif
 
-/**
- * \brief Opaque handle to a RenderScript element.
+/*
+ * rs_element: Handle to an element
  *
+ * Opaque handle to a RenderScript element.
  * See: android.renderscript.Element
  */
-RS_BASE_OBJ rs_element;
-/**
- * \brief Opaque handle to a RenderScript type.
+typedef _RS_HANDLE rs_element;
+
+/*
+ * rs_type: Handle to a Type
  *
+ * Opaque handle to a RenderScript type.
  * See: android.renderscript.Type
  */
-RS_BASE_OBJ rs_type;
-/**
- * \brief Opaque handle to a RenderScript allocation.
+typedef _RS_HANDLE rs_type;
+
+/*
+ * rs_allocation: Handle to an allocation
  *
+ * Opaque handle to a RenderScript allocation.
  * See: android.renderscript.Allocation
  */
-RS_BASE_OBJ rs_allocation;
-/**
- * \brief Opaque handle to a RenderScript sampler object.
+typedef _RS_HANDLE rs_allocation;
+
+/*
+ * rs_sampler: Handle to a Sampler
  *
+ * Opaque handle to a RenderScript sampler object.
  * See: android.renderscript.Sampler
  */
-RS_BASE_OBJ rs_sampler;
-/**
- * \brief Opaque handle to a RenderScript script object.
+typedef _RS_HANDLE rs_sampler;
+
+/*
+ * rs_script: Handle to a Script
  *
+ * Opaque handle to a RenderScript script object.
  * See: android.renderscript.ScriptC
  */
-RS_BASE_OBJ rs_script;
+typedef _RS_HANDLE rs_script;
 
-#ifndef __LP64__
-/**
- * \brief Opaque handle to a RenderScript mesh object.
+/*
+ * rs_mesh: Handle to a Mesh
  *
+ * Opaque handle to a RenderScript mesh object.
  * See: android.renderscript.Mesh
  */
-typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_mesh;
-/**
- * \brief Opaque handle to a RenderScript Path object.
+#ifndef __LP64__
+typedef _RS_HANDLE rs_mesh;
+#endif
+
+/*
+ * rs_program_fragment: Handle to a ProgramFragment
  *
- * See: android.renderscript.Path
- */
-typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_path;
-/**
- * \brief Opaque handle to a RenderScript ProgramFragment object.
- *
+ * Opaque handle to a RenderScript ProgramFragment object.
  * See: android.renderscript.ProgramFragment
  */
-typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_program_fragment;
-/**
- * \brief Opaque handle to a RenderScript ProgramVertex object.
+#ifndef __LP64__
+typedef _RS_HANDLE rs_program_fragment;
+#endif
+
+/*
+ * rs_program_vertex: Handle to a ProgramVertex
  *
+ * Opaque handle to a RenderScript ProgramVertex object.
  * See: android.renderscript.ProgramVertex
  */
-typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_program_vertex;
-/**
- * \brief Opaque handle to a RenderScript ProgramRaster object.
+#ifndef __LP64__
+typedef _RS_HANDLE rs_program_vertex;
+#endif
+
+/*
+ * rs_program_raster: Handle to a ProgramRaster
  *
+ * Opaque handle to a RenderScript ProgramRaster object.
  * See: android.renderscript.ProgramRaster
  */
-typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_program_raster;
-/**
- * \brief Opaque handle to a RenderScript ProgramStore object.
+#ifndef __LP64__
+typedef _RS_HANDLE rs_program_raster;
+#endif
+
+/*
+ * rs_program_store: Handle to a ProgramStore
  *
+ * Opaque handle to a RenderScript ProgramStore object.
  * See: android.renderscript.ProgramStore
  */
-typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_program_store;
-/**
- * \brief Opaque handle to a RenderScript font object.
+#ifndef __LP64__
+typedef _RS_HANDLE rs_program_store;
+#endif
+
+/*
+ * rs_font: Handle to a Font
  *
+ * Opaque handle to a RenderScript font object.
  * See: android.renderscript.Font
  */
-typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_font;
-#endif // __LP64__
+#ifndef __LP64__
+typedef _RS_HANDLE rs_font;
+#endif
 
-/**
+/*
+ * float2: Two 32 bit floats
+ *
  * Vector version of the basic float type.
- * Provides two float fields packed into a single 64 bit field with 64 bit
- * alignment.
+ * Provides two float fields packed into a single 64 bit field with 64 bit alignment.
  */
-typedef float float2 __attribute__((ext_vector_type(2)));
-/**
- * Vector version of the basic float type. Provides three float fields packed
- * into a single 128 bit field with 128 bit alignment.
- */
-typedef float float3 __attribute__((ext_vector_type(3)));
-/**
- * Vector version of the basic float type.
- * Provides four float fields packed into a single 128 bit field with 128 bit
- * alignment.
- */
-typedef float float4 __attribute__((ext_vector_type(4)));
+typedef float __attribute__((ext_vector_type(2))) float2;
 
-/**
+/*
+ * float3: Three 32 bit floats
+ *
+ * Vector version of the basic float type.
+ * Provides three float fields packed into a single 128 bit field with 128 bit alignment.
+ */
+typedef float __attribute__((ext_vector_type(3))) float3;
+
+/*
+ * float4: Four 32 bit floats
+ *
+ * Vector version of the basic float type.
+ * Provides four float fields packed into a single 128 bit field with 128 bit alignment.
+ */
+typedef float __attribute__((ext_vector_type(4))) float4;
+
+/*
+ * double2: Two 64 bit floats
+ *
  * Vector version of the basic double type. Provides two double fields packed
  * into a single 128 bit field with 128 bit alignment.
  */
-typedef double double2 __attribute__((ext_vector_type(2)));
-/**
+typedef double __attribute__((ext_vector_type(2))) double2;
+
+/*
+ * double3: Three 64 bit floats
+ *
  * Vector version of the basic double type. Provides three double fields packed
  * into a single 256 bit field with 256 bit alignment.
  */
-typedef double double3 __attribute__((ext_vector_type(3)));
-/**
+typedef double __attribute__((ext_vector_type(3))) double3;
+
+/*
+ * double4: Four 64 bit floats
+ *
  * Vector version of the basic double type. Provides four double fields packed
  * into a single 256 bit field with 256 bit alignment.
  */
-typedef double double4 __attribute__((ext_vector_type(4)));
+typedef double __attribute__((ext_vector_type(4))) double4;
 
-/**
+/*
+ * uchar2: Two 8 bit unsigned integers
+ *
  * Vector version of the basic uchar type. Provides two uchar fields packed
  * into a single 16 bit field with 16 bit alignment.
  */
-typedef uchar uchar2 __attribute__((ext_vector_type(2)));
-/**
+typedef uchar __attribute__((ext_vector_type(2))) uchar2;
+
+/*
+ * uchar3: Three 8 bit unsigned integers
+ *
  * Vector version of the basic uchar type. Provides three uchar fields packed
  * into a single 32 bit field with 32 bit alignment.
  */
-typedef uchar uchar3 __attribute__((ext_vector_type(3)));
-/**
+typedef uchar __attribute__((ext_vector_type(3))) uchar3;
+
+/*
+ * uchar4: Four 8 bit unsigned integers
+ *
  * Vector version of the basic uchar type. Provides four uchar fields packed
  * into a single 32 bit field with 32 bit alignment.
  */
-typedef uchar uchar4 __attribute__((ext_vector_type(4)));
+typedef uchar __attribute__((ext_vector_type(4))) uchar4;
 
-/**
+/*
+ * ushort2: Two 16 bit unsigned integers
+ *
  * Vector version of the basic ushort type. Provides two ushort fields packed
  * into a single 32 bit field with 32 bit alignment.
  */
-typedef ushort ushort2 __attribute__((ext_vector_type(2)));
-/**
+typedef ushort __attribute__((ext_vector_type(2))) ushort2;
+
+/*
+ * ushort3: Three 16 bit unsigned integers
+ *
  * Vector version of the basic ushort type. Provides three ushort fields packed
  * into a single 64 bit field with 64 bit alignment.
  */
-typedef ushort ushort3 __attribute__((ext_vector_type(3)));
-/**
+typedef ushort __attribute__((ext_vector_type(3))) ushort3;
+
+/*
+ * ushort4: Four 16 bit unsigned integers
+ *
  * Vector version of the basic ushort type. Provides four ushort fields packed
  * into a single 64 bit field with 64 bit alignment.
  */
-typedef ushort ushort4 __attribute__((ext_vector_type(4)));
+typedef ushort __attribute__((ext_vector_type(4))) ushort4;
 
-/**
+/*
+ * uint2: Two 32 bit unsigned integers
+ *
  * Vector version of the basic uint type. Provides two uint fields packed into a
  * single 64 bit field with 64 bit alignment.
  */
-typedef uint uint2 __attribute__((ext_vector_type(2)));
-/**
+typedef uint __attribute__((ext_vector_type(2))) uint2;
+
+/*
+ * uint3: Three 32 bit unsigned integers
+ *
  * Vector version of the basic uint type. Provides three uint fields packed into
  * a single 128 bit field with 128 bit alignment.
  */
-typedef uint uint3 __attribute__((ext_vector_type(3)));
-/**
+typedef uint __attribute__((ext_vector_type(3))) uint3;
+
+/*
+ * uint4: Four 32 bit unsigned integers
+ *
  * Vector version of the basic uint type. Provides four uint fields packed into
  * a single 128 bit field with 128 bit alignment.
  */
-typedef uint uint4 __attribute__((ext_vector_type(4)));
+typedef uint __attribute__((ext_vector_type(4))) uint4;
 
-/**
+/*
+ * ulong2: Two 64 bit unsigned integers
+ *
  * Vector version of the basic ulong type. Provides two ulong fields packed into
  * a single 128 bit field with 128 bit alignment.
  */
-typedef ulong ulong2 __attribute__((ext_vector_type(2)));
-/**
+typedef ulong __attribute__((ext_vector_type(2))) ulong2;
+
+/*
+ * ulong3: Three 64 bit unsigned integers
+ *
  * Vector version of the basic ulong type. Provides three ulong fields packed
  * into a single 256 bit field with 256 bit alignment.
  */
-typedef ulong ulong3 __attribute__((ext_vector_type(3)));
-/**
+typedef ulong __attribute__((ext_vector_type(3))) ulong3;
+
+/*
+ * ulong4: Four 64 bit unsigned integers
+ *
  * Vector version of the basic ulong type. Provides four ulong fields packed
  * into a single 256 bit field with 256 bit alignment.
  */
-typedef ulong ulong4 __attribute__((ext_vector_type(4)));
+typedef ulong __attribute__((ext_vector_type(4))) ulong4;
 
-/**
+/*
+ * char2: Two 8 bit signed integers
+ *
  * Vector version of the basic char type. Provides two char fields packed into a
  * single 16 bit field with 16 bit alignment.
  */
-typedef char char2 __attribute__((ext_vector_type(2)));
-/**
+typedef char __attribute__((ext_vector_type(2))) char2;
+
+/*
+ * char3: Three 8 bit signed integers
+ *
  * Vector version of the basic char type. Provides three char fields packed into
  * a single 32 bit field with 32 bit alignment.
  */
-typedef char char3 __attribute__((ext_vector_type(3)));
-/**
+typedef char __attribute__((ext_vector_type(3))) char3;
+
+/*
+ * char4: Four 8 bit signed integers
+ *
  * Vector version of the basic char type. Provides four char fields packed into
  * a single 32 bit field with 32 bit alignment.
  */
-typedef char char4 __attribute__((ext_vector_type(4)));
+typedef char __attribute__((ext_vector_type(4))) char4;
 
-/**
+/*
+ * short2: Two 16 bit signed integers
+ *
  * Vector version of the basic short type. Provides two short fields packed into
  * a single 32 bit field with 32 bit alignment.
  */
-typedef short short2 __attribute__((ext_vector_type(2)));
-/**
+typedef short __attribute__((ext_vector_type(2))) short2;
+
+/*
+ * short3: Three 16 bit signed integers
+ *
  * Vector version of the basic short type. Provides three short fields packed
  * into a single 64 bit field with 64 bit alignment.
  */
-typedef short short3 __attribute__((ext_vector_type(3)));
-/**
+typedef short __attribute__((ext_vector_type(3))) short3;
+
+/*
+ * short4: Four 16 bit signed integers
+ *
  * Vector version of the basic short type. Provides four short fields packed
  * into a single 64 bit field with 64 bit alignment.
  */
-typedef short short4 __attribute__((ext_vector_type(4)));
+typedef short __attribute__((ext_vector_type(4))) short4;
 
-/**
+/*
+ * int2: Two 32 bit signed integers
+ *
  * Vector version of the basic int type. Provides two int fields packed into a
  * single 64 bit field with 64 bit alignment.
  */
-typedef int int2 __attribute__((ext_vector_type(2)));
-/**
+typedef int __attribute__((ext_vector_type(2))) int2;
+
+/*
+ * int3: Three 32 bit signed integers
+ *
  * Vector version of the basic int type. Provides three int fields packed into a
  * single 128 bit field with 128 bit alignment.
  */
-typedef int int3 __attribute__((ext_vector_type(3)));
-/**
+typedef int __attribute__((ext_vector_type(3))) int3;
+
+/*
+ * int4: Four 32 bit signed integers
+ *
  * Vector version of the basic int type. Provides two four fields packed into a
  * single 128 bit field with 128 bit alignment.
  */
-typedef int int4 __attribute__((ext_vector_type(4)));
+typedef int __attribute__((ext_vector_type(4))) int4;
 
-/**
+/*
+ * long2: Two 64 bit signed integers
+ *
  * Vector version of the basic long type. Provides two long fields packed into a
  * single 128 bit field with 128 bit alignment.
  */
-typedef long long2 __attribute__((ext_vector_type(2)));
-/**
+typedef long __attribute__((ext_vector_type(2))) long2;
+
+/*
+ * long3: Three 64 bit signed integers
+ *
  * Vector version of the basic long type. Provides three long fields packed into
  * a single 256 bit field with 256 bit alignment.
  */
-typedef long long3 __attribute__((ext_vector_type(3)));
-/**
+typedef long __attribute__((ext_vector_type(3))) long3;
+
+/*
+ * long4: Four 64 bit signed integers
+ *
  * Vector version of the basic long type. Provides four long fields packed into
  * a single 256 bit field with 256 bit alignment.
  */
-typedef long long4 __attribute__((ext_vector_type(4)));
+typedef long __attribute__((ext_vector_type(4))) long4;
 
-/**
- * \brief 4x4 float matrix
+/*
+ * rs_matrix4x4: 4x4 matrix of 32 bit floats
  *
  * Native holder for RS matrix.  Elements are stored in the array at the
  * location [row*4 + col]
@@ -377,8 +629,9 @@
 typedef struct {
     float m[16];
 } rs_matrix4x4;
-/**
- * \brief 3x3 float matrix
+
+/*
+ * rs_matrix3x3: 3x3 matrix of 32 bit floats
  *
  * Native holder for RS matrix.  Elements are stored in the array at the
  * location [row*3 + col]
@@ -386,8 +639,9 @@
 typedef struct {
     float m[9];
 } rs_matrix3x3;
-/**
- * \brief 2x2 float matrix
+
+/*
+ * rs_matrix2x2: 2x2 matrix of 32 bit floats
  *
  * Native holder for RS matrix.  Elements are stored in the array at the
  * location [row*2 + col]
@@ -396,19 +650,18 @@
     float m[4];
 } rs_matrix2x2;
 
-/**
- * quaternion type for use with the quaternion functions
+/*
+ * rs_quaternion: Quaternion
+ *
+ * Quaternion type for use with the quaternion functions
  */
 typedef float4 rs_quaternion;
 
-#define RS_PACKED __attribute__((packed, aligned(4)))
-#define NULL ((void *)0)
-
-#if (defined(RS_VERSION) && (RS_VERSION >= 14))
-
-/**
- * \brief Enum for selecting cube map faces
+/*
+ * rs_allocation_cubemap_face: Enum for selecting cube map faces
+ *
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
 typedef enum {
     RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X = 0,
     RS_ALLOCATION_CUBEMAP_FACE_NEGATIVE_X = 1,
@@ -417,69 +670,45 @@
     RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_Z = 4,
     RS_ALLOCATION_CUBEMAP_FACE_NEGATIVE_Z = 5
 } rs_allocation_cubemap_face;
+#endif
 
-/**
- * \brief Bitfield to specify the usage types for an allocation.
+/*
+ * rs_allocation_usage_type: Bitfield to specify the usage types for an allocation
  *
  * These values are ORed together to specify which usages or memory spaces are
  * relevant to an allocation or an operation on an allocation.
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 14))
 typedef enum {
     RS_ALLOCATION_USAGE_SCRIPT = 0x0001,
-    RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE = 0x0002,
-    RS_ALLOCATION_USAGE_GRAPHICS_VERTEX = 0x0004,
-    RS_ALLOCATION_USAGE_GRAPHICS_CONSTANTS = 0x0008,
-    RS_ALLOCATION_USAGE_GRAPHICS_RENDER_TARGET = 0x0010
+    RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE = 0x0002, // Deprecated.
+    RS_ALLOCATION_USAGE_GRAPHICS_VERTEX = 0x0004, // Deprecated.
+    RS_ALLOCATION_USAGE_GRAPHICS_CONSTANTS = 0x0008, // Deprecated.
+    RS_ALLOCATION_USAGE_GRAPHICS_RENDER_TARGET = 0x0010 // Deprecated.
 } rs_allocation_usage_type;
+#endif
 
-#endif //defined(RS_VERSION) && (RS_VERSION >= 14)
-
-// New API's
-#if (defined(RS_VERSION) && (RS_VERSION >= 16))
-
-#ifndef __LP64__
-/**
- * Describes the way mesh vertex data is interpreted when rendering
+/*
+ * rs_primitive: How to interpret mesh vertex data
  *
- **/
+ * Describes the way mesh vertex data is interpreted when rendering
+ */
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 typedef enum {
-    /**
-    * Vertex data will be rendered as a series of points
-    */
-    RS_PRIMITIVE_POINT              = 0,
-    /**
-    * Vertex pairs will be rendered as lines
-    */
-    RS_PRIMITIVE_LINE               = 1,
-    /**
-    * Vertex data will be rendered as a connected line strip
-    */
-    RS_PRIMITIVE_LINE_STRIP         = 2,
-    /**
-    * Vertices will be rendered as individual triangles
-    */
-    RS_PRIMITIVE_TRIANGLE           = 3,
-    /**
-    * Vertices will be rendered as a connected triangle strip
-    * defined by the first three vertices with each additional
-    * triangle defined by a new vertex
-    */
-    RS_PRIMITIVE_TRIANGLE_STRIP     = 4,
-    /**
-    * Vertices will be rendered as a sequence of triangles that all
-    * share first vertex as the origin
-    */
-    RS_PRIMITIVE_TRIANGLE_FAN       = 5,
-
-    /**
-    * Invalid primitive
-    */
-    RS_PRIMITIVE_INVALID            = 100,
+    RS_PRIMITIVE_POINT = 0, // Vertex data will be rendered as a series of points
+    RS_PRIMITIVE_LINE = 1, // Vertex pairs will be rendered as lines
+    RS_PRIMITIVE_LINE_STRIP = 2, // Vertex data will be rendered as a connected line strip
+    RS_PRIMITIVE_TRIANGLE = 3, // Vertices will be rendered as individual triangles
+    RS_PRIMITIVE_TRIANGLE_STRIP = 4, // Vertices will be rendered as a connected triangle strip defined by the first three vertices with each additional triangle defined by a new vertex
+    RS_PRIMITIVE_TRIANGLE_FAN = 5, // Vertices will be rendered as a sequence of triangles that all share first vertex as the origin
+    RS_PRIMITIVE_INVALID = 100 // Invalid primitive
 } rs_primitive;
-#endif // __LP64__
+#endif
+#endif
 
-/**
- * \brief Enumeration for possible element data types
+/*
+ * rs_data_type: Element data types
  *
  * DataType represents the basic type information for a basic element.  The
  * naming convention follows.  For numeric types it is FLOAT,
@@ -496,6 +725,7 @@
  *
  * RS_* objects.  32 bit opaque handles.
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 typedef enum {
     RS_TYPE_NONE             = 0,
     RS_TYPE_FLOAT_32         = 2,
@@ -508,17 +738,13 @@
     RS_TYPE_UNSIGNED_16      = 9,
     RS_TYPE_UNSIGNED_32      = 10,
     RS_TYPE_UNSIGNED_64      = 11,
-
     RS_TYPE_BOOLEAN          = 12,
-
     RS_TYPE_UNSIGNED_5_6_5   = 13,
     RS_TYPE_UNSIGNED_5_5_5_1 = 14,
     RS_TYPE_UNSIGNED_4_4_4_4 = 15,
-
     RS_TYPE_MATRIX_4X4       = 16,
     RS_TYPE_MATRIX_3X3       = 17,
     RS_TYPE_MATRIX_2X2       = 18,
-
     RS_TYPE_ELEMENT          = 1000,
     RS_TYPE_TYPE             = 1001,
     RS_TYPE_ALLOCATION       = 1002,
@@ -530,21 +756,21 @@
     RS_TYPE_PROGRAM_RASTER   = 1008,
     RS_TYPE_PROGRAM_STORE    = 1009,
     RS_TYPE_FONT             = 1010,
-
-    RS_TYPE_INVALID          = 10000,
+    RS_TYPE_INVALID          = 10000
 } rs_data_type;
+#endif
 
-/**
- * \brief Enumeration for possible element data kind
+/*
+ * rs_data_kind: Element data kind
  *
  * The special interpretation of the data if required.  This is primarly
  * useful for graphical data.  USER indicates no special interpretation is
  * expected.  PIXEL is used in conjunction with the standard data types for
  * representing texture formats.
  */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 typedef enum {
     RS_KIND_USER         = 0,
-
     RS_KIND_PIXEL_L      = 7,
     RS_KIND_PIXEL_A      = 8,
     RS_KIND_PIXEL_LA     = 9,
@@ -552,52 +778,37 @@
     RS_KIND_PIXEL_RGBA   = 11,
     RS_KIND_PIXEL_DEPTH  = 12,
     RS_KIND_PIXEL_YUV    = 13,
-
-    RS_KIND_INVALID      = 100,
+    RS_KIND_INVALID      = 100
 } rs_data_kind;
+#endif
 
+/*
+ * rs_depth_func: Depth function
+ *
+ * Specifies conditional drawing depending on the comparison of the incoming
+ * depth to that found in the depth buffer.
+ */
 #ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 typedef enum {
-    /**
-    * Always drawn
-    */
-    RS_DEPTH_FUNC_ALWAYS        = 0,
-    /**
-    * Drawn if the incoming depth value is less than that in the
-    * depth buffer
-    */
-    RS_DEPTH_FUNC_LESS          = 1,
-    /**
-    * Drawn if the incoming depth value is less or equal to that in
-    * the depth buffer
-    */
-    RS_DEPTH_FUNC_LEQUAL        = 2,
-    /**
-    * Drawn if the incoming depth value is greater than that in the
-    * depth buffer
-    */
-    RS_DEPTH_FUNC_GREATER       = 3,
-    /**
-    * Drawn if the incoming depth value is greater or equal to that
-    * in the depth buffer
-    */
-    RS_DEPTH_FUNC_GEQUAL        = 4,
-    /**
-    * Drawn if the incoming depth value is equal to that in the
-    * depth buffer
-    */
-    RS_DEPTH_FUNC_EQUAL         = 5,
-    /**
-    * Drawn if the incoming depth value is not equal to that in the
-    * depth buffer
-    */
-    RS_DEPTH_FUNC_NOTEQUAL      = 6,
-    /**
-    * Invalid depth function
-    */
-    RS_DEPTH_FUNC_INVALID       = 100,
+    RS_DEPTH_FUNC_ALWAYS        = 0, // Always drawn
+    RS_DEPTH_FUNC_LESS          = 1, // Drawn if the incoming depth value is less than that in the depth buffer
+    RS_DEPTH_FUNC_LEQUAL        = 2, // Drawn if the incoming depth value is less or equal to that in the depth buffer
+    RS_DEPTH_FUNC_GREATER       = 3, // Drawn if the incoming depth value is greater than that in the depth buffer
+    RS_DEPTH_FUNC_GEQUAL        = 4, // Drawn if the incoming depth value is greater or equal to that in the depth buffer
+    RS_DEPTH_FUNC_EQUAL         = 5, // Drawn if the incoming depth value is equal to that in the depth buffer
+    RS_DEPTH_FUNC_NOTEQUAL      = 6, // Drawn if the incoming depth value is not equal to that in the depth buffer
+    RS_DEPTH_FUNC_INVALID       = 100 // Invalid depth function
 } rs_depth_func;
+#endif
+#endif
 
+/*
+ * rs_blend_src_func: Blend source function
+ *
+ */
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 typedef enum {
     RS_BLEND_SRC_ZERO                   = 0,
     RS_BLEND_SRC_ONE                    = 1,
@@ -608,10 +819,17 @@
     RS_BLEND_SRC_DST_ALPHA              = 6,
     RS_BLEND_SRC_ONE_MINUS_DST_ALPHA    = 7,
     RS_BLEND_SRC_SRC_ALPHA_SATURATE     = 8,
-
-    RS_BLEND_SRC_INVALID                = 100,
+    RS_BLEND_SRC_INVALID                = 100
 } rs_blend_src_func;
+#endif
+#endif
 
+/*
+ * rs_blend_dst_func: Blend destination function
+ *
+ */
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 typedef enum {
     RS_BLEND_DST_ZERO                   = 0,
     RS_BLEND_DST_ONE                    = 1,
@@ -621,19 +839,31 @@
     RS_BLEND_DST_ONE_MINUS_SRC_ALPHA    = 5,
     RS_BLEND_DST_DST_ALPHA              = 6,
     RS_BLEND_DST_ONE_MINUS_DST_ALPHA    = 7,
-
-    RS_BLEND_DST_INVALID                = 100,
+    RS_BLEND_DST_INVALID                = 100
 } rs_blend_dst_func;
+#endif
+#endif
 
+/*
+ * rs_cull_mode: Culling mode
+ *
+ */
+#ifndef __LP64__
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 typedef enum {
     RS_CULL_BACK     = 0,
     RS_CULL_FRONT    = 1,
     RS_CULL_NONE     = 2,
-
-    RS_CULL_INVALID  = 100,
+    RS_CULL_INVALID  = 100
 } rs_cull_mode;
-#endif //__LP64__
+#endif
+#endif
 
+/*
+ * rs_sampler_value: Sampler wrap T value
+ *
+ */
+#if (defined(RS_VERSION) && (RS_VERSION >= 16))
 typedef enum {
     RS_SAMPLER_NEAREST              = 0,
     RS_SAMPLER_LINEAR               = 1,
@@ -642,10 +872,8 @@
     RS_SAMPLER_CLAMP                = 4,
     RS_SAMPLER_LINEAR_MIP_NEAREST   = 5,
     RS_SAMPLER_MIRRORED_REPEAT      = 6,
-
-    RS_SAMPLER_INVALID              = 100,
+    RS_SAMPLER_INVALID              = 100
 } rs_sampler_value;
+#endif
 
-#endif // (defined(RS_VERSION) && (RS_VERSION >= 16))
-
-#endif // __RS_TYPES_RSH__
+#endif // RENDERSCRIPT_RS_TYPES_RSH
diff --git a/renderscript/lib/arm/libRSSupport.so b/renderscript/lib/arm/libRSSupport.so
index b43e828..86f8f78 100755
--- a/renderscript/lib/arm/libRSSupport.so
+++ b/renderscript/lib/arm/libRSSupport.so
Binary files differ
diff --git a/renderscript/lib/arm/libRSSupportIO.so b/renderscript/lib/arm/libRSSupportIO.so
index c2506e5..ed18c45 100755
--- a/renderscript/lib/arm/libRSSupportIO.so
+++ b/renderscript/lib/arm/libRSSupportIO.so
Binary files differ
diff --git a/renderscript/lib/arm/libc.so b/renderscript/lib/arm/libc.so
index d14bbeb..f068f81 100755
--- a/renderscript/lib/arm/libc.so
+++ b/renderscript/lib/arm/libc.so
Binary files differ
diff --git a/renderscript/lib/arm/libclcore.bc b/renderscript/lib/arm/libclcore.bc
index 04d97a2..d57ddea 100644
--- a/renderscript/lib/arm/libclcore.bc
+++ b/renderscript/lib/arm/libclcore.bc
Binary files differ
diff --git a/renderscript/lib/arm/libm.so b/renderscript/lib/arm/libm.so
index ab4d664..13fd827 100755
--- a/renderscript/lib/arm/libm.so
+++ b/renderscript/lib/arm/libm.so
Binary files differ
diff --git a/renderscript/lib/arm/librsjni.so b/renderscript/lib/arm/librsjni.so
index 712a4a8..2094b6a 100755
--- a/renderscript/lib/arm/librsjni.so
+++ b/renderscript/lib/arm/librsjni.so
Binary files differ
diff --git a/renderscript/lib/arm/librsrt_arm.bc b/renderscript/lib/arm/librsrt_arm.bc
index 04d97a2..d57ddea 100644
--- a/renderscript/lib/arm/librsrt_arm.bc
+++ b/renderscript/lib/arm/librsrt_arm.bc
Binary files differ
diff --git a/renderscript/lib/javalib.jar b/renderscript/lib/javalib.jar
index f0fee31..9e90b85 100644
--- a/renderscript/lib/javalib.jar
+++ b/renderscript/lib/javalib.jar
Binary files differ
diff --git a/renderscript/lib/mips/libRSSupport.so b/renderscript/lib/mips/libRSSupport.so
index 1244169..30476e7 100755
--- a/renderscript/lib/mips/libRSSupport.so
+++ b/renderscript/lib/mips/libRSSupport.so
Binary files differ
diff --git a/renderscript/lib/mips/libRSSupportIO.so b/renderscript/lib/mips/libRSSupportIO.so
index 1e66e74..19265e0 100755
--- a/renderscript/lib/mips/libRSSupportIO.so
+++ b/renderscript/lib/mips/libRSSupportIO.so
Binary files differ
diff --git a/renderscript/lib/mips/libc.so b/renderscript/lib/mips/libc.so
index f6dc7e8..020a5f6 100755
--- a/renderscript/lib/mips/libc.so
+++ b/renderscript/lib/mips/libc.so
Binary files differ
diff --git a/renderscript/lib/mips/libclcore.bc b/renderscript/lib/mips/libclcore.bc
index 04d97a2..d57ddea 100644
--- a/renderscript/lib/mips/libclcore.bc
+++ b/renderscript/lib/mips/libclcore.bc
Binary files differ
diff --git a/renderscript/lib/mips/libm.so b/renderscript/lib/mips/libm.so
index 20227b7..62a9a85 100755
--- a/renderscript/lib/mips/libm.so
+++ b/renderscript/lib/mips/libm.so
Binary files differ
diff --git a/renderscript/lib/mips/librsjni.so b/renderscript/lib/mips/librsjni.so
index 3663e4a..bf8f269 100755
--- a/renderscript/lib/mips/librsjni.so
+++ b/renderscript/lib/mips/librsjni.so
Binary files differ
diff --git a/renderscript/lib/mips/librsrt_mips.bc b/renderscript/lib/mips/librsrt_mips.bc
index 04d97a2..d57ddea 100644
--- a/renderscript/lib/mips/librsrt_mips.bc
+++ b/renderscript/lib/mips/librsrt_mips.bc
Binary files differ
diff --git a/renderscript/lib/x86/libRSSupport.so b/renderscript/lib/x86/libRSSupport.so
index 92a9ea3..0bdf71e 100755
--- a/renderscript/lib/x86/libRSSupport.so
+++ b/renderscript/lib/x86/libRSSupport.so
Binary files differ
diff --git a/renderscript/lib/x86/libRSSupportIO.so b/renderscript/lib/x86/libRSSupportIO.so
index e9c4344..57142d1 100755
--- a/renderscript/lib/x86/libRSSupportIO.so
+++ b/renderscript/lib/x86/libRSSupportIO.so
Binary files differ
diff --git a/renderscript/lib/x86/libc.so b/renderscript/lib/x86/libc.so
index 9b06ed1..7610b94 100755
--- a/renderscript/lib/x86/libc.so
+++ b/renderscript/lib/x86/libc.so
Binary files differ
diff --git a/renderscript/lib/x86/libclcore.bc b/renderscript/lib/x86/libclcore.bc
index 8833135..c93cb71 100644
--- a/renderscript/lib/x86/libclcore.bc
+++ b/renderscript/lib/x86/libclcore.bc
Binary files differ
diff --git a/renderscript/lib/x86/libm.so b/renderscript/lib/x86/libm.so
index ac23969..ce846a2 100755
--- a/renderscript/lib/x86/libm.so
+++ b/renderscript/lib/x86/libm.so
Binary files differ
diff --git a/renderscript/lib/x86/librsjni.so b/renderscript/lib/x86/librsjni.so
index 6d374d8..c95532f 100755
--- a/renderscript/lib/x86/librsjni.so
+++ b/renderscript/lib/x86/librsjni.so
Binary files differ
diff --git a/renderscript/lib/x86/librsrt_x86.bc b/renderscript/lib/x86/librsrt_x86.bc
index f6d83ce..c643494 100644
--- a/renderscript/lib/x86/librsrt_x86.bc
+++ b/renderscript/lib/x86/librsrt_x86.bc
Binary files differ
diff --git a/tools/darwin/bcc_compat b/tools/darwin/bcc_compat
index 5992f69..10ea50b 100755
--- a/tools/darwin/bcc_compat
+++ b/tools/darwin/bcc_compat
Binary files differ
diff --git a/tools/darwin/libLLVM.dylib b/tools/darwin/libLLVM.dylib
index b4200e7..c0e7d05 100755
--- a/tools/darwin/libLLVM.dylib
+++ b/tools/darwin/libLLVM.dylib
Binary files differ
diff --git a/tools/darwin/libbcc.dylib b/tools/darwin/libbcc.dylib
index 13f7a65..b0d0983 100755
--- a/tools/darwin/libbcc.dylib
+++ b/tools/darwin/libbcc.dylib
Binary files differ
diff --git a/tools/darwin/libbcinfo.dylib b/tools/darwin/libbcinfo.dylib
index 9a2ef5d..8d525ff 100755
--- a/tools/darwin/libbcinfo.dylib
+++ b/tools/darwin/libbcinfo.dylib
Binary files differ
diff --git a/tools/darwin/libc++.dylib b/tools/darwin/libc++.dylib
index 300f9c7..2b732f3 100755
--- a/tools/darwin/libc++.dylib
+++ b/tools/darwin/libc++.dylib
Binary files differ
diff --git a/tools/darwin/libclang.dylib b/tools/darwin/libclang.dylib
index 04884d1..daa7b10 100755
--- a/tools/darwin/libclang.dylib
+++ b/tools/darwin/libclang.dylib
Binary files differ
diff --git a/tools/darwin/llvm-rs-cc b/tools/darwin/llvm-rs-cc
index 6014525..e5576e3 100755
--- a/tools/darwin/llvm-rs-cc
+++ b/tools/darwin/llvm-rs-cc
Binary files differ
diff --git a/tools/linux/bcc_compat b/tools/linux/bcc_compat
index 92d7953..984276a 100755
--- a/tools/linux/bcc_compat
+++ b/tools/linux/bcc_compat
Binary files differ
diff --git a/tools/linux/libLLVM.so b/tools/linux/libLLVM.so
index e12a63a..3a90310 100755
--- a/tools/linux/libLLVM.so
+++ b/tools/linux/libLLVM.so
Binary files differ
diff --git a/tools/linux/libbcc.so b/tools/linux/libbcc.so
index b83227f..aee7112 100755
--- a/tools/linux/libbcc.so
+++ b/tools/linux/libbcc.so
Binary files differ
diff --git a/tools/linux/libbcinfo.so b/tools/linux/libbcinfo.so
index 3162c2d..a34b684 100755
--- a/tools/linux/libbcinfo.so
+++ b/tools/linux/libbcinfo.so
Binary files differ
diff --git a/tools/linux/libc++.so b/tools/linux/libc++.so
index 19e4750..0469e9a 100755
--- a/tools/linux/libc++.so
+++ b/tools/linux/libc++.so
Binary files differ
diff --git a/tools/linux/libclang.so b/tools/linux/libclang.so
index c1de4c4..dba01b9 100755
--- a/tools/linux/libclang.so
+++ b/tools/linux/libclang.so
Binary files differ
diff --git a/tools/linux/llvm-rs-cc b/tools/linux/llvm-rs-cc
index 47dbf6b..edfb7e6 100755
--- a/tools/linux/llvm-rs-cc
+++ b/tools/linux/llvm-rs-cc
Binary files differ