am 4fdec9b0: Cleanup naming of aprox & half functions. bug 7205089 * commit '4fdec9b00e2643b764c4f9786def10d643818aba': Cleanup naming of aprox & half functions. bug 7205089

commit: 37a2a3266d3091886f9ed452f249644c06ab498c [log] [tgz]
author: Jason Sams <jsams@google.com> Thu Sep 20 20:31:26 2012 -0700
committer: Android Git Automerger <android-git-automerger@android.com> Thu Sep 20 20:31:26 2012 -0700
tree: 24d6b168d74fc2660e02e42db07e3c9b53504993
parent: 9ad640a47a91fe47046eb57ba0b7b37641e75b99 [diff]
parent: 4fdec9b00e2643b764c4f9786def10d643818aba [diff]
diff --git a/lib/Renderscript/runtime/arch/generic.c b/lib/Renderscript/runtime/arch/generic.c
index 9b52cbb..e802d58 100644
--- a/lib/Renderscript/runtime/arch/generic.c
+++ b/lib/Renderscript/runtime/arch/generic.c

@@ -20,6 +20,7 @@
 extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
 extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
 extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
+extern float __attribute__((overloadable)) sqrt(float);
 
 
 /*
@@ -743,109 +744,101 @@
 
 
 /*
- * APPROX_RECIP
+ * half_RECIP
  */
 
-extern float __attribute__((overloadable)) approx_recip(float v) {
+extern float __attribute__((overloadable)) half_recip(float v) {
     // FIXME:  actual algorithm for generic approximate reciprocal
     return 1.f / v;
 }
 
-extern float2 __attribute__((overloadable)) approx_recip(float2 v) {
+extern float2 __attribute__((overloadable)) half_recip(float2 v) {
     float2 r;
-    r.x = approx_recip(r.x);
-    r.y = approx_recip(r.y);
+    r.x = half_recip(r.x);
+    r.y = half_recip(r.y);
     return r;
 }
 
-extern float3 __attribute__((overloadable)) approx_recip(float3 v) {
+extern float3 __attribute__((overloadable)) half_recip(float3 v) {
     float3 r;
-    r.x = approx_recip(r.x);
-    r.y = approx_recip(r.y);
-    r.z = approx_recip(r.z);
+    r.x = half_recip(r.x);
+    r.y = half_recip(r.y);
+    r.z = half_recip(r.z);
     return r;
 }
 
-extern float4 __attribute__((overloadable)) approx_recip(float4 v) {
+extern float4 __attribute__((overloadable)) half_recip(float4 v) {
     float4 r;
-    r.x = approx_recip(r.x);
-    r.y = approx_recip(r.y);
-    r.z = approx_recip(r.z);
-    r.w = approx_recip(r.w);
+    r.x = half_recip(r.x);
+    r.y = half_recip(r.y);
+    r.z = half_recip(r.z);
+    r.w = half_recip(r.w);
     return r;
 }
 
 
 /*
- * APPROX_SQRT
+ * half_SQRT
  */
 
-extern float __attribute__((overloadable)) approx_sqrt(float v) {
-    int i = *((int*)&v);
-    i = (1 << 29) + (i >> 1) - (1 << 22);
-    return *((float*)&i);
+extern float __attribute__((overloadable)) half_sqrt(float v) {
+    return sqrt(v);
 }
 
-extern float2 __attribute__((overloadable)) approx_sqrt(float2 v) {
+extern float2 __attribute__((overloadable)) half_sqrt(float2 v) {
     float2 r;
-    r.x = approx_sqrt(v.x);
-    r.y = approx_sqrt(v.y);
+    r.x = half_sqrt(v.x);
+    r.y = half_sqrt(v.y);
     return r;
 }
 
-extern float3 __attribute__((overloadable)) approx_sqrt(float3 v) {
+extern float3 __attribute__((overloadable)) half_sqrt(float3 v) {
     float3 r;
-    r.x = approx_sqrt(v.x);
-    r.y = approx_sqrt(v.y);
-    r.z = approx_sqrt(v.z);
+    r.x = half_sqrt(v.x);
+    r.y = half_sqrt(v.y);
+    r.z = half_sqrt(v.z);
     return r;
 }
 
-extern float4 __attribute__((overloadable)) approx_sqrt(float4 v) {
+extern float4 __attribute__((overloadable)) half_sqrt(float4 v) {
     float4 r;
-    r.x = approx_sqrt(v.x);
-    r.y = approx_sqrt(v.y);
-    r.z = approx_sqrt(v.z);
-    r.w = approx_sqrt(v.w);
+    r.x = half_sqrt(v.x);
+    r.y = half_sqrt(v.y);
+    r.z = half_sqrt(v.z);
+    r.w = half_sqrt(v.w);
     return r;
 }
 
 
 /*
- * APPROX_rsqrt
+ * half_rsqrt
  */
 
-extern float __attribute__((overloadable)) approx_rsqrt(float v) {
-    int i = *((int*)&v);
-    float r;
-    i = 0x5f3759df - (i >> 1);
-    r = *((float*)&i);
-    // Newton step: optional, can be repeated for more accuracy
-    //r = r * (1.5f - (0.5f*v) * r * r);
-    return r;
+extern float __attribute__((overloadable)) half_rsqrt(float v) {
+    return 1.f / sqrt(v);
 }
 
-extern float2 __attribute__((overloadable)) approx_rsqrt(float2 v) {
+extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
     float2 r;
-    r.x = approx_rsqrt(v.x);
-    r.y = approx_rsqrt(v.y);
+    r.x = half_rsqrt(v.x);
+    r.y = half_rsqrt(v.y);
     return r;
 }
 
-extern float3 __attribute__((overloadable)) approx_rsqrt(float3 v) {
+extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
     float3 r;
-    r.x = approx_rsqrt(v.x);
-    r.y = approx_rsqrt(v.y);
-    r.z = approx_rsqrt(v.z);
+    r.x = half_rsqrt(v.x);
+    r.y = half_rsqrt(v.y);
+    r.z = half_rsqrt(v.z);
     return r;
 }
 
-extern float4 __attribute__((overloadable)) approx_rsqrt(float4 v) {
+extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
     float4 r;
-    r.x = approx_rsqrt(v.x);
-    r.y = approx_rsqrt(v.y);
-    r.z = approx_rsqrt(v.z);
-    r.w = approx_rsqrt(v.w);
+    r.x = half_rsqrt(v.x);
+    r.y = half_rsqrt(v.y);
+    r.z = half_rsqrt(v.z);
+    r.w = half_rsqrt(v.w);
     return r;
 }
 

diff --git a/lib/Renderscript/runtime/arch/neon.ll b/lib/Renderscript/runtime/arch/neon.ll
index 42b04c1..010b252 100644
--- a/lib/Renderscript/runtime/arch/neon.ll
+++ b/lib/Renderscript/runtime/arch/neon.ll

@@ -738,38 +738,38 @@
 }
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;;              APPROX_RECIP              ;;;;;;;;;;
+;;;;;;;;;              half_RECIP              ;;;;;;;;;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-define float @_Z12approx_recipf(float %v) {
+define float @_Z10half_recipf(float %v) {
   %1 = insertelement <2 x float> undef, float %v, i32 0
   %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
   %3 = extractelement <2 x float> %2, i32 0
   ret float %3
 }
 
-define <2 x float> @_Z12approx_recip2Dv2_h(<2 x float> %v) nounwind readnone {
+define <2 x float> @_Z10half_recip2Dv2_h(<2 x float> %v) nounwind readnone {
   %1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
   ret <2 x float> %1
 }
 
-define <3 x float> @_Z12approx_recip3Dv3_h(<3 x float> %v) nounwind readnone {
+define <3 x float> @_Z10half_recip3Dv3_h(<3 x float> %v) nounwind readnone {
   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %3
 }
 
-define <4 x float> @_Z12approx_recip4Dv4_h(<4 x float> %v) nounwind readnone {
+define <4 x float> @_Z10half_recip4Dv4_h(<4 x float> %v) nounwind readnone {
   %1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
   ret <4 x float> %1
 }
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;;              APPROX_SQRT               ;;;;;;;;;;
+;;;;;;;;;              half_SQRT               ;;;;;;;;;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-define float @_Z11approx_sqrtf(float %v) {
+define float @_Z9half_sqrtf(float %v) {
   %1 = insertelement <2 x float> undef, float %v, i32 0
   %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
   %3 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %2) nounwind readnone
@@ -777,13 +777,13 @@
   ret float %4
 }
 
-define <2 x float> @_Z11approx_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
+define <2 x float> @_Z9half_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
   %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
   %2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
   ret <2 x float> %2
 }
 
-define <3 x float> @_Z11approx_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
+define <3 x float> @_Z9half_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
   %3 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %2) nounwind readnone
@@ -791,7 +791,7 @@
   ret <3 x float> %4
 }
 
-define <4 x float> @_Z11approx_sqrt4Dv4_h(<4 x float> %v) nounwind readnone {
+define <4 x float> @_Z9half_sqrt4Dv4_h(<4 x float> %v) nounwind readnone {
   %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
   %2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
   ret <4 x float> %2
@@ -799,29 +799,29 @@
 
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;;              APPROX_RSQRT              ;;;;;;;;;;
+;;;;;;;;;              half_RSQRT              ;;;;;;;;;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-define float @_Z12approx_rsqrtf(float %v) {
+define float @_Z10half_rsqrtf(float %v) {
   %1 = insertelement <2 x float> undef, float %v, i32 0
   %2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
   %3 = extractelement <2 x float> %2, i32 0
   ret float %3
 }
 
-define <2 x float> @_Z12approx_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone {
+define <2 x float> @_Z10half_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone {
   %1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
   ret <2 x float> %1
 }
 
-define <3 x float> @_Z12approx_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone {
+define <3 x float> @_Z10half_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone {
   %1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
   %3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   ret <3 x float> %3
 }
 
-define <4 x float> @_Z12approx_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone {
+define <4 x float> @_Z10half_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone {
   %1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
   ret <4 x float> %1
 }

diff --git a/lib/Renderscript/runtime/rs_cl.c b/lib/Renderscript/runtime/rs_cl.c
index 1526773..bf055b3 100644
--- a/lib/Renderscript/runtime/rs_cl.c
+++ b/lib/Renderscript/runtime/rs_cl.c

@@ -881,51 +881,52 @@
     return v / length(v);
 }
 
-extern float __attribute__((overloadable)) approx_sqrt(float);
+extern float __attribute__((overloadable)) half_sqrt(float);
 
-extern float __attribute__((overloadable)) approx_length(float v) {
+extern float __attribute__((overloadable)) fast_length(float v) {
     return v;
 }
-extern float __attribute__((overloadable)) approx_length(float2 v) {
-    return approx_sqrt(v.x*v.x + v.y*v.y);
+extern float __attribute__((overloadable)) fast_length(float2 v) {
+    return half_sqrt(v.x*v.x + v.y*v.y);
 }
-extern float __attribute__((overloadable)) approx_length(float3 v) {
-    return approx_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
+extern float __attribute__((overloadable)) fast_length(float3 v) {
+    return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
 }
-extern float __attribute__((overloadable)) approx_length(float4 v) {
-    return approx_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
+extern float __attribute__((overloadable)) fast_length(float4 v) {
+    return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
 }
 
-extern float __attribute__((overloadable)) approx_distance(float lhs, float rhs) {
-    return approx_length(lhs - rhs);
+extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
+    return fast_length(lhs - rhs);
 }
-extern float __attribute__((overloadable)) approx_distance(float2 lhs, float2 rhs) {
-    return approx_length(lhs - rhs);
+extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
+    return fast_length(lhs - rhs);
 }
-extern float __attribute__((overloadable)) approx_distance(float3 lhs, float3 rhs) {
-    return approx_length(lhs - rhs);
+extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
+    return fast_length(lhs - rhs);
 }
-extern float __attribute__((overloadable)) approx_distance(float4 lhs, float4 rhs) {
-    return approx_length(lhs - rhs);
+extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
+    return fast_length(lhs - rhs);
 }
 
-extern float __attribute__((overloadable)) approx_rsqrt(float);
+extern float __attribute__((overloadable)) half_rsqrt(float);
 
-extern float __attribute__((overloadable)) approx_normalize(float v) {
+extern float __attribute__((overloadable)) fast_normalize(float v) {
     return 1.f;
 }
-extern float2 __attribute__((overloadable)) approx_normalize(float2 v) {
-    return v * approx_rsqrt(v.x*v.x + v.y*v.y);
+extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
+    return v * half_rsqrt(v.x*v.x + v.y*v.y);
 }
-extern float3 __attribute__((overloadable)) approx_normalize(float3 v) {
-    return v * approx_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
+extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
+    return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
 }
-extern float4 __attribute__((overloadable)) approx_normalize(float4 v) {
-    return v * approx_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
+extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
+    return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
 }
 
-extern float __attribute__((overloadable)) approx_recip(float);
+extern float __attribute__((overloadable)) half_recip(float);
 
+/*
 extern float __attribute__((overloadable)) approx_atan(float x) {
     if (x == 0.f)
         return 0.f;
@@ -936,6 +937,7 @@
     return x * approx_recip(1.f + 0.28f * x*x);
 }
 FN_FUNC_FN(approx_atan)
+*/
 
 #undef FN_FUNC_FN
 #undef IN_FUNC_FN
commit	37a2a3266d3091886f9ed452f249644c06ab498c	[log] [tgz]
author	Jason Sams <jsams@google.com>	Thu Sep 20 20:31:26 2012 -0700
committer	Android Git Automerger <android-git-automerger@android.com>	Thu Sep 20 20:31:26 2012 -0700
tree	24d6b168d74fc2660e02e42db07e3c9b53504993
parent	9ad640a47a91fe47046eb57ba0b7b37641e75b99 [diff]
parent	4fdec9b00e2643b764c4f9786def10d643818aba [diff]