am 4fdec9b0: Cleanup naming of aprox & half functions. bug 7205089
* commit '4fdec9b00e2643b764c4f9786def10d643818aba':
Cleanup naming of aprox & half functions. bug 7205089
diff --git a/lib/Renderscript/runtime/arch/generic.c b/lib/Renderscript/runtime/arch/generic.c
index 9b52cbb..e802d58 100644
--- a/lib/Renderscript/runtime/arch/generic.c
+++ b/lib/Renderscript/runtime/arch/generic.c
@@ -20,6 +20,7 @@
extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
+extern float __attribute__((overloadable)) sqrt(float);
/*
@@ -743,109 +744,101 @@
/*
- * APPROX_RECIP
+ * HALF_RECIP
*/
-extern float __attribute__((overloadable)) approx_recip(float v) {
+extern float __attribute__((overloadable)) half_recip(float v) {
// FIXME: actual algorithm for generic approximate reciprocal
return 1.f / v;
}
-extern float2 __attribute__((overloadable)) approx_recip(float2 v) {
+extern float2 __attribute__((overloadable)) half_recip(float2 v) {
float2 r;
- r.x = approx_recip(r.x);
- r.y = approx_recip(r.y);
+ r.x = half_recip(v.x); // take each lane from the argument v; r is not yet initialized
+ r.y = half_recip(v.y);
return r;
}
-extern float3 __attribute__((overloadable)) approx_recip(float3 v) {
+extern float3 __attribute__((overloadable)) half_recip(float3 v) {
float3 r;
- r.x = approx_recip(r.x);
- r.y = approx_recip(r.y);
- r.z = approx_recip(r.z);
+ r.x = half_recip(v.x); // take each lane from the argument v; r is not yet initialized
+ r.y = half_recip(v.y);
+ r.z = half_recip(v.z);
return r;
}
-extern float4 __attribute__((overloadable)) approx_recip(float4 v) {
+extern float4 __attribute__((overloadable)) half_recip(float4 v) {
float4 r;
- r.x = approx_recip(r.x);
- r.y = approx_recip(r.y);
- r.z = approx_recip(r.z);
- r.w = approx_recip(r.w);
+ r.x = half_recip(v.x); // take each lane from the argument v; r is not yet initialized
+ r.y = half_recip(v.y);
+ r.z = half_recip(v.z);
+ r.w = half_recip(v.w);
return r;
}
/*
- * APPROX_SQRT
+ * HALF_SQRT
*/
-extern float __attribute__((overloadable)) approx_sqrt(float v) {
- int i = *((int*)&v);
- i = (1 << 29) + (i >> 1) - (1 << 22);
- return *((float*)&i);
+extern float __attribute__((overloadable)) half_sqrt(float v) {
+ return sqrt(v);
}
-extern float2 __attribute__((overloadable)) approx_sqrt(float2 v) {
+extern float2 __attribute__((overloadable)) half_sqrt(float2 v) {
float2 r;
- r.x = approx_sqrt(v.x);
- r.y = approx_sqrt(v.y);
+ r.x = half_sqrt(v.x);
+ r.y = half_sqrt(v.y);
return r;
}
-extern float3 __attribute__((overloadable)) approx_sqrt(float3 v) {
+extern float3 __attribute__((overloadable)) half_sqrt(float3 v) {
float3 r;
- r.x = approx_sqrt(v.x);
- r.y = approx_sqrt(v.y);
- r.z = approx_sqrt(v.z);
+ r.x = half_sqrt(v.x);
+ r.y = half_sqrt(v.y);
+ r.z = half_sqrt(v.z);
return r;
}
-extern float4 __attribute__((overloadable)) approx_sqrt(float4 v) {
+extern float4 __attribute__((overloadable)) half_sqrt(float4 v) {
float4 r;
- r.x = approx_sqrt(v.x);
- r.y = approx_sqrt(v.y);
- r.z = approx_sqrt(v.z);
- r.w = approx_sqrt(v.w);
+ r.x = half_sqrt(v.x);
+ r.y = half_sqrt(v.y);
+ r.z = half_sqrt(v.z);
+ r.w = half_sqrt(v.w);
return r;
}
/*
- * APPROX_rsqrt
+ * HALF_RSQRT
*/
-extern float __attribute__((overloadable)) approx_rsqrt(float v) {
- int i = *((int*)&v);
- float r;
- i = 0x5f3759df - (i >> 1);
- r = *((float*)&i);
- // Newton step: optional, can be repeated for more accuracy
- //r = r * (1.5f - (0.5f*v) * r * r);
- return r;
+extern float __attribute__((overloadable)) half_rsqrt(float v) {
+ return 1.f / sqrt(v);
}
-extern float2 __attribute__((overloadable)) approx_rsqrt(float2 v) {
+extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
float2 r;
- r.x = approx_rsqrt(v.x);
- r.y = approx_rsqrt(v.y);
+ r.x = half_rsqrt(v.x);
+ r.y = half_rsqrt(v.y);
return r;
}
-extern float3 __attribute__((overloadable)) approx_rsqrt(float3 v) {
+extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
float3 r;
- r.x = approx_rsqrt(v.x);
- r.y = approx_rsqrt(v.y);
- r.z = approx_rsqrt(v.z);
+ r.x = half_rsqrt(v.x);
+ r.y = half_rsqrt(v.y);
+ r.z = half_rsqrt(v.z);
return r;
}
-extern float4 __attribute__((overloadable)) approx_rsqrt(float4 v) {
+extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
float4 r;
- r.x = approx_rsqrt(v.x);
- r.y = approx_rsqrt(v.y);
- r.z = approx_rsqrt(v.z);
- r.w = approx_rsqrt(v.w);
+ r.x = half_rsqrt(v.x);
+ r.y = half_rsqrt(v.y);
+ r.z = half_rsqrt(v.z);
+ r.w = half_rsqrt(v.w);
return r;
}
diff --git a/lib/Renderscript/runtime/arch/neon.ll b/lib/Renderscript/runtime/arch/neon.ll
index 42b04c1..010b252 100644
--- a/lib/Renderscript/runtime/arch/neon.ll
+++ b/lib/Renderscript/runtime/arch/neon.ll
@@ -738,38 +738,38 @@
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; APPROX_RECIP ;;;;;;;;;;
+;;;;;;;;; HALF_RECIP ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-define float @_Z12approx_recipf(float %v) {
+define float @_Z10half_recipf(float %v) {
%1 = insertelement <2 x float> undef, float %v, i32 0
%2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
%3 = extractelement <2 x float> %2, i32 0
ret float %3
}
-define <2 x float> @_Z12approx_recip2Dv2_h(<2 x float> %v) nounwind readnone {
+define <2 x float> @_Z10half_recip2Dv2_h(<2 x float> %v) nounwind readnone {
%1 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %v) nounwind readnone
ret <2 x float> %1
}
-define <3 x float> @_Z12approx_recip3Dv3_h(<3 x float> %v) nounwind readnone {
+define <3 x float> @_Z10half_recip3Dv3_h(<3 x float> %v) nounwind readnone {
%1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
%3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
ret <3 x float> %3
}
-define <4 x float> @_Z12approx_recip4Dv4_h(<4 x float> %v) nounwind readnone {
+define <4 x float> @_Z10half_recip4Dv4_h(<4 x float> %v) nounwind readnone {
%1 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %v) nounwind readnone
ret <4 x float> %1
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; APPROX_SQRT ;;;;;;;;;;
+;;;;;;;;; HALF_SQRT ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-define float @_Z11approx_sqrtf(float %v) {
+define float @_Z9half_sqrtf(float %v) {
%1 = insertelement <2 x float> undef, float %v, i32 0
%2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
%3 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %2) nounwind readnone
@@ -777,13 +777,13 @@
ret float %4
}
-define <2 x float> @_Z11approx_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
+define <2 x float> @_Z9half_sqrt2Dv2_h(<2 x float> %v) nounwind readnone {
%1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
%2 = tail call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %1) nounwind readnone
ret <2 x float> %2
}
-define <3 x float> @_Z11approx_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
+define <3 x float> @_Z9half_sqrt3Dv3_h(<3 x float> %v) nounwind readnone {
%1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
%3 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %2) nounwind readnone
@@ -791,7 +791,7 @@
ret <3 x float> %4
}
-define <4 x float> @_Z11approx_sqrt4Dv4_h(<4 x float> %v) nounwind readnone {
+define <4 x float> @_Z9half_sqrt4Dv4_h(<4 x float> %v) nounwind readnone {
%1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
%2 = tail call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %1) nounwind readnone
ret <4 x float> %2
@@ -799,29 +799,29 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;;;;;;; APPROX_RSQRT ;;;;;;;;;;
+;;;;;;;;; HALF_RSQRT ;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-define float @_Z12approx_rsqrtf(float %v) {
+define float @_Z10half_rsqrtf(float %v) {
%1 = insertelement <2 x float> undef, float %v, i32 0
%2 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %1) nounwind readnone
%3 = extractelement <2 x float> %2, i32 0
ret float %3
}
-define <2 x float> @_Z12approx_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone {
+define <2 x float> @_Z10half_rsqrt2Dv2_h(<2 x float> %v) nounwind readnone {
%1 = tail call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %v) nounwind readnone
ret <2 x float> %1
}
-define <3 x float> @_Z12approx_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone {
+define <3 x float> @_Z10half_rsqrt3Dv3_h(<3 x float> %v) nounwind readnone {
%1 = shufflevector <3 x float> %v, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%2 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %1) nounwind readnone
%3 = shufflevector <4 x float> %2, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
ret <3 x float> %3
}
-define <4 x float> @_Z12approx_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone {
+define <4 x float> @_Z10half_rsqrt4Dv4_h(<4 x float> %v) nounwind readnone {
%1 = tail call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %v) nounwind readnone
ret <4 x float> %1
}
diff --git a/lib/Renderscript/runtime/rs_cl.c b/lib/Renderscript/runtime/rs_cl.c
index 1526773..bf055b3 100644
--- a/lib/Renderscript/runtime/rs_cl.c
+++ b/lib/Renderscript/runtime/rs_cl.c
@@ -881,51 +881,52 @@
return v / length(v);
}
-extern float __attribute__((overloadable)) approx_sqrt(float);
+extern float __attribute__((overloadable)) half_sqrt(float);
-extern float __attribute__((overloadable)) approx_length(float v) {
+extern float __attribute__((overloadable)) fast_length(float v) {
return v;
}
-extern float __attribute__((overloadable)) approx_length(float2 v) {
- return approx_sqrt(v.x*v.x + v.y*v.y);
+extern float __attribute__((overloadable)) fast_length(float2 v) {
+ return half_sqrt(v.x*v.x + v.y*v.y);
}
-extern float __attribute__((overloadable)) approx_length(float3 v) {
- return approx_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
+extern float __attribute__((overloadable)) fast_length(float3 v) {
+ return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
}
-extern float __attribute__((overloadable)) approx_length(float4 v) {
- return approx_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
+extern float __attribute__((overloadable)) fast_length(float4 v) {
+ return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
}
-extern float __attribute__((overloadable)) approx_distance(float lhs, float rhs) {
- return approx_length(lhs - rhs);
+extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
+ return fast_length(lhs - rhs);
}
-extern float __attribute__((overloadable)) approx_distance(float2 lhs, float2 rhs) {
- return approx_length(lhs - rhs);
+extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
+ return fast_length(lhs - rhs);
}
-extern float __attribute__((overloadable)) approx_distance(float3 lhs, float3 rhs) {
- return approx_length(lhs - rhs);
+extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
+ return fast_length(lhs - rhs);
}
-extern float __attribute__((overloadable)) approx_distance(float4 lhs, float4 rhs) {
- return approx_length(lhs - rhs);
+extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
+ return fast_length(lhs - rhs);
}
-extern float __attribute__((overloadable)) approx_rsqrt(float);
+extern float __attribute__((overloadable)) half_rsqrt(float);
-extern float __attribute__((overloadable)) approx_normalize(float v) {
+extern float __attribute__((overloadable)) fast_normalize(float v) {
return 1.f;
}
-extern float2 __attribute__((overloadable)) approx_normalize(float2 v) {
- return v * approx_rsqrt(v.x*v.x + v.y*v.y);
+extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
+ return v * half_rsqrt(v.x*v.x + v.y*v.y);
}
-extern float3 __attribute__((overloadable)) approx_normalize(float3 v) {
- return v * approx_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
+extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
+ return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
}
-extern float4 __attribute__((overloadable)) approx_normalize(float4 v) {
- return v * approx_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
+extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
+ return v * half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
}
-extern float __attribute__((overloadable)) approx_recip(float);
+extern float __attribute__((overloadable)) half_recip(float);
+/*
extern float __attribute__((overloadable)) approx_atan(float x) {
if (x == 0.f)
return 0.f;
@@ -936,6 +937,7 @@
return x * approx_recip(1.f + 0.28f * x*x);
}
FN_FUNC_FN(approx_atan)
+*/
#undef FN_FUNC_FN
#undef IN_FUNC_FN