Use the LLVM fabs intrinsic for fabs on float, float2, float3 and float4.
Change-Id: I7e593ec2306305bc510c0a18ebc697b18c5992dc
diff --git a/lib/Renderscript/runtime/math.ll b/lib/Renderscript/runtime/math.ll
index 4ea2b10..dd4dc4b 100644
--- a/lib/Renderscript/runtime/math.ll
+++ b/lib/Renderscript/runtime/math.ll
@@ -3,14 +3,35 @@
declare float @llvm.sqrt.f32(float)
declare float @llvm.pow.f32(float, float)
+declare float @llvm.fabs.f32(float) ; fabs intrinsic, scalar float overload
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) ; fabs intrinsic, 2-element float vector overload
+declare <3 x float> @llvm.fabs.v3f32(<3 x float>) ; fabs intrinsic, 3-element float vector overload
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) ; fabs intrinsic, 4-element float vector overload
-define float @_Z4sqrtf(float %v) {
+define float @_Z4sqrtf(float %v) nounwind readnone alwaysinline { ; thin wrapper: returns llvm.sqrt.f32(%v)
%1 = tail call float @llvm.sqrt.f32(float %v)
ret float %1
}
-define float @_Z3powf(float %v1, float %v2) {
+define float @_Z3powf(float %v1, float %v2) nounwind readnone alwaysinline { ; thin wrapper: returns llvm.pow.f32(%v1, %v2). NOTE(review): _Z3powf is the Itanium mangling of pow(float); pow(float, float) would be _Z3powff - confirm the intended symbol
%1 = tail call float @llvm.pow.f32(float %v1, float %v2)
ret float %1
}
+define float @_Z4fabsf(float %v) nounwind readnone alwaysinline { ; fabs(float): returns llvm.fabs.f32(%v)
+ %1 = tail call float @llvm.fabs.f32(float %v)
+ ret float %1
+}
+define <2 x float> @_Z4fabsDv2_f(<2 x float> %v) nounwind readnone alwaysinline { ; fabs on a 2-element float vector: returns llvm.fabs.v2f32(%v)
+ %1 = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %v)
+ ret <2 x float> %1
+}
+define <3 x float> @_Z4fabsDv3_f(<3 x float> %v) nounwind readnone alwaysinline { ; fabs on a 3-element float vector: returns llvm.fabs.v3f32(%v)
+ %1 = tail call <3 x float> @llvm.fabs.v3f32(<3 x float> %v)
+ ret <3 x float> %1
+}
+define <4 x float> @_Z4fabsDv4_f(<4 x float> %v) nounwind readnone alwaysinline { ; fabs on a 4-element float vector: returns llvm.fabs.v4f32(%v)
+ %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %v)
+ ret <4 x float> %1
+}
+
diff --git a/lib/Renderscript/runtime/rs_cl.c b/lib/Renderscript/runtime/rs_cl.c
index b6c2b6a..858161d 100644
--- a/lib/Renderscript/runtime/rs_cl.c
+++ b/lib/Renderscript/runtime/rs_cl.c
@@ -404,8 +404,10 @@
extern float __attribute__((overloadable)) expm1(float);
FN_FUNC_FN(expm1)
-extern float __attribute__((overloadable)) fabs(float);
-FN_FUNC_FN(fabs)
+extern float __attribute__((overloadable)) fabs(float v); /* declaration only; this patch defines @_Z4fabsf in math.ll */
+extern float2 __attribute__((overloadable)) fabs(float2 v); /* declaration only; presumably resolves to @_Z4fabsDv2_f in math.ll - confirm float2 mangling */
+extern float3 __attribute__((overloadable)) fabs(float3 v); /* declaration only; presumably resolves to @_Z4fabsDv3_f in math.ll - confirm float3 mangling */
+extern float4 __attribute__((overloadable)) fabs(float4 v); /* declaration only; presumably resolves to @_Z4fabsDv4_f in math.ll - confirm float4 mangling */
extern float __attribute__((overloadable)) fdim(float, float);
FN_FUNC_FN_FN(fdim)