Refactor the libbcc runtime for the x86 platform
This patch is merged/rebased from AOSP, where it was initially submitted to
frameworks/compile/libbcc by Jun Tian <jun.j.tian@intel.com>. All conflicts
have been resolved.
This patch refactors the libbcc runtime code to support the x86 platform.
It removes the redundant x86 code and adds the missing functions in
libclcore_x86.bc.
It resolves the RenderScript failures on the x86 platform.
Bug: 9961583
Change-Id: I2c8be0f710960ee5e0614721f5edfbaf028c67e1
diff --git a/driver/runtime/rs_cl.c b/driver/runtime/rs_cl.c
index b7f9158..7e8a574 100755
--- a/driver/runtime/rs_cl.c
+++ b/driver/runtime/rs_cl.c
@@ -591,6 +591,11 @@
extern float __attribute__((overloadable)) rsqrt(float v) {
return 1.f / sqrt(v);
}
+
+#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3) // compiled only when neither x86 SSE feature macro is defined
+FN_FUNC_FN(sqrt) // NOTE(review): FN_FUNC_FN is defined outside this hunk; presumably it generates the vector overloads of sqrt -- confirm
+#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+
FN_FUNC_FN(rsqrt)
extern float __attribute__((overloadable)) sin(float);
@@ -897,11 +902,43 @@
return r;
}
+#if !defined(ARCH_X86_HAVE_SSE3) // the C definitions of dot() and length() below are compiled only when ARCH_X86_HAVE_SSE3 is not defined
+
+extern float __attribute__((overloadable)) dot(float lhs, float rhs) { // scalar overload: the dot product is just the product of the operands
+ return lhs * rhs;
+}
+extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) { // 2-component overload: sum of the x and y component products
+ return lhs.x*rhs.x + lhs.y*rhs.y;
+}
+extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) { // 3-component overload: sum of the x, y and z component products
+ return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
+}
+extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) { // 4-component overload: sum of the x, y, z and w component products
+ return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
+}
+
+extern float __attribute__((overloadable)) length(float v) { // scalar overload: the length is the absolute value of v
+ return fabs(v);
+}
+extern float __attribute__((overloadable)) length(float2 v) { // 2-component overload: square root of the sum of squared components
+ return sqrt(v.x*v.x + v.y*v.y); // NOTE(review): components are squared without rescaling, so very large inputs can overflow before sqrt
+}
+extern float __attribute__((overloadable)) length(float3 v) { // 3-component overload: square root of the sum of squared components
+ return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
+}
+extern float __attribute__((overloadable)) length(float4 v) { // 4-component overload: square root of the sum of squared components
+ return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
+}
+
+#else // ARCH_X86_HAVE_SSE3 is defined: length() is only declared here. NOTE(review): the definitions presumably come from the x86-specific runtime -- confirm
+
extern float __attribute__((overloadable)) length(float v);
extern float __attribute__((overloadable)) length(float2 v);
extern float __attribute__((overloadable)) length(float3 v);
extern float __attribute__((overloadable)) length(float4 v);
+#endif // !defined(ARCH_X86_HAVE_SSE3)
+
extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
return length(lhs - rhs);
}