Remove references/use of ARCH_X86_HAVE_* in frameworks/rs.
Bug: 11048298
These SSE-related defines are unnecessary because all x86 Android devices
have these features. We switch all the checks to __i386__ instead.
This also fixes an issue where non-ARM bitcode files were being created with
the ARM defines present. This is problematic when we then try to do things
that would be arch-specific (e.g. the presence of SSE-enabled functions for
clamp(), ...).
This also fixes the mismatched target architecture specified in the x86
bitcode files so that it matches the platform target (i686-unknown-linux).
Change-Id: I4776bbdce360de26e8a00e05d2cb19341d94a173
diff --git a/driver/runtime/Android.mk b/driver/runtime/Android.mk
index 80fbb1c..39a48c9 100755
--- a/driver/runtime/Android.mk
+++ b/driver/runtime/Android.mk
@@ -42,17 +42,12 @@
arch/neon.ll \
arch/clamp.c
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
clcore_x86_files := \
$(clcore_base_files) \
arch/generic.c \
- arch/x86_sse2.ll
-
- # FIXME: without SSE3, it is still able to get better code through PSHUFD. But,
- # so far, there is no such device with SSE2 only.
- ifeq ($(ARCH_X86_HAVE_SSE3), true)
- clcore_x86_files += arch/x86_sse3.ll
- endif
+ arch/x86_sse2.ll \
+ arch/x86_sse3.ll
endif
ifeq "REL" "$(PLATFORM_VERSION_CODENAME)"
@@ -83,9 +78,8 @@
include $(LOCAL_PATH)/build_bc_lib.mk
-# Build an optimized version of the library if the device is SSE2- or above
-# capable.
-ifeq ($(ARCH_X86_HAVE_SSE2),true)
+# Build an optimized version of the library for x86 platforms (all have SSE2/3).
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
include $(CLEAR_VARS)
LOCAL_MODULE := libclcore_x86.bc
LOCAL_MODULE_TAGS := optional
diff --git a/driver/runtime/arch/generic.c b/driver/runtime/arch/generic.c
index 50722b1..eeddeaf 100644
--- a/driver/runtime/arch/generic.c
+++ b/driver/runtime/arch/generic.c
@@ -79,7 +79,7 @@
return r; \
}
-#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
_CLAMP(float);
@@ -93,7 +93,7 @@
extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
-#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#endif // !defined(__i386__)
_CLAMP(double);
_CLAMP(char);
diff --git a/driver/runtime/build_bc_lib.mk b/driver/runtime/build_bc_lib.mk
index ab2c17b..d58d45d 100644
--- a/driver/runtime/build_bc_lib.mk
+++ b/driver/runtime/build_bc_lib.mk
@@ -18,10 +18,13 @@
BCC_STRIP_ATTR := $(BUILD_OUT_EXECUTABLES)/bcc_strip_attr$(BUILD_EXECUTABLE_SUFFIX)
+bc_clang_cc1_cflags :=
+ifeq ($(TARGET_ARCH),arm)
# We need to pass the +long64 flag to the underlying version of Clang, since
# we are generating a library for use with Renderscript (64-bit long type,
# not 32-bit).
-bc_clang_cc1_cflags := -target-feature +long64
+bc_clang_cc1_cflags += -target-feature +long64
+endif
bc_translated_clang_cc1_cflags := $(addprefix -Xclang , $(bc_clang_cc1_cflags))
bc_cflags := -MD \
@@ -31,7 +34,7 @@
-O3 \
-fno-builtin \
-emit-llvm \
- -target armv7-none-linux-gnueabi \
+ -target $(RS_TRIPLE) \
-fsigned-char \
$(LOCAL_CFLAGS) \
$(bc_translated_clang_cc1_cflags)
@@ -41,13 +44,6 @@
endif
rs_debug_runtime:=
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
- bc_cflags += -DARCH_X86_HAVE_SSE2
-endif
-ifeq ($(ARCH_X86_HAVE_SSE3), true)
- bc_cflags += -DARCH_X86_HAVE_SSE3
-endif
-
c_sources := $(filter %.c,$(LOCAL_SRC_FILES))
ll_sources := $(filter %.ll,$(LOCAL_SRC_FILES))
diff --git a/driver/runtime/rs_cl.c b/driver/runtime/rs_cl.c
index 7e8a574..8da343c 100755
--- a/driver/runtime/rs_cl.c
+++ b/driver/runtime/rs_cl.c
@@ -592,9 +592,9 @@
return 1.f / sqrt(v);
}
-#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
FN_FUNC_FN(sqrt)
-#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#endif // !defined(__i386__)
FN_FUNC_FN(rsqrt)
@@ -902,7 +902,7 @@
return r;
}
-#if !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
return lhs * rhs;