Remove references/use of ARCH_X86_HAVE_* in frameworks/rs.

Bug: 11048298

These SSE-related defines are unnecessary because all x86 Android devices
have these features. We switch all the checks to __i386__ instead.

This also fixes an issue where non-ARM bitcode files were being created with
the ARM defines present. This is problematic when we then try to do things
that are arch-specific (e.g. relying on the presence of SSE-enabled functions
for clamp(), ...).

This also cleans up the mismatched target architecture specified in the x86
bitcode files so that it matches the platform target (i686-unknown-linux).

Change-Id: I4776bbdce360de26e8a00e05d2cb19341d94a173
diff --git a/driver/runtime/Android.mk b/driver/runtime/Android.mk
index 80fbb1c..39a48c9 100755
--- a/driver/runtime/Android.mk
+++ b/driver/runtime/Android.mk
@@ -42,17 +42,12 @@
     arch/neon.ll \
     arch/clamp.c
 
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
     clcore_x86_files := \
     $(clcore_base_files) \
     arch/generic.c \
-    arch/x86_sse2.ll
-
-    # FIXME: without SSE3, it is still able to get better code through PSHUFD. But,
-    # so far, there is no such device with SSE2 only.
-    ifeq ($(ARCH_X86_HAVE_SSE3), true)
-        clcore_x86_files += arch/x86_sse3.ll
-    endif
+    arch/x86_sse2.ll \
+    arch/x86_sse3.ll
 endif
 
 ifeq "REL" "$(PLATFORM_VERSION_CODENAME)"
@@ -83,9 +78,8 @@
 
 include $(LOCAL_PATH)/build_bc_lib.mk
 
-# Build an optimized version of the library if the device is SSE2- or above
-# capable.
-ifeq ($(ARCH_X86_HAVE_SSE2),true)
+# Build an optimized version of the library for x86 platforms (all have SSE2/3).
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
 include $(CLEAR_VARS)
 LOCAL_MODULE := libclcore_x86.bc
 LOCAL_MODULE_TAGS := optional
diff --git a/driver/runtime/arch/generic.c b/driver/runtime/arch/generic.c
index 50722b1..eeddeaf 100644
--- a/driver/runtime/arch/generic.c
+++ b/driver/runtime/arch/generic.c
@@ -79,7 +79,7 @@
     return r;                                                                       \
 }
 
-#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 
 _CLAMP(float);
 
@@ -93,7 +93,7 @@
 extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
 extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
 
-#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#endif // !defined(__i386__)
 
 _CLAMP(double);
 _CLAMP(char);
diff --git a/driver/runtime/build_bc_lib.mk b/driver/runtime/build_bc_lib.mk
index ab2c17b..d58d45d 100644
--- a/driver/runtime/build_bc_lib.mk
+++ b/driver/runtime/build_bc_lib.mk
@@ -18,10 +18,13 @@
 
 BCC_STRIP_ATTR := $(BUILD_OUT_EXECUTABLES)/bcc_strip_attr$(BUILD_EXECUTABLE_SUFFIX)
 
+bc_clang_cc1_cflags :=
+ifeq ($(TARGET_ARCH),arm)
 # We need to pass the +long64 flag to the underlying version of Clang, since
 # we are generating a library for use with Renderscript (64-bit long type,
 # not 32-bit).
-bc_clang_cc1_cflags := -target-feature +long64
+bc_clang_cc1_cflags += -target-feature +long64
+endif
 bc_translated_clang_cc1_cflags := $(addprefix -Xclang , $(bc_clang_cc1_cflags))
 
 bc_cflags := -MD \
@@ -31,7 +34,7 @@
              -O3 \
              -fno-builtin \
              -emit-llvm \
-             -target armv7-none-linux-gnueabi \
+             -target $(RS_TRIPLE) \
              -fsigned-char \
              $(LOCAL_CFLAGS) \
              $(bc_translated_clang_cc1_cflags)
@@ -41,13 +44,6 @@
 endif
 rs_debug_runtime:=
 
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
-    bc_cflags += -DARCH_X86_HAVE_SSE2
-endif
-ifeq ($(ARCH_X86_HAVE_SSE3), true)
-    bc_cflags += -DARCH_X86_HAVE_SSE3
-endif
-
 c_sources := $(filter %.c,$(LOCAL_SRC_FILES))
 ll_sources := $(filter %.ll,$(LOCAL_SRC_FILES))
 
diff --git a/driver/runtime/rs_cl.c b/driver/runtime/rs_cl.c
index 7e8a574..8da343c 100755
--- a/driver/runtime/rs_cl.c
+++ b/driver/runtime/rs_cl.c
@@ -592,9 +592,9 @@
     return 1.f / sqrt(v);
 }
 
-#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 FN_FUNC_FN(sqrt)
-#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#endif // !defined(__i386__)
 
 FN_FUNC_FN(rsqrt)
 
@@ -902,7 +902,7 @@
     return r;
 }
 
-#if !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 
 extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
     return lhs * rhs;