Remove references/use of ARCH_X86_HAVE_*.

Bug: 11048298

These SSE-related defines are unnecessary because all x86 Android devices
support SSE2 and SSE3. We switch the preprocessor checks to __i386__ and the
makefile checks to TARGET_ARCH instead.

This also fixes an issue where non-ARM bitcode files were being created with
the ARM defines present. This is problematic when we then try to do things
that are arch-specific (e.g. the presence of SSE-enabled functions for
clamp(), ...).

This also updates the target triple specified in the x86 bitcode source files
(previously i386-unknown-linux-gnu) so that it matches the platform target
(i686-unknown-linux).

Change-Id: Ibbead4aecba2c90bdbe6ff362960079cb7a9e4dc
diff --git a/lib/Renderscript/RSInfo.cpp b/lib/Renderscript/RSInfo.cpp
index 4aa2e69..963ea85 100644
--- a/lib/Renderscript/RSInfo.cpp
+++ b/lib/Renderscript/RSInfo.cpp
@@ -35,7 +35,7 @@
 const char RSInfo::LibRSPath[] = "/system/lib/libRS.so";
 const char RSInfo::LibCLCorePath[] = "/system/lib/libclcore.bc";
 const char RSInfo::LibCLCoreDebugPath[] = "/system/lib/libclcore_debug.bc";
-#if defined(ARCH_X86_HAVE_SSE2)
+#if defined(__i386__)
 const char RSInfo::LibCLCoreX86Path[] = "/system/lib/libclcore_x86.bc";
 #endif
 #if defined(ARCH_ARM_HAVE_NEON)
diff --git a/lib/Renderscript/RSScript.cpp b/lib/Renderscript/RSScript.cpp
index 0f56471..50dbb73 100644
--- a/lib/Renderscript/RSScript.cpp
+++ b/lib/Renderscript/RSScript.cpp
@@ -27,8 +27,8 @@
   BCCContext &context = pScript.getSource().getContext();
   const char* core_lib = RSInfo::LibCLCorePath;
 
-  // SSE2- or above capable devices will use an optimized library.
-#if defined(ARCH_X86_HAVE_SSE2)
+  // x86 devices will use an optimized library.
+#if defined(__i386__)
   core_lib = RSInfo::LibCLCoreX86Path;
 #endif
 
diff --git a/lib/Renderscript/runtime/Android.mk b/lib/Renderscript/runtime/Android.mk
index 0fe2440..e994615 100755
--- a/lib/Renderscript/runtime/Android.mk
+++ b/lib/Renderscript/runtime/Android.mk
@@ -40,17 +40,12 @@
     math.ll \
     arch/neon.ll
 
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
     clcore_x86_files := \
     $(clcore_base_files) \
     arch/generic.c \
-    arch/x86_sse2.ll
-
-    # FIXME: without SSE3, it is still able to get better code through PSHUFD. But,
-    # so far, there is no such device with SSE2 only.
-    ifeq ($(ARCH_X86_HAVE_SSE3), true)
-        clcore_x86_files += arch/x86_sse3.ll
-    endif
+    arch/x86_sse2.ll \
+    arch/x86_sse3.ll
 endif
 
 ifeq "REL" "$(PLATFORM_VERSION_CODENAME)"
@@ -81,9 +76,8 @@
 
 include $(LOCAL_PATH)/build_bc_lib.mk
 
-# Build an optimized version of the library if the device is SSE2- or above
-# capable.
-ifeq ($(ARCH_X86_HAVE_SSE2),true)
+# Build an optimized version of the library for x86 platforms (all have SSE2/3).
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
 include $(CLEAR_VARS)
 LOCAL_MODULE := libclcore_x86.bc
 LOCAL_MODULE_TAGS := optional
diff --git a/lib/Renderscript/runtime/arch/generic.c b/lib/Renderscript/runtime/arch/generic.c
index 986c71e..dd3d768 100644
--- a/lib/Renderscript/runtime/arch/generic.c
+++ b/lib/Renderscript/runtime/arch/generic.c
@@ -27,7 +27,7 @@
 /*
  * CLAMP
  */
-#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 
 extern float __attribute__((overloadable)) clamp(float amount, float low, float high) {
     return amount < low ? low : (amount > high ? high : amount);
@@ -91,7 +91,7 @@
 extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
 extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
 
-#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#endif // !defined(__i386__)
 
 /*
  * FMAX
diff --git a/lib/Renderscript/runtime/arch/x86_sse2.ll b/lib/Renderscript/runtime/arch/x86_sse2.ll
index e4fb035..7b19970 100644
--- a/lib/Renderscript/runtime/arch/x86_sse2.ll
+++ b/lib/Renderscript/runtime/arch/x86_sse2.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
+target triple = "i686-unknown-linux"
 
 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
diff --git a/lib/Renderscript/runtime/arch/x86_sse3.ll b/lib/Renderscript/runtime/arch/x86_sse3.ll
index 5c96daa..73af7fa 100644
--- a/lib/Renderscript/runtime/arch/x86_sse3.ll
+++ b/lib/Renderscript/runtime/arch/x86_sse3.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
+target triple = "i686-unknown-linux"
 
 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
diff --git a/lib/Renderscript/runtime/build_bc_lib.mk b/lib/Renderscript/runtime/build_bc_lib.mk
index 8c0851c..09f9540 100644
--- a/lib/Renderscript/runtime/build_bc_lib.mk
+++ b/lib/Renderscript/runtime/build_bc_lib.mk
@@ -18,10 +18,13 @@
 
 BCC_STRIP_ATTR := $(HOST_OUT_EXECUTABLES)/bcc_strip_attr$(HOST_EXECUTABLE_SUFFIX)
 
+bc_clang_cc1_cflags :=
+ifeq ($(TARGET_ARCH),arm)
 # We need to pass the +long64 flag to the underlying version of Clang, since
 # we are generating a library for use with Renderscript (64-bit long type,
 # not 32-bit).
-bc_clang_cc1_cflags := -target-feature +long64
+bc_clang_cc1_cflags += -target-feature +long64
+endif
 bc_translated_clang_cc1_cflags := $(addprefix -Xclang , $(bc_clang_cc1_cflags))
 
 bc_cflags := -MD \
@@ -31,7 +34,7 @@
              -O3 \
              -fno-builtin \
              -emit-llvm \
-             -target armv7-none-linux-gnueabi \
+             -target $(RS_TRIPLE) \
              -fsigned-char \
              $(bc_translated_clang_cc1_cflags)
 
@@ -40,13 +43,6 @@
 endif
 rs_debug_runtime:=
 
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
-    bc_cflags += -DARCH_X86_HAVE_SSE2
-endif
-ifeq ($(ARCH_X86_HAVE_SSE3), true)
-    bc_cflags += -DARCH_X86_HAVE_SSE3
-endif
-
 c_sources := $(filter %.c,$(LOCAL_SRC_FILES))
 ll_sources := $(filter %.ll,$(LOCAL_SRC_FILES))
 
diff --git a/lib/Renderscript/runtime/rs_cl.c b/lib/Renderscript/runtime/rs_cl.c
index 7e8a574..8da343c 100644
--- a/lib/Renderscript/runtime/rs_cl.c
+++ b/lib/Renderscript/runtime/rs_cl.c
@@ -592,9 +592,9 @@
     return 1.f / sqrt(v);
 }
 
-#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 FN_FUNC_FN(sqrt)
-#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#endif // !defined(__i386__)
 
 FN_FUNC_FN(rsqrt)
 
@@ -902,7 +902,7 @@
     return r;
 }
 
-#if !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 
 extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
     return lhs * rhs;