am 47be3b30: merge in klp-release (no-op)

* commit '47be3b30b679504c8d9d4d474b7de290c68ec6ca':
  The NEON yuv asm code may overrun the buffer
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index 03fdcba..fb55d30 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -43,10 +43,6 @@
     LOCAL_ASFLAGS := -mfpu=neon
 endif
 
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
-    LOCAL_CFLAGS += -DARCH_X86_HAVE_SSE2
-endif
-
 LOCAL_SHARED_LIBRARIES += libRS libcutils libutils liblog libsync
 LOCAL_SHARED_LIBRARIES += libbcc libbcinfo
 
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 0669326..c956f43 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -396,8 +396,8 @@
         return false;
     }
 
-#if defined(ARCH_X86_HAVE_SSE2)
-    // SSE2- or above capable devices will use an optimized library.
+#if defined(__i386__)
+    // 32-bit x86 builds (__i386__) will use an x86-optimized core library.
     core_lib = bcc::RSInfo::LibCLCoreX86Path;
 #endif
 
diff --git a/driver/runtime/Android.mk b/driver/runtime/Android.mk
index 80fbb1c..39a48c9 100755
--- a/driver/runtime/Android.mk
+++ b/driver/runtime/Android.mk
@@ -42,17 +42,12 @@
     arch/neon.ll \
     arch/clamp.c
 
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
     clcore_x86_files := \
     $(clcore_base_files) \
     arch/generic.c \
-    arch/x86_sse2.ll
-
-    # FIXME: without SSE3, it is still able to get better code through PSHUFD. But,
-    # so far, there is no such device with SSE2 only.
-    ifeq ($(ARCH_X86_HAVE_SSE3), true)
-        clcore_x86_files += arch/x86_sse3.ll
-    endif
+    arch/x86_sse2.ll \
+    arch/x86_sse3.ll
 endif
 
 ifeq "REL" "$(PLATFORM_VERSION_CODENAME)"
@@ -83,9 +78,8 @@
 
 include $(LOCAL_PATH)/build_bc_lib.mk
 
-# Build an optimized version of the library if the device is SSE2- or above
-# capable.
-ifeq ($(ARCH_X86_HAVE_SSE2),true)
+# Build an optimized library for x86 and x86_64 targets (all are assumed to support SSE2 and SSE3).
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),x86 x86_64))
 include $(CLEAR_VARS)
 LOCAL_MODULE := libclcore_x86.bc
 LOCAL_MODULE_TAGS := optional
diff --git a/driver/runtime/arch/generic.c b/driver/runtime/arch/generic.c
index 50722b1..eeddeaf 100644
--- a/driver/runtime/arch/generic.c
+++ b/driver/runtime/arch/generic.c
@@ -79,7 +79,7 @@
     return r;                                                                       \
 }
 
-#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 
 _CLAMP(float);
 
@@ -93,7 +93,7 @@
 extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
 extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
 
-#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#endif // !defined(__i386__)
 
 _CLAMP(double);
 _CLAMP(char);
diff --git a/driver/runtime/arch/x86_sse2.ll b/driver/runtime/arch/x86_sse2.ll
index e4fb035..7b19970 100644
--- a/driver/runtime/arch/x86_sse2.ll
+++ b/driver/runtime/arch/x86_sse2.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
+target triple = "i686-unknown-linux"
 
 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>)
 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>)
diff --git a/driver/runtime/arch/x86_sse3.ll b/driver/runtime/arch/x86_sse3.ll
index 5c96daa..73af7fa 100644
--- a/driver/runtime/arch/x86_sse3.ll
+++ b/driver/runtime/arch/x86_sse3.ll
@@ -1,5 +1,5 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
+target triple = "i686-unknown-linux"
 
 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
diff --git a/driver/runtime/build_bc_lib.mk b/driver/runtime/build_bc_lib.mk
index ab2c17b..d58d45d 100644
--- a/driver/runtime/build_bc_lib.mk
+++ b/driver/runtime/build_bc_lib.mk
@@ -18,10 +18,13 @@
 
 BCC_STRIP_ATTR := $(BUILD_OUT_EXECUTABLES)/bcc_strip_attr$(BUILD_EXECUTABLE_SUFFIX)
 
+bc_clang_cc1_cflags :=
+ifeq ($(TARGET_ARCH),arm)
 # We need to pass the +long64 flag to the underlying version of Clang, since
 # we are generating a library for use with Renderscript (64-bit long type,
 # not 32-bit).
-bc_clang_cc1_cflags := -target-feature +long64
+bc_clang_cc1_cflags += -target-feature +long64
+endif
 bc_translated_clang_cc1_cflags := $(addprefix -Xclang , $(bc_clang_cc1_cflags))
 
 bc_cflags := -MD \
@@ -31,7 +34,7 @@
              -O3 \
              -fno-builtin \
              -emit-llvm \
-             -target armv7-none-linux-gnueabi \
+             -target $(RS_TRIPLE) \
              -fsigned-char \
              $(LOCAL_CFLAGS) \
              $(bc_translated_clang_cc1_cflags)
@@ -41,13 +44,6 @@
 endif
 rs_debug_runtime:=
 
-ifeq ($(ARCH_X86_HAVE_SSE2), true)
-    bc_cflags += -DARCH_X86_HAVE_SSE2
-endif
-ifeq ($(ARCH_X86_HAVE_SSE3), true)
-    bc_cflags += -DARCH_X86_HAVE_SSE3
-endif
-
 c_sources := $(filter %.c,$(LOCAL_SRC_FILES))
 ll_sources := $(filter %.ll,$(LOCAL_SRC_FILES))
 
diff --git a/driver/runtime/rs_cl.c b/driver/runtime/rs_cl.c
index 7e8a574..8da343c 100755
--- a/driver/runtime/rs_cl.c
+++ b/driver/runtime/rs_cl.c
@@ -592,9 +592,9 @@
     return 1.f / sqrt(v);
 }
 
-#if !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 FN_FUNC_FN(sqrt)
-#endif // !defined(ARCH_X86_HAVE_SSE2) && !defined(ARCH_X86_HAVE_SSE3)
+#endif // !defined(__i386__)
 
 FN_FUNC_FN(rsqrt)
 
@@ -902,7 +902,7 @@
     return r;
 }
 
-#if !defined(ARCH_X86_HAVE_SSE3)
+#if !defined(__i386__)
 
 extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
     return lhs * rhs;