[RenderScript] Add optimized ASIMD and SSSE3 intrinsics to support lib.
Bug: 28388236
- Add ASIMD & SSSE3 intrinsics to the support lib CPU driver.
On arm64, the average performance difference is 3X.
Change-Id: Ic344da4ca97059c4aee209510c657d672e3e9796
diff --git a/v8/renderscript/rs_support/Android.mk b/v8/renderscript/rs_support/Android.mk
index dbac81c..de8fae0 100644
--- a/v8/renderscript/rs_support/Android.mk
+++ b/v8/renderscript/rs_support/Android.mk
@@ -142,18 +142,34 @@
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
LOCAL_CFLAGS_arm := -DARCH_ARM_HAVE_VFP -DARCH_ARM_USE_INTRINSICS
LOCAL_ASFLAGS_arm := -mfpu=neon
-# Clang does not support nested .irp in *_Blur.S
-LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
LOCAL_SRC_FILES_arm := \
- cpu_ref/rsCpuIntrinsics_neon_3DLUT.S \
- cpu_ref/rsCpuIntrinsics_neon_ColorMatrix.S \
- cpu_ref/rsCpuIntrinsics_neon_Blend.S \
- cpu_ref/rsCpuIntrinsics_neon_Blur.S \
- cpu_ref/rsCpuIntrinsics_neon_Convolve.S \
- cpu_ref/rsCpuIntrinsics_neon_Resize.S \
- cpu_ref/rsCpuIntrinsics_neon_YuvToRGB.S
+ cpu_ref/rsCpuIntrinsics_neon_3DLUT.S \
+ cpu_ref/rsCpuIntrinsics_neon_Blend.S \
+ cpu_ref/rsCpuIntrinsics_neon_Blur.S \
+ cpu_ref/rsCpuIntrinsics_neon_ColorMatrix.S \
+ cpu_ref/rsCpuIntrinsics_neon_Convolve.S \
+ cpu_ref/rsCpuIntrinsics_neon_Resize.S \
+ cpu_ref/rsCpuIntrinsics_neon_YuvToRGB.S
endif
+LOCAL_CFLAGS_arm64 += \
+ -DARCH_ARM_USE_INTRINSICS \
+ -DARCH_ARM64_USE_INTRINSICS \
+ -DARCH_ARM64_HAVE_NEON
+LOCAL_SRC_FILES_arm64 += \
+ cpu_ref/rsCpuIntrinsics_advsimd_3DLUT.S \
+ cpu_ref/rsCpuIntrinsics_advsimd_Blend.S \
+ cpu_ref/rsCpuIntrinsics_advsimd_Blur.S \
+ cpu_ref/rsCpuIntrinsics_advsimd_ColorMatrix.S \
+ cpu_ref/rsCpuIntrinsics_advsimd_Convolve.S \
+ cpu_ref/rsCpuIntrinsics_advsimd_Resize.S \
+ cpu_ref/rsCpuIntrinsics_advsimd_YuvToRGB.S
+
+LOCAL_CFLAGS_x86 += -DARCH_X86_HAVE_SSSE3
+LOCAL_SRC_FILES_x86 += cpu_ref/rsCpuIntrinsics_x86.cpp
+LOCAL_CFLAGS_x86_64 += -DARCH_X86_HAVE_SSSE3
+LOCAL_SRC_FILES_x86_64 += cpu_ref/rsCpuIntrinsics_x86.cpp
+
LOCAL_REQUIRED_MODULES := libblasV8
LOCAL_STATIC_LIBRARIES := libbnnmlowpV8
LOCAL_LDFLAGS += -llog -ldl -Wl,--exclude-libs,libc++_static.a