Use runtime NEON detection in the RS SDK compat lib.
Change-Id: I3887158c7ec97ba116c28dc7b1d0c789b81fae60
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index 4e9470e..5e79169 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -103,8 +103,6 @@
extern "C" void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
-//#undef ARCH_ARM_HAVE_NEON
-
void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -131,13 +129,15 @@
case BLEND_DST:
break;
case BLEND_SRC_OVER:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSrcOver_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendSrcOver_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -148,13 +148,15 @@
}
break;
case BLEND_DST_OVER:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendDstOver_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendDstOver_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -165,13 +167,15 @@
}
break;
case BLEND_SRC_IN:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSrcIn_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendSrcIn_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -181,13 +185,15 @@
}
break;
case BLEND_DST_IN:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendDstIn_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendDstIn_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -197,13 +203,15 @@
}
break;
case BLEND_SRC_OUT:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSrcOut_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendSrcOut_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -213,13 +221,15 @@
}
break;
case BLEND_DST_OUT:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendDstOut_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendDstOut_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -229,13 +239,15 @@
}
break;
case BLEND_SRC_ATOP:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSrcAtop_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendSrcAtop_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -247,13 +259,15 @@
}
break;
case BLEND_DST_ATOP:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendDstAtop_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendDstAtop_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -265,13 +279,15 @@
}
break;
case BLEND_XOR:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendXor_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendXor_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -287,13 +303,15 @@
rsAssert(false);
break;
case BLEND_MULTIPLY:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendMultiply_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendMultiply_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -378,13 +396,15 @@
rsAssert(false);
break;
case BLEND_ADD:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendAdd_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendAdd_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {
@@ -397,13 +417,15 @@
}
break;
case BLEND_SUBTRACT:
-#if defined(ARCH_ARM_HAVE_NEON)
- if((x1 + 8) < x2) {
- uint32_t len = (x2 - x1) >> 3;
- rsdIntrinsicBlendSub_K(out, in, len);
- x1 += len << 3;
- out += len << 3;
- in += len << 3;
+#if defined(ARCH_ARM_HAVE_VFP)
+ if (gArchUseSIMD) {
+ if((x1 + 8) < x2) {
+ uint32_t len = (x2 - x1) >> 3;
+ rsdIntrinsicBlendSub_K(out, in, len);
+ x1 += len << 3;
+ out += len << 3;
+ in += len << 3;
+ }
}
#endif
for (;x1 < x2; x1++, out++, in++) {