Enable ARM64 intrinsics.

This also moves ARM intrinsic ifdefs behing ARCH_ARM_USE_INTRINSICS instead of ARCH_ARM_HAVE_VFP.

Change-Id: I48d3d55c77feb931e22288828247e281db43d32b
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index 0d7a86b..552a835 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -211,7 +211,7 @@
     }
 
     if(x2 > x1) {
-#if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_X86_HAVE_SSSE3)
+#if defined(ARCH_ARM_USE_INTRINSICS) || defined(ARCH_X86_HAVE_SSSE3)
         if (gArchUseSIMD) {
             int32_t len = (x2 - x1 - 1) >> 1;
             if(len > 0) {