Cleanup type offsets which cannot be calculated for flexible YUV.

Support flexible YUV

bug 10567550

Change-Id: I4f6e5a8d86eeee635605460f1751208f3320969b
(cherry picked from commit a75372759e288be3fb8835735a830b1f7d1a4c42)
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index 4f56443..7546b38 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -107,6 +107,7 @@
 };
 
 extern "C" void rsdIntrinsicYuv_K(void *dst, const uchar *Y, const uchar *uv, uint32_t count, const short *param);
+extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t count, const short *param);
 extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, uint32_t count, const short *param);
 
 void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
@@ -135,91 +136,63 @@
     uint32_t x1 = xstart;
     uint32_t x2 = xend;
 
-    switch (cp->alloc->mHal.state.yuv) {
-    // In API 17 there was no yuv format and the intrinsic treated everything as NV21
-    case 0:
-#if !defined(RS_SERVER)
-    case HAL_PIXEL_FORMAT_YCrCb_420_SP:  // NV21
-#endif
-        {
-            const uchar *pinUV = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
-            size_t strideUV = cp->alloc->mHal.drvState.lod[1].stride;
-            const uchar *uv = pinUV + ((p->y >> 1) * strideUV);
+    const size_t cstep = cp->alloc->mHal.drvState.yuv.step;
 
-            if (pinUV == NULL) {
-                // Legacy yuv support didn't fill in uv
-                strideUV = strideY;
-                uv = ((uint8_t *)cp->alloc->mHal.drvState.lod[0].mallocPtr) +
-                    (strideY * p->dimY) +
-                    ((p->y >> 1) * strideUV);
-            }
+    const uchar *pinU = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
+    const size_t strideU = cp->alloc->mHal.drvState.lod[1].stride;
+    const uchar *u = pinU + ((p->y >> 1) * strideU);
 
-            if(x2 > x1) {
-                if (gArchUseSIMD) {
-            #if defined(ARCH_ARM_HAVE_VFP)
-                    int32_t len = (x2 - x1 - 1) >> 3;
-                    if(len > 0) {
-                        //                    ALOGE("%p, %p, %p, %d, %p", out, Y, uv, len, YuvCoeff);
-                        rsdIntrinsicYuv_K(out, Y, uv, len, YuvCoeff);
-                        x1 += len << 3;
-                        out += len << 3;
-                    }
-            #endif
+    const uchar *pinV = (const uchar *)cp->alloc->mHal.drvState.lod[2].mallocPtr;
+    const size_t strideV = cp->alloc->mHal.drvState.lod[2].stride;
+    const uchar *v = pinV + ((p->y >> 1) * strideV);
+
+    if (pinU == NULL) {
+        // Legacy yuv support didn't fill in uv
+        v = ((uint8_t *)cp->alloc->mHal.drvState.lod[0].mallocPtr) +
+            (strideY * p->dimY) +
+            ((p->y >> 1) * strideY);
+        u = v + 1;
+    }
+
+#if defined(ARCH_ARM_HAVE_VFP)
+    if((x2 > x1) && gArchUseSIMD) {
+        int32_t len = (x2 - x1 - 1) >> 3;
+        if(len > 0) {
+            if (cstep == 1) {
+                rsdIntrinsicYuv2_K(out, Y, u, v, len, YuvCoeff);
+                x1 += len << 3;
+                out += len << 3;
+            } else if (cstep == 2) {
+                // Check for proper interleave
+                intptr_t ipu = (intptr_t)u;
+                intptr_t ipv = (intptr_t)v;
+
+                if (ipu == (ipv + 1)) {
+                    rsdIntrinsicYuv_K(out, Y, v, len, YuvCoeff);
+                    x1 += len << 3;
+                    out += len << 3;
+                } else if (ipu == (ipv - 1)) {
+                    rsdIntrinsicYuvR_K(out, Y, u, len, YuvCoeff);
+                    x1 += len << 3;
+                    out += len << 3;
                 }
 
-               // ALOGE("y %i  %i  %i", p->y, x1, x2);
-                while(x1 < x2) {
-                    uchar u = uv[(x1 & 0xffffe) + 1];
-                    uchar v = uv[(x1 & 0xffffe) + 0];
-                    *out = rsYuvToRGBA_uchar4(Y[x1], u, v);
-                    out++;
-                    x1++;
-                    *out = rsYuvToRGBA_uchar4(Y[x1], u, v);
-                    out++;
-                    x1++;
-                }
             }
         }
-        break;
-
-#if !defined(RS_SERVER)
-    case HAL_PIXEL_FORMAT_YV12:
-        {
-            const uchar *pinU = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
-            const size_t strideU = cp->alloc->mHal.drvState.lod[1].stride;
-            const uchar *u = pinU + ((p->y >> 1) * strideU);
-
-            const uchar *pinV = (const uchar *)cp->alloc->mHal.drvState.lod[2].mallocPtr;
-            const size_t strideV = cp->alloc->mHal.drvState.lod[2].stride;
-            const uchar *v = pinV + ((p->y >> 1) * strideV);
-
-            if(x2 > x1) {
-        #if defined(ARCH_ARM_HAVE_VFP)
-                if (gArchUseSIMD) {
-                    int32_t len = (x2 - x1 - 1) >> 3;
-                    if(len > 0) {
-                        rsdIntrinsicYuv2_K(out, Y, u, v, len, YuvCoeff);
-                        x1 += len << 3;
-                        out += len << 3;
-                    }
-                }
-        #endif
-
-               // ALOGE("y %i  %i  %i", p->y, x1, x2);
-                while(x1 < x2) {
-                    uchar ut = u[x1];
-                    uchar vt = v[x1];
-                    *out = rsYuvToRGBA_uchar4(Y[x1], ut, vt);
-                    out++;
-                    x1++;
-                    *out = rsYuvToRGBA_uchar4(Y[x1], ut, vt);
-                    out++;
-                    x1++;
-                }
-            }
-        }
-        break;
+    }
 #endif
+
+    if(x2 > x1) {
+       // ALOGE("y %i  %i  %i", p->y, x1, x2);
+        while(x1 < x2) {
+            int cx = (x1 >> 1) * cstep;
+            *out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
+            out++;
+            x1++;
+            *out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
+            out++;
+            x1++;
+        }
     }
 
 }