Fix performance regression in color matrix.

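The previous bug fix, which added "len &= 3", was itself buggy: it left
len holding the remainder (len % 4) instead of len rounded down to a
multiple of 4, so x1, out and in were advanced by the wrong amount after
the optimized kernel ran. Use "len &= ~3" instead.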

Change-Id: I32dba6b340ced35a7686243c5d9c468b5ade1ad6
diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
index 90ce5f8..aec45c3 100644
--- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
+++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
@@ -881,7 +881,7 @@
         if (gArchUseSIMD) {
             if((cp->mOptKernel != NULL) && (len >= 4)) {
                 cp->mOptKernel(out, in, cp->ip, len >> 2);
-                len &= 3;
+                len &= ~3;
                 x1 += len;
                 out += outstep * len;
                 in += instep * len;