am 4283f579: Fix rounding bug of IntrinsicConvolve in cpu_ref

* commit '4283f579c424f07bc07c7f075398053eed3f8281':
  Fix rounding bug of IntrinsicConvolve in cpu_ref
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index f9b70cc..e3fa245 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -105,7 +105,7 @@
                 convert_float4(py2[x]) * coeff[7] +
                 convert_float4(py2[x2]) * coeff[8];
 
-    px = clamp(px, 0.f, 255.f);
+    px = clamp(px + 0.5f, 0.f, 255.f);
     uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
     *out = o;
 }
@@ -127,7 +127,7 @@
                 convert_float2(py2[x]) * coeff[7] +
                 convert_float2(py2[x2]) * coeff[8];
 
-    px = clamp(px, 0.f, 255.f);
+    px = clamp(px + 0.5f, 0.f, 255.f);
     *out = convert_uchar2(px);
 }
 
@@ -147,7 +147,7 @@
                ((float)py2[x1]) * coeff[6] +
                ((float)py2[x]) * coeff[7] +
                ((float)py2[x2]) * coeff[8];
-    *out = clamp(px, 0.f, 255.f);
+    *out = clamp(px + 0.5f, 0.f, 255.f);
 }
 
 static void ConvolveOneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index 815badf..e591e44 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -125,7 +125,7 @@
                 convert_float4(py4[x2]) * coeff[22] +
                 convert_float4(py4[x3]) * coeff[23] +
                 convert_float4(py4[x4]) * coeff[24];
-    px = clamp(px, 0.f, 255.f);
+    px = clamp(px + 0.5f, 0.f, 255.f);
     *out = convert_uchar4(px);
 }
 
@@ -168,7 +168,7 @@
                 convert_float2(py4[x2]) * coeff[22] +
                 convert_float2(py4[x3]) * coeff[23] +
                 convert_float2(py4[x4]) * coeff[24];
-    px = clamp(px, 0.f, 255.f);
+    px = clamp(px + 0.5f, 0.f, 255.f);
     *out = convert_uchar2(px);
 }
 
@@ -211,7 +211,7 @@
                (float)(py4[x2]) * coeff[22] +
                (float)(py4[x3]) * coeff[23] +
                (float)(py4[x4]) * coeff[24];
-    px = clamp(px, 0.f, 255.f);
+    px = clamp(px + 0.5f, 0.f, 255.f);
     *out = px;
 }