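Fix rounding bug in IntrinsicConvolve in cpu_ref

Add 0.5 to the convolved value before clamping to [0, 255] so that the
conversion to uchar rounds to nearest instead of truncating.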

bug 18121051

Change-Id: Ica3e32731a7f78c8869b84e1e257216a52f79f8a
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index 552a835..e5953cf 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -105,7 +105,7 @@
                 convert_float4(py2[x]) * coeff[7] +
                 convert_float4(py2[x2]) * coeff[8];
 
-    px = clamp(px, 0.f, 255.f);
+    px = clamp(px + 0.5f, 0.f, 255.f);
     uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
     *out = o;
 }
@@ -127,7 +127,7 @@
                 convert_float2(py2[x]) * coeff[7] +
                 convert_float2(py2[x2]) * coeff[8];
 
-    px = clamp(px, 0.f, 255.f);
+    px = clamp(px + 0.5f, 0.f, 255.f);
     *out = convert_uchar2(px);
 }
 
@@ -147,7 +147,7 @@
                ((float)py2[x1]) * coeff[6] +
                ((float)py2[x]) * coeff[7] +
                ((float)py2[x2]) * coeff[8];
-    *out = clamp(px, 0.f, 255.f);
+    *out = clamp(px + 0.5f, 0.f, 255.f);
 }
 
 static void ConvolveOneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out,