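Fix bugs in the Blur and Convolve5x5 intrinsics.

- Blur: pass `t` rather than `t << 2` as the last argument to
  rsdIntrinsicBlurVFU4_K.
- Blur: tighten the upper bound of the y-range check that selects the
  OneVFU1 path by one row (`y < dimY - mIradius - 1`).
- Convolve5x5: initialize all 25 coefficients in the constructor (the
  loop previously stopped at 9).
- Convolve5x5: drop the `+ 0.5f` bias when converting the float
  coefficients to short.
- Convolve5x5: store the result pixel with `out->rgba = o.rgba`.
- NEON (rsCpuIntrinsics_neon.S): add 0x7f to the 32-bit accumulators and
  narrow with vrshrn instead of vshrn.
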
Change-Id: I027e5dcd8e538e52a21941facc5b93db2a6eac8c
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index d44b07a..1a28aab 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -211,7 +211,7 @@
int t = (x2 - x1) >> 2;
t &= ~1;
if(t) {
- rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, 0, t << 2);
+ rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, 0, t );
len -= t << 2;
ptrIn += t << 2;
out += t << 2;
@@ -345,7 +345,7 @@
float *fout = (float *)buf;
int y = p->y;
- if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius))) {
+ if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius -1))) {
const uchar *pi = pin + (y - cp->mIradius) * stride;
OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, x1, x2);
} else {
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index d36639f..bcd5ffd 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -62,7 +62,7 @@
rsAssert(slot == 0);
memcpy (&fp, data, dataLength);
for(int ct=0; ct < 25; ct++) {
- ip[ct] = (short)(fp[ct] * 255.f + 0.5f);
+ ip[ct] = (short)(fp[ct] * 255.f);
}
}
@@ -109,7 +109,11 @@
px = clamp(px, 0.f, 255.f);
uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
- *out = o;
+ //if ((out[0].r != o.r) || (out[0].y != o.y) || (out[0].z != o.z) || (out[0].w != o.w)) {
+ //ALOGE("x %i %i,%i,%i,%i %i,%i,%i,%i", x, o.x, o.y, o.z, o.w, out[0].x, out[0].y, out[0].z, out[0].w);
+ //}
+ //o.w = 0xff;
+ out->rgba = o.rgba;
}
extern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, const void *y1,
@@ -171,9 +175,9 @@
: RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5) {
mRootPtr = &kernel;
- for(int ct=0; ct < 9; ct++) {
+ for(int ct=0; ct < 25; ct++) {
fp[ct] = 1.f / 25.f;
- ip[ct] = (short)(fp[ct] * 255.f + 0.5f);
+ ip[ct] = (short)(fp[ct] * 255.f);
}
}
diff --git a/cpu_ref/rsCpuIntrinsics_neon.S b/cpu_ref/rsCpuIntrinsics_neon.S
index 76e709e..d62b5a9 100644
--- a/cpu_ref/rsCpuIntrinsics_neon.S
+++ b/cpu_ref/rsCpuIntrinsics_neon.S
@@ -524,6 +524,8 @@
vld1.16 {d0, d1, d2, d3}, [r6]!
vld1.16 {d4, d5, d6}, [r6]
+ vmov.u32 q15, #0x7f
+
/* load the count */
ldr r6, [sp, #32 + 64]
@@ -652,10 +654,13 @@
+ vadd.i32 q4, q4, q15
+ vadd.i32 q5, q5, q15
/* Narrow it to a d-reg 32 -> 16 bit */
- vshrn.i32 d8, q4, #8
- vshrn.i32 d9, q5, #8
+ vrshrn.i32 d8, q4, #8
+ vrshrn.i32 d9, q5, #8
+
/* Pack 16 -> 8 bit, saturate, put two pixels into D reg */
vqmovun.s16 d8, q4