Align all allocations to a 16-byte boundary.
This change also fixes an issue in the Blur intrinsic, where a buffer declared
as a float array was cast to float4 * (which triggered new alignment errors
with the updated LLVM). The buffer is now declared as float4 directly.
Change-Id: I3955b38f156c35f4d160652c75ab416bae09b2c8
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index 0d9fde8..d44b07a 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -272,8 +272,8 @@
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
- float stackbuf[4 * 2048];
- float *buf = &stackbuf[0];
+ float4 stackbuf[2048];
+ float4 *buf = &stackbuf[0];
RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
if (!cp->mAlloc.get()) {
ALOGE("Blur executed without input, skipping");
@@ -291,7 +291,7 @@
cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], p->dimX * 16);
cp->mScratchSize[p->lid] = p->dimX;
}
- buf = (float *)cp->mScratch[p->lid];
+ buf = (float4 *)cp->mScratch[p->lid];
}
float4 *fout = (float4 *)buf;
int y = p->y;
@@ -308,20 +308,20 @@
x1 = xstart;
while ((x1 < (uint32_t)cp->mIradius) && (x1 < x2)) {
- OneHU4(p, out, x1, (float4 *)buf, cp->mFp, cp->mIradius);
+ OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
#if defined(ARCH_ARM_HAVE_NEON)
if ((x1 + cp->mIradius) < x2) {
- rsdIntrinsicBlurHFU4_K(out, ((float4 *)buf) - cp->mIradius, cp->mFp,
+ rsdIntrinsicBlurHFU4_K(out, buf - cp->mIradius, cp->mFp,
cp->mIradius * 2 + 1, x1, x2 - cp->mIradius);
out += (x2 - cp->mIradius) - x1;
x1 = x2 - cp->mIradius;
}
#endif
while(x2 > x1) {
- OneHU4(p, out, x1, (float4 *)buf, cp->mFp, cp->mIradius);
+ OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 2f0c0d8..57e8b6e 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -310,16 +310,16 @@
}
ptr = (uint8_t*)alloc->mHal.state.userProvidedPtr;
} else {
- if (forceZero) {
- ptr = (uint8_t *)calloc(1, allocSize);
- } else {
- ptr = (uint8_t *)malloc(allocSize);
- }
+ // We align all allocations to a 16-byte boundary.
+ ptr = (uint8_t *)memalign(16, allocSize);
if (!ptr) {
alloc->mHal.drv = NULL;
free(drv);
return false;
}
+ if (forceZero) {
+ memset(ptr, 0, allocSize);
+ }
}
// Build the pointer tables
size_t verifySize = AllocationBuildPointerTable(rsc, alloc, alloc->getType(), ptr);