Merge "Fix crash running blur on 4K images"
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index f3a656d..b2bd3ce 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -289,10 +289,12 @@
if (p->dimX > 2048) {
if ((p->dimX > cp->mScratchSize[p->lid]) || !cp->mScratch[p->lid]) {
- cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], p->dimX * 16);
+ // Over-allocate by one extra 16-byte unit so the pointer can be aligned up later
+ cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], (p->dimX + 1) * 16);
cp->mScratchSize[p->lid] = p->dimX;
}
- buf = (float4 *)cp->mScratch[p->lid];
+ // realloc only aligns to 8 bytes so we manually align to 16.
+ buf = (float4 *) ((((intptr_t)cp->mScratch[p->lid]) + 15) & ~0xf);
}
float4 *fout = (float4 *)buf;
int y = p->y;
@@ -407,6 +409,8 @@
mScratch = new void *[mCtx->getThreadCount()];
mScratchSize = new size_t[mCtx->getThreadCount()];
+ memset(mScratch, 0, sizeof(void *) * mCtx->getThreadCount());
+ memset(mScratchSize, 0, sizeof(size_t) * mCtx->getThreadCount());
ComputeGaussianWeights();
}
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index e2ac102..755e8d0 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -58,13 +58,23 @@
static void kernelP1U2(const RsForEachStubParamStruct *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
static void kernelP1U1(const RsForEachStubParamStruct *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
+ static void kernelP1L4(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep);
+ static void kernelP1L3(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep);
+ static void kernelP1L2(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep);
+ static void kernelP1L1(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep);
+
};
}
@@ -92,11 +102,11 @@
uint32_t usrLen, const RsScriptCall *sc) {
const uint32_t threads = mCtx->getThreadCount();
- const uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
+ uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
switch (slot) {
case 0:
- switch(mAllocOut->getType()->getElement()->getVectorSize()) {
+ switch(vSize) {
case 1:
mRootPtr = &kernelP1U1;
break;
@@ -105,6 +115,7 @@
break;
case 3:
mRootPtr = &kernelP1U3;
+ vSize = 4;
break;
case 4:
mRootPtr = &kernelP1U4;
@@ -112,10 +123,23 @@
}
break;
case 1:
- mRootPtr = &kernelP1L;
+ switch(ain->getType()->getElement()->getVectorSize()) {
+ case 1:
+ mRootPtr = &kernelP1L1;
+ break;
+ case 2:
+ mRootPtr = &kernelP1L2;
+ break;
+ case 3:
+ mRootPtr = &kernelP1L3;
+ break;
+ case 4:
+ mRootPtr = &kernelP1L4;
+ break;
+ }
break;
}
- memset(mSums, 0, 256 * 4 * threads * vSize);
+ memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
}
void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain,
@@ -126,6 +150,8 @@
uint32_t threads = mCtx->getThreadCount();
uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
+ if (vSize == 3) vSize = 4;
+
for (uint32_t ct=0; ct < (256 * vSize); ct++) {
o[ct] = mSums[ct];
for (uint32_t t=1; t < threads; t++) {
@@ -147,7 +173,7 @@
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
sums[(in[3] << 2) + 3] ++;
- in += 4;
+ in += instep;
}
}
@@ -163,7 +189,7 @@
sums[(in[0] << 2) ] ++;
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
- in += 4;
+ in += instep;
}
}
@@ -176,15 +202,15 @@
int * sums = &cp->mSums[256 * 2 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
- sums[(in[0] << 2) ] ++;
- sums[(in[1] << 2) + 1] ++;
- in += 2;
+ sums[(in[0] << 1) ] ++;
+ sums[(in[1] << 1) + 1] ++;
+ in += instep;
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
uchar *in = (uchar *)p->in;
@@ -195,8 +221,56 @@
(cp->mDotI[1] * in[1]) +
(cp->mDotI[2] * in[2]) +
(cp->mDotI[3] * in[3]);
- sums[t >> 8] ++;
- in += 4;
+ sums[(t + 0x7f) >> 8] ++;
+ in += instep;
+ }
+}
+
+void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
+ // Dot-product histogram kernel for 3-component uchar input: one bin increment per x in [xstart, xend).
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
+ uchar *in = (uchar *)p->in;
+ int * sums = &cp->mSums[256 * p->lid]; // 256-entry slice of mSums selected by p->lid
+ // outstep is not used here; the only output is the increments applied to sums.
+ for (uint32_t x = xstart; x < xend; x++) {
+ int t = (cp->mDotI[0] * in[0]) +
+ (cp->mDotI[1] * in[1]) +
+ (cp->mDotI[2] * in[2]);
+ sums[(t + 0x7f) >> 8] ++; // add 0x7f, then drop the low 8 bits; presumably mDotI holds 8-bit fixed-point weights - TODO confirm
+ in += instep; // step to the next input element using the caller-supplied stride
+ }
+}
+
+void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
+ // Dot-product histogram kernel for 2-component uchar input: one bin increment per x in [xstart, xend).
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
+ uchar *in = (uchar *)p->in;
+ int * sums = &cp->mSums[256 * p->lid]; // 256-entry slice of mSums selected by p->lid
+ // outstep is not used here; the only output is the increments applied to sums.
+ for (uint32_t x = xstart; x < xend; x++) {
+ int t = (cp->mDotI[0] * in[0]) +
+ (cp->mDotI[1] * in[1]);
+ sums[(t + 0x7f) >> 8] ++; // add 0x7f, then drop the low 8 bits; presumably mDotI holds 8-bit fixed-point weights - TODO confirm
+ in += instep; // step to the next input element using the caller-supplied stride
+ }
+}
+
+void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
+ // Dot-product histogram kernel for 1-component uchar input: one bin increment per x in [xstart, xend).
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
+ uchar *in = (uchar *)p->in;
+ int * sums = &cp->mSums[256 * p->lid]; // 256-entry slice of mSums selected by p->lid
+ // outstep is not used here; the only output is the increments applied to sums.
+ for (uint32_t x = xstart; x < xend; x++) {
+ int t = (cp->mDotI[0] * in[0]);
+ sums[(t + 0x7f) >> 8] ++; // add 0x7f, then drop the low 8 bits; presumably mDotI holds 8-bit fixed-point weights - TODO confirm
+ in += instep; // step to the next input element using the caller-supplied stride
}
}
@@ -204,6 +278,14 @@
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
+ uchar *in = (uchar *)p->in;
+ int * sums = &cp->mSums[256 * p->lid];
+
+ for (uint32_t x = xstart; x < xend; x++) {
+ sums[in[0]] ++;
+ in += instep;
+ }
}
diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp
index 927168a..ab439e6 100644
--- a/rsScriptIntrinsic.cpp
+++ b/rsScriptIntrinsic.cpp
@@ -22,9 +22,13 @@
using namespace android::renderscript;
ScriptIntrinsic::ScriptIntrinsic(Context *rsc) : Script(rsc) {
+ mIntrinsicID = 0;
}
ScriptIntrinsic::~ScriptIntrinsic() {
+ if (mIntrinsicID != 0) {
+ mRSC->mHal.funcs.script.destroy(mRSC, this);
+ }
}
bool ScriptIntrinsic::init(Context *rsc, RsScriptIntrinsicID iid, Element *e) {