am 69b99d3e: Merge "More IP cleanup."
* commit '69b99d3e6891c46fabc19f9f78ea89e8d9133f9c':
More IP cleanup.
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index f3a656d..b2bd3ce 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -289,10 +289,12 @@
if (p->dimX > 2048) {
if ((p->dimX > cp->mScratchSize[p->lid]) || !cp->mScratch[p->lid]) {
- cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], p->dimX * 16);
+ // Pad the size of the allocation by one 16-byte unit to allow alignment later
+ cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], (p->dimX + 1) * 16);
cp->mScratchSize[p->lid] = p->dimX;
}
- buf = (float4 *)cp->mScratch[p->lid];
+ // realloc only aligns to 8 bytes so we manually align to 16.
+ buf = (float4 *) ((((intptr_t)cp->mScratch[p->lid]) + 15) & ~0xf);
}
float4 *fout = (float4 *)buf;
int y = p->y;
@@ -407,6 +409,8 @@
mScratch = new void *[mCtx->getThreadCount()];
mScratchSize = new size_t[mCtx->getThreadCount()];
+ memset(mScratch, 0, sizeof(void *) * mCtx->getThreadCount());
+ memset(mScratchSize, 0, sizeof(size_t) * mCtx->getThreadCount());
ComputeGaussianWeights();
}
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index e2ac102..cdfe7d1 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -58,13 +58,23 @@
static void kernelP1U2(const RsForEachStubParamStruct *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
static void kernelP1U1(const RsForEachStubParamStruct *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
+ static void kernelP1L4(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep);
+ static void kernelP1L3(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep);
+ static void kernelP1L2(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep);
+ static void kernelP1L1(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep);
+
};
}
@@ -92,11 +102,11 @@
uint32_t usrLen, const RsScriptCall *sc) {
const uint32_t threads = mCtx->getThreadCount();
- const uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
+ uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
switch (slot) {
case 0:
- switch(mAllocOut->getType()->getElement()->getVectorSize()) {
+ switch(vSize) {
case 1:
mRootPtr = &kernelP1U1;
break;
@@ -105,6 +115,7 @@
break;
case 3:
mRootPtr = &kernelP1U3;
+ vSize = 4;
break;
case 4:
mRootPtr = &kernelP1U4;
@@ -112,10 +123,23 @@
}
break;
case 1:
- mRootPtr = &kernelP1L;
+ switch(ain->getType()->getElement()->getVectorSize()) {
+ case 1:
+ mRootPtr = &kernelP1L1;
+ break;
+ case 2:
+ mRootPtr = &kernelP1L2;
+ break;
+ case 3:
+ mRootPtr = &kernelP1L3;
+ break;
+ case 4:
+ mRootPtr = &kernelP1L4;
+ break;
+ }
break;
}
- memset(mSums, 0, 256 * 4 * threads * vSize);
+ memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
}
void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain,
@@ -126,10 +150,12 @@
uint32_t threads = mCtx->getThreadCount();
uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
+ if (vSize == 3) vSize = 4;
+
for (uint32_t ct=0; ct < (256 * vSize); ct++) {
o[ct] = mSums[ct];
for (uint32_t t=1; t < threads; t++) {
- o[ct] += mSums[ct + 256 * vSize];
+ o[ct] += mSums[ct + (256 * vSize * t)];
}
}
}
@@ -147,7 +173,7 @@
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
sums[(in[3] << 2) + 3] ++;
- in += 4;
+ in += instep;
}
}
@@ -163,7 +189,7 @@
sums[(in[0] << 2) ] ++;
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
- in += 4;
+ in += instep;
}
}
@@ -176,15 +202,15 @@
int * sums = &cp->mSums[256 * 2 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
- sums[(in[0] << 2) ] ++;
- sums[(in[1] << 2) + 1] ++;
- in += 2;
+ sums[(in[0] << 1) ] ++;
+ sums[(in[1] << 1) + 1] ++;
+ in += instep;
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
uchar *in = (uchar *)p->in;
@@ -195,8 +221,56 @@
(cp->mDotI[1] * in[1]) +
(cp->mDotI[2] * in[2]) +
(cp->mDotI[3] * in[3]);
- sums[t >> 8] ++;
- in += 4;
+ sums[(t + 0x7f) >> 8] ++;
+ in += instep;
+ }
+}
+
+void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
+
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
+ uchar *in = (uchar *)p->in;
+ int * sums = &cp->mSums[256 * p->lid];
+
+ for (uint32_t x = xstart; x < xend; x++) {
+ int t = (cp->mDotI[0] * in[0]) +
+ (cp->mDotI[1] * in[1]) +
+ (cp->mDotI[2] * in[2]);
+ sums[(t + 0x7f) >> 8] ++;
+ in += instep;
+ }
+}
+
+void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
+
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
+ uchar *in = (uchar *)p->in;
+ int * sums = &cp->mSums[256 * p->lid];
+
+ for (uint32_t x = xstart; x < xend; x++) {
+ int t = (cp->mDotI[0] * in[0]) +
+ (cp->mDotI[1] * in[1]);
+ sums[(t + 0x7f) >> 8] ++;
+ in += instep;
+ }
+}
+
+void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
+
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
+ uchar *in = (uchar *)p->in;
+ int * sums = &cp->mSums[256 * p->lid];
+
+ for (uint32_t x = xstart; x < xend; x++) {
+ int t = (cp->mDotI[0] * in[0]);
+ sums[(t + 0x7f) >> 8] ++;
+ in += instep;
}
}
@@ -204,12 +278,20 @@
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
+ uchar *in = (uchar *)p->in;
+ int * sums = &cp->mSums[256 * p->lid];
+
+ for (uint32_t x = xstart; x < xend; x++) {
+ sums[in[0]] ++;
+ in += instep;
+ }
}
RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
const Script *s, const Element *e)
- : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLUR) {
+ : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
mRootPtr = NULL;
mSums = new int[256 * 4 * mCtx->getThreadCount()];
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index c956f43..2b69d85 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -24,6 +24,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+ #include <sys/stat.h>
#include <unistd.h>
#else
#include <bcc/BCCContext.h>
@@ -62,6 +63,18 @@
return std::string(buf);
}
+// Check that the directory at path is accessible (rwx); attempt to create it with mode 0700 if it isn't.
+static bool ensureCacheDirExists(const char *path) {
+ if (access(path, R_OK | W_OK | X_OK) == 0) {
+ // Done if we can rwx the directory
+ return true;
+ }
+ if (mkdir(path, 0700) == 0) {
+ return true;
+ }
+ return false;
+}
+
// Attempt to load the shared library from origName, but then fall back to
// creating the symlinked shared library if necessary (to ensure instancing).
// This function returns the dlopen()-ed handle if successful.
@@ -91,9 +104,16 @@
return loaded;
}
- // Construct an appropriately randomized filename for the symlink.
std::string newName(cacheDir);
- newName.append("/com.android.renderscript.cache/librs.");
+ newName.append("/com.android.renderscript.cache/");
+
+ if (!ensureCacheDirExists(newName.c_str())) {
+ ALOGE("Could not verify or create cache dir: %s", cacheDir);
+ return NULL;
+ }
+
+ // Construct an appropriately randomized filename for the symlink.
+ newName.append("librs.");
newName.append(resName);
newName.append("#");
newName.append(getRandomString(6)); // 62^6 potential filename variants.
diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp
index 927168a..ab439e6 100644
--- a/rsScriptIntrinsic.cpp
+++ b/rsScriptIntrinsic.cpp
@@ -22,9 +22,13 @@
using namespace android::renderscript;
ScriptIntrinsic::ScriptIntrinsic(Context *rsc) : Script(rsc) {
+ mIntrinsicID = 0;
}
ScriptIntrinsic::~ScriptIntrinsic() {
+ if (mIntrinsicID != 0) {
+ mRSC->mHal.funcs.script.destroy(mRSC, this);
+ }
}
bool ScriptIntrinsic::init(Context *rsc, RsScriptIntrinsicID iid, Element *e) {