Merge script groups: run all kernels of a group in a single launch when no kernel output feeds a script field.
Change-Id: Id5baf5e7c59a004127250fced91b6b33b1fe053b
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index c78508c..b814ada 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -176,6 +176,7 @@
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
RsForEachStubParamStruct p;
memcpy(&p, &mtls->fep, sizeof(p));
+ p.lid = idx;
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
uint32_t sig = mtls->sig;
@@ -222,6 +223,7 @@
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
RsForEachStubParamStruct p;
memcpy(&p, &mtls->fep, sizeof(p));
+ p.lid = idx;
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
uint32_t sig = mtls->sig;
@@ -328,8 +330,7 @@
}
void rsdScriptLaunchThreads(const Context *rsc,
- Script *s,
- uint32_t slot,
+ bool isThreadable,
const Allocation * ain,
Allocation * aout,
const void * usr,
@@ -337,11 +338,10 @@
const RsScriptCall *sc,
MTLaunchStruct *mtls) {
- Script * oldTLS = setTLS(s);
Context *mrsc = (Context *)rsc;
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
- if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) {
+ if ((dc->mWorkers.mCount > 1) && isThreadable && !dc->mInForEach) {
dc->mInForEach = true;
if (mtls->fep.dimY > 1) {
mtls->mSliceSize = mtls->fep.dimY / (dc->mWorkers.mCount * 4);
@@ -382,8 +382,6 @@
}
}
}
-
- setTLS(oldTLS);
}
void rsdScriptInvokeForEach(const Context *rsc,
@@ -415,7 +413,9 @@
}
- rsdScriptLaunchThreads(rsc, s, slot, ain, aout, usr, usrLen, sc, &mtls);
+ Script * oldTLS = setTLS(s);
+ rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, ain, aout, usr, usrLen, sc, &mtls);
+ setTLS(oldTLS);
}
diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h
index ab62f14..4a42eb5 100644
--- a/driver/rsdBcc.h
+++ b/driver/rsdBcc.h
@@ -161,8 +161,7 @@
} MTLaunchStruct;
void rsdScriptLaunchThreads(const android::renderscript::Context *rsc,
- android::renderscript::Script *s,
- uint32_t slot,
+ bool isThreadable,
const android::renderscript::Allocation * ain,
android::renderscript::Allocation * aout,
const void * usr,
diff --git a/driver/rsdScriptGroup.cpp b/driver/rsdScriptGroup.cpp
index b19678d..f4f0f1c 100644
--- a/driver/rsdScriptGroup.cpp
+++ b/driver/rsdScriptGroup.cpp
@@ -25,6 +25,7 @@
#include "rsScriptGroup.h"
#include "rsdScriptGroup.h"
#include "rsdBcc.h"
+#include "rsdAllocation.h"
using namespace android;
using namespace android::renderscript;
@@ -47,12 +48,88 @@
android::renderscript::Allocation *) {
}
+struct ScriptList { // Parallel per-kernel arrays handed to ScriptGroupRoot through p->usr.
+ size_t count; // Number of kernels; loop bound in ScriptGroupRoot.
+ Allocation *const* ins; // Input allocation per kernel; NULL when the kernel has no input.
+ bool const* inExts; // True when ins[i] was taken from the script group's own inputs.
+ Allocation *const* outs; // Output allocation per kernel; NULL when the kernel has no output.
+ bool const* outExts; // True when outs[i] was taken from the script group's own outputs.
+ const void *const* usrPtrs; // Stored into p->usr before kernel i runs (intrinsic data, or NULL).
+ size_t const *usrSizes; // NOTE(review): never assigned or read in the visible code - confirm intent.
+ uint32_t const *sigs; // Per-kernel foreach signature (0 for intrinsics); not read by ScriptGroupRoot.
+ const void *const* fnPtrs; // Kernel entry points; cast to ScriptGroupRootFunc_t before the call.
+
+ const ScriptKernelID *const* kernels; // Kernel IDs the entries above were built from.
+};
+
+typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p, // Type each ScriptList::fnPtrs entry is cast to.
+ uint32_t xstart, uint32_t xend, // Forwarded unchanged by ScriptGroupRoot.
+ uint32_t instep, uint32_t outstep); // Same parameter list as ScriptGroupRoot itself.
+
+// Picks the address inside LOD 0 of |drv| that this invocation should use:
+// base + stride * p->y when the allocation is a group-level input/output
+// (|isExternal|); otherwise base + stride * p->lid if dimY > p->lid, else base.
+static uint8_t *ScriptGroupRowPtr(const DrvAllocation *drv, bool isExternal,
+                                  const RsForEachStubParamStruct *p) {
+    uint8_t *base = (uint8_t *)drv->lod[0].mallocPtr;
+    if (isExternal) {
+        return base + drv->lod[0].stride * p->y;
+    }
+    if (drv->lod[0].dimY > p->lid) {
+        return base + drv->lod[0].stride * p->lid;
+    }
+    return base;
+}
+
+// Runs every kernel listed in the ScriptList passed through p->usr, in order.
+// The const on |p| is cast away: usr, ptrIn, in, ptrOut and out are rewritten
+// for each kernel (NULL pointers when it has no input/output allocation), and
+// each kernel is called with the xstart/xend/instep/outstep received here.
+// Only p->usr is restored on return; the pointer fields keep the values set
+// for the last kernel.
+static void ScriptGroupRoot(const RsForEachStubParamStruct *p,
+                            uint32_t xstart, uint32_t xend,
+                            uint32_t instep, uint32_t outstep) {
+    const ScriptList *sl = (const ScriptList *)p->usr;
+    RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
+    const void *oldUsr = p->usr;
+
+    for (size_t ct = 0; ct < sl->count; ct++) {
+        ScriptGroupRootFunc_t func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
+        mp->usr = sl->usrPtrs[ct];
+
+        mp->ptrIn = NULL;
+        mp->in = NULL;
+        if (sl->ins[ct]) {
+            DrvAllocation *drv = (DrvAllocation *)sl->ins[ct]->mHal.drv;
+            mp->ptrIn = (const uint8_t *)drv->lod[0].mallocPtr;
+            mp->in = ScriptGroupRowPtr(drv, sl->inExts[ct], p);
+        }
+
+        mp->ptrOut = NULL;
+        mp->out = NULL;
+        if (sl->outs[ct]) {
+            DrvAllocation *drv = (DrvAllocation *)sl->outs[ct]->mHal.drv;
+            mp->ptrOut = (uint8_t *)drv->lod[0].mallocPtr;
+            mp->out = ScriptGroupRowPtr(drv, sl->outExts[ct], p);
+        }
+
+        func(p, xstart, xend, instep, outstep);
+    }
+
+    mp->usr = oldUsr;
+}
+
+
void rsdScriptGroupExecute(const android::renderscript::Context *rsc,
const android::renderscript::ScriptGroup *sg) {
Vector<Allocation *> ins;
+ Vector<bool> inExts;
Vector<Allocation *> outs;
+ Vector<bool> outExts;
Vector<const ScriptKernelID *> kernels;
+ bool fieldDep = false;
for (size_t ct=0; ct < sg->mNodes.size(); ct++) {
ScriptGroup::Node *n = sg->mNodes[ct];
@@ -71,6 +148,8 @@
const ScriptKernelID *k = n->mKernels[ct2];
Allocation *ain = NULL;
Allocation *aout = NULL;
+ bool inExt = false;
+ bool outExt = false;
for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
if (n->mInputs[ct3]->mDstKernel.get() == k) {
@@ -81,6 +160,7 @@
for (size_t ct3=0; ct3 < sg->mInputs.size(); ct3++) {
if (sg->mInputs[ct3]->mKernel == k) {
ain = sg->mInputs[ct3]->mAlloc.get();
+ inExt = true;
//ALOGE(" io in %p", ain);
}
}
@@ -88,12 +168,16 @@
for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
if (n->mOutputs[ct3]->mSource.get() == k) {
aout = n->mOutputs[ct3]->mAlloc.get();
+ if(n->mOutputs[ct3]->mDstField.get() != NULL) {
+ fieldDep = true;
+ }
//ALOGE(" link out %p", aout);
}
}
for (size_t ct3=0; ct3 < sg->mOutputs.size(); ct3++) {
if (sg->mOutputs[ct3]->mKernel == k) {
aout = sg->mOutputs[ct3]->mAlloc.get();
+ outExt = true;
//ALOGE(" io out %p", aout);
}
}
@@ -101,7 +185,9 @@
if ((k->mHasKernelOutput == (aout != NULL)) &&
(k->mHasKernelInput == (ain != NULL))) {
ins.add(ain);
+ inExts.add(inExt);
outs.add(aout);
+ outExts.add(outExt);
kernels.add(k);
}
}
@@ -110,33 +196,65 @@
RsdHal * dc = (RsdHal *)rsc->mHal.drv;
MTLaunchStruct mtls;
- for (size_t ct=0; ct < ins.size(); ct++) {
- Script *s = kernels[ct]->mScript;
- DrvScript *drv = (DrvScript *)s->mHal.drv;
- uint32_t slot = kernels[ct]->mSlot;
+ if(fieldDep) {
+ for (size_t ct=0; ct < ins.size(); ct++) {
+ Script *s = kernels[ct]->mScript;
+ DrvScript *drv = (DrvScript *)s->mHal.drv;
+ uint32_t slot = kernels[ct]->mSlot;
- rsdScriptInvokeForEachMtlsSetup(rsc, ins[ct], outs[ct], NULL, 0, NULL, &mtls);
- mtls.script = s;
- mtls.fep.slot = slot;
+ rsdScriptInvokeForEachMtlsSetup(rsc, ins[ct], outs[ct], NULL, 0, NULL, &mtls);
+ mtls.script = s;
- if (drv->mIntrinsicID) {
- mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
- mtls.fep.usr = drv->mIntrinsicData;
- } else {
- mtls.kernel = reinterpret_cast<ForEachFunc_t>(
- drv->mExecutable->getExportForeachFuncAddrs()[slot]);
- rsAssert(mtls.kernel != NULL);
- mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+ if (drv->mIntrinsicID) {
+ mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
+ mtls.fep.usr = drv->mIntrinsicData;
+ } else {
+ mtls.kernel = reinterpret_cast<ForEachFunc_t>(
+ drv->mExecutable->getExportForeachFuncAddrs()[slot]);
+ rsAssert(mtls.kernel != NULL);
+ mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+ }
+
+ rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, ins[ct], outs[ct],
+ NULL, 0, NULL, &mtls);
}
+ } else {
+ ScriptList sl;
+ sl.ins = ins.array();
+ sl.outs = outs.array();
+ sl.kernels = kernels.array();
+ sl.count = kernels.size();
-// typedef void (*outer_foreach_t)(
- // const android::renderscript::RsForEachStubParamStruct *,
- // uint32_t x1, uint32_t x2,
- // uint32_t instep, uint32_t outstep);
- //outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+ Vector<const void *> usrPtrs;
+ Vector<const void *> fnPtrs;
+ Vector<uint32_t> sigs;
+ for (size_t ct=0; ct < kernels.size(); ct++) {
+ Script *s = kernels[ct]->mScript;
+ DrvScript *drv = (DrvScript *)s->mHal.drv;
- rsdScriptLaunchThreads(rsc, s, slot, ins[ct], outs[ct], NULL, 0, NULL, &mtls);
+ if (drv->mIntrinsicID) {
+ fnPtrs.add((void *)drv->mIntrinsicFuncs.root);
+ usrPtrs.add(drv->mIntrinsicData);
+ sigs.add(0);
+ } else {
+ int slot = kernels[ct]->mSlot;
+ fnPtrs.add((void *)drv->mExecutable->getExportForeachFuncAddrs()[slot]);
+ usrPtrs.add(NULL);
+ sigs.add(drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second);
+ }
+ }
+ sl.sigs = sigs.array();
+ sl.usrPtrs = usrPtrs.array();
+ sl.fnPtrs = fnPtrs.array();
+ sl.inExts = inExts.array();
+ sl.outExts = outExts.array();
+
+ rsdScriptInvokeForEachMtlsSetup(rsc, ins[0], outs[0], NULL, 0, NULL, &mtls);
+ mtls.script = NULL;
+ mtls.kernel = (void (*)())&ScriptGroupRoot;
+ mtls.fep.usr = &sl;
+ rsdScriptLaunchThreads(rsc, true, ins[0], outs[0], NULL, 0, NULL, &mtls);
}
}