Intrinsics
Change-Id: I1ce02ecd853382a2c92823b021750b93f1786ccf
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index a36f728..9225e06 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -18,6 +18,7 @@
#include "rsdBcc.h"
#include "rsdRuntime.h"
#include "rsdAllocation.h"
+#include "rsdIntrinsics.h"
#include <bcc/BCCContext.h>
#include <bcc/Renderscript/RSCompilerDriver.h>
@@ -36,6 +37,7 @@
using namespace android::renderscript;
struct DrvScript {
+ RsScriptIntrisicID mIntrinsicID;
int (*mRoot)();
int (*mRootExpand)();
void (*mInit)();
@@ -167,27 +169,42 @@
}
+bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrisicID iid, Element *e) {
+ pthread_mutex_lock(&rsdgInitMutex); // held for the whole body; released on both exit paths
+ // Driver-side init for an intrinsic script: allocate a zero-filled DrvScript, attach it to s, record iid.
+ DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
+ if (drv == NULL) {
+ goto error;
+ }
+ s->mHal.drv = drv;
+ drv->mIntrinsicID = iid;
+ // NOTE(review): rsc and e are not referenced by any statement in this function.
+ s->mHal.info.exportedVariableCount = 1;
+ // NOTE(review): presumably the intrinsic exposes exactly one settable variable -- confirm.
+ // Returns true once the DrvScript is attached; false only if the calloc above fails.
+
+
+ pthread_mutex_unlock(&rsdgInitMutex);
+ return true;
+ // Failure path: nothing has been attached to s yet, so only the mutex needs releasing.
+error:
+ pthread_mutex_unlock(&rsdgInitMutex);
+ return false;
+}
+
typedef struct {
+ RsForEachStubParamStruct fep;
+
Context *rsc;
Script *script;
ForEachFunc_t kernel;
uint32_t sig;
const Allocation * ain;
Allocation * aout;
- const void * usr;
- size_t usrLen;
uint32_t mSliceSize;
volatile int mSliceNum;
- const uint8_t *ptrIn;
- uint32_t eStrideIn;
- uint8_t *ptrOut;
- uint32_t eStrideOut;
-
- uint32_t yStrideIn;
- uint32_t yStrideOut;
-
uint32_t xStart;
uint32_t xEnd;
uint32_t yStart;
@@ -196,20 +213,13 @@
uint32_t zEnd;
uint32_t arrayStart;
uint32_t arrayEnd;
-
- uint32_t dimX;
- uint32_t dimY;
- uint32_t dimZ;
- uint32_t dimArray;
} MTLaunchStruct;
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
static void wc_xy(void *usr, uint32_t idx) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
RsForEachStubParamStruct p;
- memset(&p, 0, sizeof(p));
- p.usr = mtls->usr;
- p.usr_len = mtls->usrLen;
+ memcpy(&p, &mtls->fep, sizeof(p));
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
uint32_t sig = mtls->sig;
@@ -226,9 +236,9 @@
//ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
//ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
for (p.y = yStart; p.y < yEnd; p.y++) {
- p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
- p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
+ p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y);
+ p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y);
+ fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
}
}
}
@@ -236,9 +246,7 @@
static void wc_x(void *usr, uint32_t idx) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
RsForEachStubParamStruct p;
- memset(&p, 0, sizeof(p));
- p.usr = mtls->usr;
- p.usr_len = mtls->usrLen;
+ memcpy(&p, &mtls->fep, sizeof(p));
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
uint32_t sig = mtls->sig;
@@ -255,9 +263,9 @@
//ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
//ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
- p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
- p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
- fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
+ p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
+ p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
+ fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
}
}
@@ -275,24 +283,29 @@
MTLaunchStruct mtls;
memset(&mtls, 0, sizeof(mtls));
- //ALOGE("for each script %p in %p out %p", s, ain, aout);
+ ALOGE("for each script %p in %p out %p", s, ain, aout);
DrvScript *drv = (DrvScript *)s->mHal.drv;
- rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
- mtls.kernel = reinterpret_cast<ForEachFunc_t>(
- drv->mExecutable->getExportForeachFuncAddrs()[slot]);
- rsAssert(mtls.kernel != NULL);
- mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+
+ if (drv->mIntrinsicID) {
+ mtls.kernel = (void (*)())&rsdIntrinsic_Convolve3x3_uchar4;
+ } else {
+ rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
+ mtls.kernel = reinterpret_cast<ForEachFunc_t>(
+ drv->mExecutable->getExportForeachFuncAddrs()[slot]);
+ rsAssert(mtls.kernel != NULL);
+ mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+ }
if (ain) {
- mtls.dimX = ain->getType()->getDimX();
- mtls.dimY = ain->getType()->getDimY();
- mtls.dimZ = ain->getType()->getDimZ();
+ mtls.fep.dimX = ain->getType()->getDimX();
+ mtls.fep.dimY = ain->getType()->getDimY();
+ mtls.fep.dimZ = ain->getType()->getDimZ();
//mtls.dimArray = ain->getType()->getDimArray();
} else if (aout) {
- mtls.dimX = aout->getType()->getDimX();
- mtls.dimY = aout->getType()->getDimY();
- mtls.dimZ = aout->getType()->getDimZ();
+ mtls.fep.dimX = aout->getType()->getDimX();
+ mtls.fep.dimY = aout->getType()->getDimY();
+ mtls.fep.dimZ = aout->getType()->getDimZ();
//mtls.dimArray = aout->getType()->getDimArray();
} else {
rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
@@ -300,24 +313,24 @@
}
if (!sc || (sc->xEnd == 0)) {
- mtls.xEnd = mtls.dimX;
+ mtls.xEnd = mtls.fep.dimX;
} else {
- rsAssert(sc->xStart < mtls.dimX);
- rsAssert(sc->xEnd <= mtls.dimX);
+ rsAssert(sc->xStart < mtls.fep.dimX);
+ rsAssert(sc->xEnd <= mtls.fep.dimX);
rsAssert(sc->xStart < sc->xEnd);
- mtls.xStart = rsMin(mtls.dimX, sc->xStart);
- mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
+ mtls.xStart = rsMin(mtls.fep.dimX, sc->xStart);
+ mtls.xEnd = rsMin(mtls.fep.dimX, sc->xEnd);
if (mtls.xStart >= mtls.xEnd) return;
}
if (!sc || (sc->yEnd == 0)) {
- mtls.yEnd = mtls.dimY;
+ mtls.yEnd = mtls.fep.dimY;
} else {
- rsAssert(sc->yStart < mtls.dimY);
- rsAssert(sc->yEnd <= mtls.dimY);
+ rsAssert(sc->yStart < mtls.fep.dimY);
+ rsAssert(sc->yEnd <= mtls.fep.dimY);
rsAssert(sc->yStart < sc->yEnd);
- mtls.yStart = rsMin(mtls.dimY, sc->yStart);
- mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
+ mtls.yStart = rsMin(mtls.fep.dimY, sc->yStart);
+ mtls.yEnd = rsMin(mtls.fep.dimY, sc->yEnd);
if (mtls.yStart >= mtls.yEnd) return;
}
@@ -335,40 +348,41 @@
mtls.ain = ain;
mtls.aout = aout;
mtls.script = s;
- mtls.usr = usr;
- mtls.usrLen = usrLen;
+ mtls.fep.usr = usr;
+ mtls.fep.usrLen = usrLen;
mtls.mSliceSize = 10;
mtls.mSliceNum = 0;
- mtls.ptrIn = NULL;
- mtls.eStrideIn = 0;
+ mtls.fep.ptrIn = NULL;
+ mtls.fep.eStrideIn = 0;
if (ain) {
DrvAllocation *aindrv = (DrvAllocation *)ain->mHal.drv;
- mtls.ptrIn = (const uint8_t *)aindrv->lod[0].mallocPtr;
- mtls.eStrideIn = ain->getType()->getElementSizeBytes();
- mtls.yStrideIn = aindrv->lod[0].stride;
+ mtls.fep.ptrIn = (const uint8_t *)aindrv->lod[0].mallocPtr;
+ mtls.fep.eStrideIn = ain->getType()->getElementSizeBytes();
+ mtls.fep.yStrideIn = aindrv->lod[0].stride;
}
- mtls.ptrOut = NULL;
- mtls.eStrideOut = 0;
+ mtls.fep.ptrOut = NULL;
+ mtls.fep.eStrideOut = 0;
if (aout) {
DrvAllocation *aoutdrv = (DrvAllocation *)aout->mHal.drv;
- mtls.ptrOut = (uint8_t *)aoutdrv->lod[0].mallocPtr;
- mtls.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls.yStrideOut = aoutdrv->lod[0].stride;
+ mtls.fep.ptrOut = (uint8_t *)aoutdrv->lod[0].mallocPtr;
+ mtls.fep.eStrideOut = aout->getType()->getElementSizeBytes();
+ mtls.fep.yStrideOut = aoutdrv->lod[0].stride;
}
+
if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) {
dc->mInForEach = true;
- if (mtls.dimY > 1) {
- mtls.mSliceSize = mtls.dimY / (dc->mWorkers.mCount * 4);
+ if (mtls.fep.dimY > 1) {
+ mtls.mSliceSize = mtls.fep.dimY / (dc->mWorkers.mCount * 4);
if(mtls.mSliceSize < 1) {
mtls.mSliceSize = 1;
}
rsdLaunchThreads(mrsc, wc_xy, &mtls);
} else {
- mtls.mSliceSize = mtls.dimX / (dc->mWorkers.mCount * 4);
+ mtls.mSliceSize = mtls.fep.dimX / (dc->mWorkers.mCount * 4);
if(mtls.mSliceSize < 1) {
mtls.mSliceSize = 1;
}
@@ -380,9 +394,7 @@
//ALOGE("launch 1");
} else {
RsForEachStubParamStruct p;
- memset(&p, 0, sizeof(p));
- p.usr = mtls.usr;
- p.usr_len = mtls.usrLen;
+ memcpy(&p, &mtls.fep, sizeof(p));
uint32_t sig = mtls.sig;
//ALOGE("launch 3");
@@ -390,13 +402,11 @@
for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
- uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
- mtls.dimX * mtls.dimY * p.z +
- mtls.dimX * p.y;
- p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
- p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
- fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
- mtls.eStrideOut);
+ uint32_t offset = mtls.fep.dimY * mtls.fep.dimZ * p.ar[0] +
+ mtls.fep.dimY * p.z + p.y;
+ p.out = mtls.fep.ptrOut + (mtls.fep.yStrideOut * offset);
+ p.in = mtls.fep.ptrIn + (mtls.fep.yStrideIn * offset);
+ fn(&p, mtls.xStart, mtls.xEnd, mtls.fep.eStrideIn, mtls.fep.eStrideOut);
}
}
}
@@ -451,6 +461,11 @@
//rsAssert(!script->mFieldIsObject[slot]);
//ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
+ if (drv->mIntrinsicID) {
+ rsdIntrinsic_Convolve3x3_SetVar(dc, script, slot, data, dataLength);
+ return;
+ }
+
int32_t *destPtr = reinterpret_cast<int32_t *>(
drv->mExecutable->getExportVarAddrs()[slot]);
if (!destPtr) {