Migrate thread launch to driver.
Change-Id: If182c524cceb327547640f22f956856d291d1787
diff --git a/libs/rs/driver/rsdBcc.cpp b/libs/rs/driver/rsdBcc.cpp
index 36a4b01..2038a4c 100644
--- a/libs/rs/driver/rsdBcc.cpp
+++ b/libs/rs/driver/rsdBcc.cpp
@@ -55,6 +55,15 @@
};
+static Script * setTLS(Script *sc) { // Makes sc the calling thread's current script and returns the script it replaced.
+ ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); // per-thread record stored under Context::gThreadTLSKey
+ rsAssert(tls); // NOTE(review): tls is dereferenced below either way; confirm rsAssert stops execution on failure
+ Script *old = tls->mScript;
+ tls->mScript = sc;
+ return old; // callers hand this back to setTLS() once the invocation finishes
+}
+
+
// Input: cacheDir
// Input: resName
// Input: extName
@@ -234,13 +243,215 @@
}
+typedef struct { // Launch description plus shared progress counter for one forEach call; passed to wc_xy / wc_x as their usr pointer.
+ Context *rsc;
+ Script *script; // script whose mHal.info.root is called per element
+ const Allocation * ain;
+ Allocation * aout;
+ const void * usr; // forwarded unchanged as the third argument of root
-int rsdScriptInvokeRoot(const Context *dc, const Script *script) {
- DrvScript *drv = (DrvScript *)script->mHal.drv;
- return drv->mRoot();
+ uint32_t mSliceSize; // rows (wc_xy) or elements (wc_x) claimed per slice
+ volatile int mSliceNum; // slice counter shared by the workers; advanced with android_atomic_inc
+
+ const uint8_t *ptrIn; // input data base pointer; NULL when no input allocation was given
+ uint32_t eStrideIn; // input element size in bytes; 0 when no input allocation was given
+ uint8_t *ptrOut; // output data base pointer; NULL when no output allocation was given
+ uint32_t eStrideOut; // output element size in bytes; 0 when no output allocation was given
+
+ uint32_t xStart; // the Start/End pairs below are half-open ranges: loops run while index < End
+ uint32_t xEnd;
+ uint32_t yStart;
+ uint32_t yEnd;
+ uint32_t zStart;
+ uint32_t zEnd;
+ uint32_t arrayStart;
+ uint32_t arrayEnd;
+
+ uint32_t dimX; // dimX/dimY/dimZ are copied from the Type of ain, or of aout when ain is NULL
+ uint32_t dimY;
+ uint32_t dimZ;
+ uint32_t dimArray; // left at 0 by rsdScriptInvokeForEach (its assignment there is commented out)
+} MTLaunchStruct;
+typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
+
+static void wc_xy(void *usr, uint32_t idx) { // Worker callback for 2D launches: repeatedly claims a slice of rows and calls the script root for every (x, y) in it. usr is the MTLaunchStruct; idx (worker index) is only used by the disabled logging.
+ MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+
+ while (1) {
+ uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); // claim the next slice index from the shared counter
+ uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
+ uint32_t yEnd = yStart + mtls->mSliceSize;
+ yEnd = rsMin(yEnd, mtls->yEnd); // clamp the last slice to the launch range
+ if (yEnd <= yStart) { // nothing left to claim: this worker is done
+ return;
+ }
+
+ //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
+ //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
+ for (uint32_t y = yStart; y < yEnd; y++) {
+ uint32_t offset = mtls->dimX * y + mtls->xStart; // element index of (xStart, y): the pointers must match the first x handed to root, as wc_x already does
+ uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
+ const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
+
+ for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
+ ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0); // z and array index are always 0 on this path
+ xPtrIn += mtls->eStrideIn; // advance one element
+ xPtrOut += mtls->eStrideOut;
+ }
+ }
+ }
}
-void rsdScriptInvokeInit(const Context *dc, const Script *script) {
+static void wc_x(void *usr, uint32_t idx) { // Worker callback for 1D launches: repeatedly claims a slice of x and calls the script root per element. usr is the MTLaunchStruct; idx is only referenced by the disabled logging.
+ MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+
+ while (1) {
+ uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); // claim the next slice index from the shared counter
+ uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
+ uint32_t xEnd = xStart + mtls->mSliceSize;
+ xEnd = rsMin(xEnd, mtls->xEnd); // clamp the last slice to the launch range
+ if (xEnd <= xStart) { // nothing left to claim: this worker is done
+ return;
+ }
+
+ //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
+ //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
+ uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart); // pointers begin at this slice's first element
+ const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
+ for (uint32_t x = xStart; x < xEnd; x++) {
+ ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0); // y, z and array index are always 0 on this path
+ xPtrIn += mtls->eStrideIn; // advance one element
+ xPtrOut += mtls->eStrideOut;
+ }
+ }
+}
+
+void rsdScriptInvokeForEach(const Context *rsc, // Runs the script's root function once per element of the launch range, on the worker threads when possible, otherwise inline on the caller.
+ Script *s, // script to run; installed as the thread's current script for the duration of the call
+ const Allocation * ain, // optional input allocation (may be NULL if aout is given)
+ Allocation * aout, // optional output allocation (may be NULL if ain is given)
+ const void * usr, // opaque pointer forwarded to root
+ uint32_t usrLen, // not read by this function
+ const RsScriptCall *sc) { // optional sub-range; NULL or a zero xEnd/yEnd selects the full dimension
+
+ RsHal * dc = (RsHal *)rsc->mHal.drv;
+
+ MTLaunchStruct mtls;
+ memset(&mtls, 0, sizeof(mtls)); // every Start/End/dim field begins at 0
+
+ if (ain) { // dimensions come from the input when present, else from the output
+ mtls.dimX = ain->getType()->getDimX();
+ mtls.dimY = ain->getType()->getDimY();
+ mtls.dimZ = ain->getType()->getDimZ();
+ //mtls.dimArray = ain->getType()->getDimArray();
+ } else if (aout) {
+ mtls.dimX = aout->getType()->getDimX();
+ mtls.dimY = aout->getType()->getDimY();
+ mtls.dimZ = aout->getType()->getDimZ();
+ //mtls.dimArray = aout->getType()->getDimArray();
+ } else {
+ rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
+ return;
+ }
+
+ if (!sc || (sc->xEnd == 0)) { // no explicit x range: cover the whole row
+ mtls.xEnd = mtls.dimX;
+ } else {
+ rsAssert(sc->xStart < mtls.dimX);
+ rsAssert(sc->xEnd <= mtls.dimX);
+ rsAssert(sc->xStart < sc->xEnd);
+ mtls.xStart = rsMin(mtls.dimX, sc->xStart); // clamp to the allocation even if the asserts above fired
+ mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
+ if (mtls.xStart >= mtls.xEnd) return; // empty range: nothing to run
+ }
+
+ if (!sc || (sc->yEnd == 0)) { // no explicit y range: cover every row
+ mtls.yEnd = mtls.dimY;
+ } else {
+ rsAssert(sc->yStart < mtls.dimY);
+ rsAssert(sc->yEnd <= mtls.dimY);
+ rsAssert(sc->yStart < sc->yEnd);
+ mtls.yStart = rsMin(mtls.dimY, sc->yStart);
+ mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
+ if (mtls.yStart >= mtls.yEnd) return; // empty range: nothing to run
+ }
+
+ mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); // a dimension of 0 still yields one iteration
+ mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
+ mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
+ mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
+
+ rsAssert(mtls.dimZ == 0); // use the dimension captured above: ain may be NULL when only aout was supplied
+
+ Context *mrsc = (Context *)rsc; // rsdLaunchThreads and MTLaunchStruct take a non-const Context
+ Script * oldTLS = setTLS(s);
+
+ mtls.rsc = mrsc;
+ mtls.ain = ain;
+ mtls.aout = aout;
+ mtls.script = s;
+ mtls.usr = usr;
+ mtls.mSliceSize = 10; // rows/elements each worker claims at a time
+ mtls.mSliceNum = 0;
+
+ mtls.ptrIn = NULL;
+ mtls.eStrideIn = 0;
+ if (ain) {
+ mtls.ptrIn = (const uint8_t *)ain->getPtr();
+ mtls.eStrideIn = ain->getType()->getElementSizeBytes();
+ }
+
+ mtls.ptrOut = NULL;
+ mtls.eStrideOut = 0;
+ if (aout) {
+ mtls.ptrOut = (uint8_t *)aout->getPtr();
+ mtls.eStrideOut = aout->getType()->getElementSizeBytes();
+ }
+
+ if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { // parallel path: needs more than one worker and a threadable script
+ if (mtls.dimY > 1) {
+ rsdLaunchThreads(mrsc, wc_xy, &mtls); // slices of rows
+ } else {
+ rsdLaunchThreads(mrsc, wc_x, &mtls); // slices of x
+ }
+
+ //LOGE("launch 1");
+ } else {
+ //LOGE("launch 3");
+ for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) { // serial path on the calling thread
+ for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
+ for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
+ uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
+ mtls.dimX * mtls.dimY * z +
+ mtls.dimX * y + mtls.xStart; // start at element xStart of the row so the pointers match the first x handed to root
+ uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
+ const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
+
+ for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
+ ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar);
+ xPtrIn += mtls.eStrideIn; // advance one element
+ xPtrOut += mtls.eStrideOut;
+ }
+ }
+ }
+ }
+ }
+
+ setTLS(oldTLS); // restore whichever script was current before this call
+}
+
+
+int rsdScriptInvokeRoot(const Context *dc, Script *script) { // Calls the script's mRoot entry point with script installed as the thread's current script; returns mRoot's result.
+ DrvScript *drv = (DrvScript *)script->mHal.drv;
+
+ Script * oldTLS = setTLS(script); // remember the previously current script
+ int ret = drv->mRoot();
+ setTLS(oldTLS); // restore it before returning
+
+ return ret;
+}
+
+void rsdScriptInvokeInit(const Context *dc, Script *script) {
DrvScript *drv = (DrvScript *)script->mHal.drv;
if (drv->mInit) {
@@ -249,15 +460,17 @@
}
-void rsdScriptInvokeFunction(const Context *dc, const Script *script,
+void rsdScriptInvokeFunction(const Context *dc, Script *script, // Calls the invokable stored at mInvokeFunctions[slot], passing params and paramLength, with script as the thread's current script.
uint32_t slot,
const void *params,
size_t paramLength) {
DrvScript *drv = (DrvScript *)script->mHal.drv;
//LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
+ Script * oldTLS = setTLS(script); // remember the previously current script
((void (*)(const void *, uint32_t))
drv->mInvokeFunctions[slot])(params, paramLength);
+ setTLS(oldTLS); // restore it once the call returns
}
void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
diff --git a/libs/rs/driver/rsdBcc.h b/libs/rs/driver/rsdBcc.h
index f697f29..6723a36 100644
--- a/libs/rs/driver/rsdBcc.h
+++ b/libs/rs/driver/rsdBcc.h
@@ -25,14 +25,23 @@
uint8_t const *bitcode, size_t bitcodeSize,
uint32_t flags, android::renderscript::RsHalSymbolLookupFunc lookupFunc);
void rsdScriptInvokeFunction(const android::renderscript::Context *dc,
- const android::renderscript::Script *script,
+ android::renderscript::Script *script,
uint32_t slot,
const void *params,
size_t paramLength);
+
+void rsdScriptInvokeForEach(const android::renderscript::Context *rsc,
+ android::renderscript::Script *s,
+ const android::renderscript::Allocation * ain,
+ android::renderscript::Allocation * aout,
+ const void * usr,
+ uint32_t usrLen,
+ const RsScriptCall *sc);
+
int rsdScriptInvokeRoot(const android::renderscript::Context *dc,
- const android::renderscript::Script *script);
+ android::renderscript::Script *script);
void rsdScriptInvokeInit(const android::renderscript::Context *dc,
- const android::renderscript::Script *script);
+ android::renderscript::Script *script);
void rsdScriptSetGlobalVar(const android::renderscript::Context *,
const android::renderscript::Script *,
diff --git a/libs/rs/driver/rsdCore.cpp b/libs/rs/driver/rsdCore.cpp
index 79fcab5..bb6cce9 100644
--- a/libs/rs/driver/rsdCore.cpp
+++ b/libs/rs/driver/rsdCore.cpp
@@ -20,16 +20,29 @@
#include <malloc.h>
#include "rsContext.h"
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sched.h>
+#include <cutils/properties.h>
+#include <cutils/sched_policy.h>
+#include <sys/syscall.h>
+#include <string.h>
+
using namespace android;
using namespace android::renderscript;
+static void Shutdown(Context *rsc);
+static void SetPriority(const Context *rsc, int32_t priority);
+
static RsdHalFunctions FunctionTable = {
+ Shutdown,
NULL,
- NULL,
+ SetPriority,
{
rsdScriptInit,
rsdScriptInvokeFunction,
rsdScriptInvokeRoot,
+ rsdScriptInvokeForEach,
rsdScriptInvokeInit,
rsdScriptSetGlobalVar,
rsdScriptSetGlobalBind,
@@ -39,16 +52,134 @@
};
+
+static void * HelperThreadProc(void *vrsc) { // Entry point of each driver worker thread; vrsc is the owning Context. Loops running launch callbacks until dc->mExit is set.
+ Context *rsc = static_cast<Context *>(vrsc);
+ RsHal *dc = (RsHal *)rsc->mHal.drv;
+
+
+ uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount); // this thread's index into the per-worker arrays
+
+ //LOGV("RS helperThread starting %p idx=%i", rsc, idx);
+
+ dc->mWorkers.mLaunchSignals[idx].init();
+ dc->mWorkers.mNativeThreadId[idx] = gettid(); // recorded so SetPriority can address this thread
+
+#if 0 // disabled affinity experiment; note it still reads rsc->mWorkers, which this change removes from Context
+ typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
+ cpu_set_t cpuset;
+ memset(&cpuset, 0, sizeof(cpuset));
+ cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
+ int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
+ sizeof(cpuset), &cpuset);
+ LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
+#endif
+
+ int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct); // give this thread the context's TLS record (read by setTLS / GET_TLS)
+ if (status) {
+ LOGE("pthread_setspecific %i", status);
+ }
+
+ while (!dc->mExit) {
+ dc->mWorkers.mLaunchSignals[idx].wait(); // block until rsdLaunchThreads, rsdHalInit or Shutdown signals this worker
+ if (dc->mWorkers.mLaunchCallback) { // callback is NULL during init and shutdown
+ dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx);
+ }
+ android_atomic_dec(&dc->mWorkers.mRunningCount); // report this worker as finished
+ dc->mWorkers.mCompleteSignal.set(); // wake the launcher so it re-checks mRunningCount
+ }
+
+ //LOGV("RS helperThread exited %p idx=%i", rsc, idx);
+ return NULL;
+}
+
+void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) { // Runs cbk(data, workerIndex) on every worker thread and returns once mRunningCount has dropped to 0.
+ RsHal *dc = (RsHal *)rsc->mHal.drv;
+
+ dc->mWorkers.mLaunchData = data;
+ dc->mWorkers.mLaunchCallback = cbk;
+ android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); // each worker decrements this when its callback returns
+ for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
+ dc->mWorkers.mLaunchSignals[ct].set(); // wake worker ct
+ }
+ while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) {
+ dc->mWorkers.mCompleteSignal.wait(); // set by each worker as it finishes
+ }
+}
+
bool rsdHalInit(Context *rsc, uint32_t version_major, uint32_t version_minor) {
rsc->mHal.funcs = FunctionTable;
- /*
- rsc->mHal.drv = (RsHal *)calloc(1, sizeof(RsHal));
+ RsHal *dc = (RsHal *)calloc(1, sizeof(RsHal)); // driver state for this context: zero-initialised, then published in rsc->mHal.drv; the worker threads are started below
- if (!rsc->mHal.drv) {
+ if (!dc) { // test the allocation itself; rsc->mHal.drv is not assigned until after this check
return false;
}
- */
+ rsc->mHal.drv = dc; // must be set before the workers start: HelperThreadProc reads it
+
+ int cpu = sysconf(_SC_NPROCESSORS_ONLN);
+ LOGV("RS Launching thread(s), reported CPU count %i", cpu);
+ if (cpu < 2) cpu = 0; // fewer than two CPUs reported: create no workers
+
+ dc->mWorkers.mCount = (uint32_t)cpu;
+ dc->mWorkers.mThreadId = (pthread_t *) calloc(dc->mWorkers.mCount, sizeof(pthread_t));
+ dc->mWorkers.mNativeThreadId = (pid_t *) calloc(dc->mWorkers.mCount, sizeof(pid_t));
+ dc->mWorkers.mLaunchSignals = new Signal[dc->mWorkers.mCount];
+ dc->mWorkers.mLaunchCallback = NULL; // workers skip the callback while this is NULL
+
+ dc->mWorkers.mCompleteSignal.init();
+
+ android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); // counted down by the workers; polled below
+ android_atomic_release_store(0, &dc->mWorkers.mLaunchCount); // source of each worker's index
+
+ int status;
+ pthread_attr_t threadAttr;
+ status = pthread_attr_init(&threadAttr);
+ if (status) {
+ LOGE("Failed to init thread attribute.");
+ return false; // NOTE(review): dc and the arrays allocated above are not released on this path
+ }
+
+ for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
+ status = pthread_create(&dc->mWorkers.mThreadId[ct], &threadAttr, HelperThreadProc, rsc);
+ if (status) {
+ dc->mWorkers.mCount = ct; // NOTE(review): mRunningCount was primed with the original count; confirm the wait below can still reach 0 when fewer threads start
+ LOGE("Created fewer than expected number of RS threads.");
+ break;
+ }
+ }
+ while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { // wait for every worker to decrement once; presumably each worker's first wait() returns without a launch (confirm Signal's initial state)
+ usleep(100);
+ }
+
+ pthread_attr_destroy(&threadAttr);
return true;
}
+
+void SetPriority(const Context *rsc, int32_t priority) { // Applies priority to every worker thread via setpriority(PRIO_PROCESS, ...), using the ids recorded by HelperThreadProc.
+ RsHal *dc = (RsHal *)rsc->mHal.drv;
+ for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
+ setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority); // return value is not checked
+ }
+}
+
+void Shutdown(Context *rsc) { // Asks every worker thread to exit and joins them.
+ RsHal *dc = (RsHal *)rsc->mHal.drv;
+
+ dc->mExit = true; // checked by each worker at the top of its loop
+ dc->mWorkers.mLaunchData = NULL;
+ dc->mWorkers.mLaunchCallback = NULL; // woken workers run no callback, just decrement and re-check mExit
+ android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount);
+ for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
+ dc->mWorkers.mLaunchSignals[ct].set(); // wake worker ct so it can observe mExit
+ }
+ int status;
+ void *res;
+ for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) {
+ status = pthread_join(dc->mWorkers.mThreadId[ct], &res); // status is stored but never examined
+ }
+ rsAssert(android_atomic_acquire_load(&dc->mWorkers.mRunningCount) == 0); // every joined worker should have decremented once
+} // NOTE(review): dc and the arrays allocated in rsdHalInit are not released in this function; confirm where they are freed
+
+
diff --git a/libs/rs/driver/rsdCore.h b/libs/rs/driver/rsdCore.h
index 78596a1..02b2fbc 100644
--- a/libs/rs/driver/rsdCore.h
+++ b/libs/rs/driver/rsdCore.h
@@ -20,28 +20,36 @@
#include <rs_hal.h>
#include <bcc/bcc.h>
-typedef void (* InvokeFunc_t)(void);
+#include "rsMutex.h"
+#include "rsSignal.h"
-struct RsHalRec {
+
+typedef void (* InvokeFunc_t)(void);
+typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
+
+typedef struct RsHalRec { // Per-context driver state; rsdHalInit allocates it and stores it in rsc->mHal.drv.
uint32_t version_major;
uint32_t version_minor;
-};
-struct RsHalProgramStoreRec {
-};
+ struct Workers { // worker-thread pool used by rsdLaunchThreads
+ volatile int mRunningCount; // workers that have not yet finished the current launch; decremented by each worker
+ volatile int mLaunchCount; // incremented once per worker at startup to hand out its index
+ uint32_t mCount; // number of worker threads
+ pthread_t *mThreadId; // mCount entries, filled by pthread_create
+ pid_t *mNativeThreadId; // mCount entries, each set to gettid() by its worker
+ android::renderscript::Signal mCompleteSignal; // set by a worker after each callback; the launcher waits on it
-struct RsHalProgramRasterRec {
-};
-
-struct RsHalProgramVertexRec {
-};
-
-struct RsHalProgramFragmentRec {
-
-};
+ android::renderscript::Signal *mLaunchSignals; // one per worker; set to wake that worker
+ WorkerCallback_t mLaunchCallback; // work to run on wake-up; NULL means just acknowledge
+ void *mLaunchData; // passed to mLaunchCallback as usr
+ };
+ Workers mWorkers;
+ bool mExit; // set by Shutdown and read by the worker loops; NOTE(review): plain bool shared across threads, confirm this is sufficient
+} RsHal;
+void rsdLaunchThreads(android::renderscript::Context *rsc, WorkerCallback_t cbk, void *data);
#endif
diff --git a/libs/rs/rsContext.cpp b/libs/rs/rsContext.cpp
index 7dc26d2..339a773 100644
--- a/libs/rs/rsContext.cpp
+++ b/libs/rs/rsContext.cpp
@@ -554,56 +554,6 @@
mExit = true;
}
-void * Context::helperThreadProc(void *vrsc) {
- Context *rsc = static_cast<Context *>(vrsc);
- uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount);
-
- //LOGV("RS helperThread starting %p idx=%i", rsc, idx);
-
- rsc->mWorkers.mLaunchSignals[idx].init();
- rsc->mWorkers.mNativeThreadId[idx] = gettid();
-
-#if 0
- typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
- cpu_set_t cpuset;
- memset(&cpuset, 0, sizeof(cpuset));
- cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
- int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
- sizeof(cpuset), &cpuset);
- LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
-#endif
-
- setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority);
- int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct);
- if (status) {
- LOGE("pthread_setspecific %i", status);
- }
-
- while (!rsc->mExit) {
- rsc->mWorkers.mLaunchSignals[idx].wait();
- if (rsc->mWorkers.mLaunchCallback) {
- rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx);
- }
- android_atomic_dec(&rsc->mWorkers.mRunningCount);
- rsc->mWorkers.mCompleteSignal.set();
- }
-
- //LOGV("RS helperThread exited %p idx=%i", rsc, idx);
- return NULL;
-}
-
-void Context::launchThreads(WorkerCallback_t cbk, void *data) {
- mWorkers.mLaunchData = data;
- mWorkers.mLaunchCallback = cbk;
- android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
- for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
- mWorkers.mLaunchSignals[ct].set();
- }
- while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
- mWorkers.mCompleteSignal.wait();
- }
-}
-
void Context::setPriority(int32_t p) {
// Note: If we put this in the proper "background" policy
// the wallpapers can become completly unresponsive at times.
@@ -620,9 +570,6 @@
}
#else
setpriority(PRIO_PROCESS, mNativeThreadId, p);
- for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
- setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p);
- }
#endif
}
@@ -691,16 +638,8 @@
if (!rsdHalInit(this, 0, 0)) {
return false;
}
+ mHal.funcs.setPriority(this, mThreadPriority);
- int cpu = sysconf(_SC_NPROCESSORS_ONLN);
- LOGV("RS Launching thread(s), reported CPU count %i", cpu);
- if (cpu < 2) cpu = 0;
-
- mWorkers.mCount = (uint32_t)cpu;
- mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
- mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
- mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
- mWorkers.mLaunchCallback = NULL;
status = pthread_create(&mThreadId, &threadAttr, threadProc, this);
if (status) {
LOGE("Failed to start rs context thread.");
@@ -714,20 +653,6 @@
return false;
}
- mWorkers.mCompleteSignal.init();
- android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
- android_atomic_release_store(0, &mWorkers.mLaunchCount);
- for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
- status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
- if (status) {
- mWorkers.mCount = ct;
- LOGE("Created fewer than expected number of RS threads.");
- break;
- }
- }
- while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
- usleep(100);
- }
pthread_attr_destroy(&threadAttr);
return true;
}
@@ -744,17 +669,10 @@
mIO.shutdown();
int status = pthread_join(mThreadId, &res);
- // Cleanup compute threads.
- mWorkers.mLaunchData = NULL;
- mWorkers.mLaunchCallback = NULL;
- android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
- for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
- mWorkers.mLaunchSignals[ct].set();
+
+ if (mHal.funcs.shutdownDriver) {
+ mHal.funcs.shutdownDriver(this);
}
- for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
- status = pthread_join(mWorkers.mThreadId[ct], &res);
- }
- rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0);
// Global structure cleanup.
pthread_mutex_lock(&gInitMutex);
diff --git a/libs/rs/rsContext.h b/libs/rs/rsContext.h
index dee16d6..72574a60 100644
--- a/libs/rs/rsContext.h
+++ b/libs/rs/rsContext.h
@@ -43,7 +43,6 @@
#include "rsgApiStructs.h"
#include "rsLocklessFifo.h"
-
#include <ui/egl/android_natives.h>
#endif // ANDROID_RS_SERIALIZE
@@ -91,15 +90,6 @@
// Library mutex (for providing thread-safe calls from the runtime)
static pthread_mutex_t gLibMutex;
- struct ScriptTLSStruct {
- Context * mContext;
- Script * mScript;
- };
-
- //const RsHalComputeFunctions *mHalComputeFuncs;
- //const RsHalGraphicsFunctions *mHalGraphicsFuncs;
- //RsHal *mHal;
-
class PushState {
public:
PushState(Context *);
@@ -117,9 +107,6 @@
ScriptTLSStruct *mTlsStruct;
RsSurfaceConfig mUserSurfaceConfig;
- typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
-
- //StructuredAllocationContext mStateAllocation;
ElementState mStateElement;
TypeState mStateType;
SamplerState mStateSampler;
@@ -230,8 +217,6 @@
uint32_t getMaxVertexUniformVectors() const {return mGL.mMaxVertexUniformVectors;}
uint32_t getMaxVertexAttributes() const {return mGL.mMaxVertexAttribs;}
- void launchThreads(WorkerCallback_t cbk, void *data);
- uint32_t getWorkerPoolSize() const {return (uint32_t)mWorkers.mCount;}
uint32_t getDPI() const {return mDPI;}
void setDPI(uint32_t dpi) {mDPI = dpi;}
@@ -288,20 +273,6 @@
pthread_t mThreadId;
pid_t mNativeThreadId;
- struct Workers {
- volatile int mRunningCount;
- volatile int mLaunchCount;
- uint32_t mCount;
- pthread_t *mThreadId;
- pid_t *mNativeThreadId;
- Signal mCompleteSignal;
-
- Signal *mLaunchSignals;
- WorkerCallback_t mLaunchCallback;
- void *mLaunchData;
- };
- Workers mWorkers;
-
ObjectBaseRef<Script> mRootScript;
ObjectBaseRef<ProgramFragment> mFragment;
ObjectBaseRef<ProgramVertex> mVertex;
diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp
index f99534f..d5c486b 100644
--- a/libs/rs/rsScriptC.cpp
+++ b/libs/rs/rsScriptC.cpp
@@ -38,9 +38,6 @@
ScriptC::~ScriptC() {
mRSC->mHal.funcs.script.destroy(mRSC, this);
-
- //free(mEnviroment.mScriptText);
- //mEnviroment.mScriptText = NULL;
}
void ScriptC::setupScript(Context *rsc) {
@@ -79,15 +76,6 @@
return NULL;
}
-Script * ScriptC::setTLS(Script *sc) {
- Context::ScriptTLSStruct * tls = (Context::ScriptTLSStruct *)
- pthread_getspecific(Context::gThreadTLSKey);
- rsAssert(tls);
- Script *old = tls->mScript;
- tls->mScript = sc;
- return old;
-}
-
void ScriptC::setupGLState(Context *rsc) {
if (mEnviroment.mFragmentStore.get()) {
rsc->setProgramStore(mEnviroment.mFragmentStore.get());
@@ -113,215 +101,32 @@
setupScript(rsc);
uint32_t ret = 0;
- Script * oldTLS = setTLS(this);
if (rsc->props.mLogScripts) {
LOGV("%p ScriptC::run invoking root, ptr %p", rsc, mHal.info.root);
}
- ret = mHal.info.root();
+ ret = rsc->mHal.funcs.script.invokeRoot(rsc, this);
if (rsc->props.mLogScripts) {
LOGV("%p ScriptC::run invoking complete, ret=%i", rsc, ret);
}
- setTLS(oldTLS);
return ret;
}
-typedef struct {
- Context *rsc;
- ScriptC *script;
- const Allocation * ain;
- Allocation * aout;
- const void * usr;
-
- uint32_t mSliceSize;
- volatile int mSliceNum;
-
- const uint8_t *ptrIn;
- uint32_t eStrideIn;
- uint8_t *ptrOut;
- uint32_t eStrideOut;
-
- uint32_t xStart;
- uint32_t xEnd;
- uint32_t yStart;
- uint32_t yEnd;
- uint32_t zStart;
- uint32_t zEnd;
- uint32_t arrayStart;
- uint32_t arrayEnd;
-
- uint32_t dimX;
- uint32_t dimY;
- uint32_t dimZ;
- uint32_t dimArray;
-} MTLaunchStruct;
-typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
-
-static void wc_xy(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
-
- while (1) {
- uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
- uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
- uint32_t yEnd = yStart + mtls->mSliceSize;
- yEnd = rsMin(yEnd, mtls->yEnd);
- if (yEnd <= yStart) {
- return;
- }
-
- //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
- //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
- for (uint32_t y = yStart; y < yEnd; y++) {
- uint32_t offset = mtls->dimX * y;
- uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
- const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
-
- for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
- ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
- xPtrIn += mtls->eStrideIn;
- xPtrOut += mtls->eStrideOut;
- }
- }
- }
-}
-
-static void wc_x(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
-
- while (1) {
- uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
- uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
- uint32_t xEnd = xStart + mtls->mSliceSize;
- xEnd = rsMin(xEnd, mtls->xEnd);
- if (xEnd <= xStart) {
- return;
- }
-
- //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
- //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
- uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
- const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
- for (uint32_t x = xStart; x < xEnd; x++) {
- ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0);
- xPtrIn += mtls->eStrideIn;
- xPtrOut += mtls->eStrideOut;
- }
- }
-}
void ScriptC::runForEach(Context *rsc,
const Allocation * ain,
Allocation * aout,
const void * usr,
const RsScriptCall *sc) {
- MTLaunchStruct mtls;
- memset(&mtls, 0, sizeof(mtls));
+
Context::PushState ps(rsc);
-
- if (ain) {
- mtls.dimX = ain->getType()->getDimX();
- mtls.dimY = ain->getType()->getDimY();
- mtls.dimZ = ain->getType()->getDimZ();
- //mtls.dimArray = ain->getType()->getDimArray();
- } else if (aout) {
- mtls.dimX = aout->getType()->getDimX();
- mtls.dimY = aout->getType()->getDimY();
- mtls.dimZ = aout->getType()->getDimZ();
- //mtls.dimArray = aout->getType()->getDimArray();
- } else {
- rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
- return;
- }
-
- if (!sc || (sc->xEnd == 0)) {
- mtls.xEnd = mtls.dimX;
- } else {
- rsAssert(sc->xStart < mtls.dimX);
- rsAssert(sc->xEnd <= mtls.dimX);
- rsAssert(sc->xStart < sc->xEnd);
- mtls.xStart = rsMin(mtls.dimX, sc->xStart);
- mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
- if (mtls.xStart >= mtls.xEnd) return;
- }
-
- if (!sc || (sc->yEnd == 0)) {
- mtls.yEnd = mtls.dimY;
- } else {
- rsAssert(sc->yStart < mtls.dimY);
- rsAssert(sc->yEnd <= mtls.dimY);
- rsAssert(sc->yStart < sc->yEnd);
- mtls.yStart = rsMin(mtls.dimY, sc->yStart);
- mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
- if (mtls.yStart >= mtls.yEnd) return;
- }
-
- mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
- mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
- mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
- mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
-
- rsAssert(ain->getType()->getDimZ() == 0);
-
setupGLState(rsc);
setupScript(rsc);
- Script * oldTLS = setTLS(this);
-
- mtls.rsc = rsc;
- mtls.ain = ain;
- mtls.aout = aout;
- mtls.script = this;
- mtls.usr = usr;
- mtls.mSliceSize = 10;
- mtls.mSliceNum = 0;
-
- mtls.ptrIn = NULL;
- mtls.eStrideIn = 0;
- if (ain) {
- mtls.ptrIn = (const uint8_t *)ain->getPtr();
- mtls.eStrideIn = ain->getType()->getElementSizeBytes();
- }
-
- mtls.ptrOut = NULL;
- mtls.eStrideOut = 0;
- if (aout) {
- mtls.ptrOut = (uint8_t *)aout->getPtr();
- mtls.eStrideOut = aout->getType()->getElementSizeBytes();
- }
-
- if ((rsc->getWorkerPoolSize() > 1) && mHal.info.isThreadable) {
- if (mtls.dimY > 1) {
- rsc->launchThreads(wc_xy, &mtls);
- } else {
- rsc->launchThreads(wc_x, &mtls);
- }
-
- //LOGE("launch 1");
- } else {
- //LOGE("launch 3");
- for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
- for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
- for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
- uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
- mtls.dimX * mtls.dimY * z +
- mtls.dimX * y;
- uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
- const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
-
- for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
- ((rs_t)mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar);
- xPtrIn += mtls.eStrideIn;
- xPtrOut += mtls.eStrideOut;
- }
- }
- }
- }
- }
-
- setTLS(oldTLS);
+ rsc->mHal.funcs.script.invokeForEach(rsc, this, ain, aout, usr, 0, sc);
}
void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len) {
@@ -330,14 +135,11 @@
return;
}
setupScript(rsc);
- Script * oldTLS = setTLS(this);
if (rsc->props.mLogScripts) {
LOGV("%p ScriptC::Invoke invoking slot %i, ptr %p", rsc, slot, this);
}
rsc->mHal.funcs.script.invokeFunction(rsc, this, slot, data, len);
-
- setTLS(oldTLS);
}
ScriptCState::ScriptCState() {
diff --git a/libs/rs/rsScriptC.h b/libs/rs/rsScriptC.h
index da5cb2b..2edeb9b 100644
--- a/libs/rs/rsScriptC.h
+++ b/libs/rs/rsScriptC.h
@@ -56,7 +56,7 @@
bool runCompiler(Context *rsc, const char *resName, const char *cacheDir,
const uint8_t *bitcode, size_t bitcodeLen);
-protected:
+//protected:
void setupScript(Context *);
void setupGLState(Context *);
Script * setTLS(Script *);
diff --git a/libs/rs/rsScriptC_Lib.cpp b/libs/rs/rsScriptC_Lib.cpp
index 8095f5a..4e8cbdc 100644
--- a/libs/rs/rsScriptC_Lib.cpp
+++ b/libs/rs/rsScriptC_Lib.cpp
@@ -25,8 +25,8 @@
using namespace android;
using namespace android::renderscript;
-#define GET_TLS() Context::ScriptTLSStruct * tls = \
- (Context::ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \
+#define GET_TLS() ScriptTLSStruct * tls = \
+ (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \
Context * rsc = tls->mContext; \
ScriptC * sc = (ScriptC *) tls->mScript
@@ -1006,6 +1006,3 @@
return NULL;
}
-
-
-
diff --git a/libs/rs/rsScriptC_LibGL.cpp b/libs/rs/rsScriptC_LibGL.cpp
index 15426bc..4047049 100644
--- a/libs/rs/rsScriptC_LibGL.cpp
+++ b/libs/rs/rsScriptC_LibGL.cpp
@@ -32,8 +32,8 @@
using namespace android;
using namespace android::renderscript;
-#define GET_TLS() Context::ScriptTLSStruct * tls = \
- (Context::ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \
+#define GET_TLS() ScriptTLSStruct * tls = \
+ (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \
Context * rsc = tls->mContext; \
ScriptC * sc = (ScriptC *) tls->mScript
diff --git a/libs/rs/rs_hal.h b/libs/rs/rs_hal.h
index 48e3f36..17983ce 100644
--- a/libs/rs/rs_hal.h
+++ b/libs/rs/rs_hal.h
@@ -31,18 +31,21 @@
class ScriptC;
-typedef struct RsHalRec RsHal;
-
typedef void *(*RsHalSymbolLookupFunc)(void *usrptr, char const *symbolName);
+typedef struct ScriptTLSStructRec { // Per-thread record stored under Context::gThreadTLSKey; read by the GET_TLS() macros and the driver's setTLS().
+ Context * mContext; // context the thread is working for
+ Script * mScript; // script currently executing on the thread; swapped by setTLS()
+} ScriptTLSStruct;
/**
* Script management functions
*/
typedef struct {
- void (*shutdownDriver)(RsHal dc);
+ void (*shutdownDriver)(Context *);
void (*getVersion)(unsigned int *major, unsigned int *minor);
+ void (*setPriority)(const Context *, int32_t priority);
@@ -55,12 +58,19 @@
uint32_t flags,
RsHalSymbolLookupFunc lookupFunc);
- void (*invokeFunction)(const Context *rsc, const Script *s,
+ void (*invokeFunction)(const Context *rsc, Script *s,
uint32_t slot,
const void *params,
size_t paramLength);
- int (*invokeRoot)(const Context *rsc, const Script *s);
- void (*invokeInit)(const Context *rsc, const Script *s);
+ int (*invokeRoot)(const Context *rsc, Script *s);
+ void (*invokeForEach)(const Context *rsc,
+ Script *s,
+ const Allocation * ain,
+ Allocation * aout,
+ const void * usr,
+ uint32_t usrLen,
+ const RsScriptCall *sc);
+ void (*invokeInit)(const Context *rsc, Script *s);
void (*setGlobalVar)(const Context *rsc, const Script *s,
uint32_t slot,