Support for general reduction kernels.
Requires coordinated change in frameworks/base.
Requires coordinated change in frameworks/compile/libbcc in order
for RsTest to run.
At present, general reduction kernels are run single-threaded.
Also: Remove dead struct field MTLaunchStructForEach::sig.
Bug: 23535724
Change-Id: Ice17ccf20a902f8a106eaa62ec071d46e3c0ad8c
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 5adca54..7308b54 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -497,6 +497,7 @@
// Copy info over to runtime
script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
script->mHal.info.exportedReduceCount = mScriptExec->getExportedReduceCount();
+ script->mHal.info.exportedReduceNewCount = mScriptExec->getExportedReduceNewCount();
script->mHal.info.exportedForEachCount = mScriptExec->getExportedForEachCount();
script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
@@ -553,7 +554,7 @@
return true;
}
-// Preliminary work to prepare a reduce-style kernel for launch.
+// Preliminary work to prepare a simple reduce-style kernel for launch.
bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation *ain,
const Allocation *aout,
const RsScriptCall *sc,
@@ -591,6 +592,77 @@
return true;
}
+// Preliminary work to prepare a general reduce-style kernel for launch.
+bool RsdCpuScriptImpl::reduceNewMtlsSetup(const Allocation ** ains,
+ uint32_t inLen,
+ const Allocation * aout,
+ const RsScriptCall *sc,
+ MTLaunchStructReduceNew *mtls) {
+ rsAssert(ains && (inLen >= 1) && aout);
+ memset(mtls, 0, sizeof(MTLaunchStructReduceNew));
+ mtls->dimPtr = &mtls->redp.dim;
+
+ for (int index = inLen; --index >= 0;) {
+ if (allocationLODIsNull(ains[index])) {
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "reduce called with null in allocations");
+ return false;
+ }
+ }
+
+ if (allocationLODIsNull(aout)) {
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "reduce called with null out allocation");
+ return false;
+ }
+
+ const Allocation *ain0 = ains[0];
+ const Type *inType = ain0->getType();
+
+ mtls->redp.dim.x = inType->getDimX();
+ mtls->redp.dim.y = inType->getDimY();
+ mtls->redp.dim.z = inType->getDimZ();
+
+ for (int Index = inLen; --Index >= 1;) {
+ if (!ain0->hasSameDims(ains[Index])) {
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "Failed to launch reduction kernel;"
+ "dimensions of input allocations do not match.");
+ return false;
+ }
+ }
+
+ if (!setUpMtlsDimensions(mtls, mtls->redp.dim, sc)) {
+ return false;
+ }
+
+ // The X & Y walkers always want 0-1 min even if dim is not present
+ mtls->end.x = rsMax((uint32_t)1, mtls->end.x);
+ mtls->end.y = rsMax((uint32_t)1, mtls->end.y);
+
+ mtls->rs = mCtx;
+
+ // Currently not threaded.
+ mtls->isThreadable = false;
+ mtls->mSliceNum = -1;
+
+ // Set up output,
+ mtls->redp.outLen = 1;
+ mtls->redp.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
+ mtls->redp.outStride[0] = aout->getType()->getElementSizeBytes();
+
+ // Set up input.
+ memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
+ mtls->redp.inLen = inLen;
+ for (int index = inLen; --index >= 0;) {
+ mtls->redp.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
+ mtls->redp.inStride[index] = ains[index]->getType()->getElementSizeBytes();
+ }
+
+ // All validation passed, ok to launch threads
+ return true;
+}
+
// Preliminary work to prepare a forEach-style kernel for launch.
bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
uint32_t inLen,
@@ -626,13 +698,11 @@
for (int Index = inLen; --Index >= 1;) {
if (!ain0->hasSameDims(ains[Index])) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; dimensions of input and output"
+ "Failed to launch kernel; dimensions of input"
"allocations do not match.");
-
return false;
}
}
-
} else if (aout != nullptr) {
const Type *outType = aout->getType();
@@ -729,12 +799,25 @@
}
}
+void RsdCpuScriptImpl::invokeReduceNew(uint32_t slot,
+ const Allocation ** ains, uint32_t inLen,
+ Allocation *aout,
+ const RsScriptCall *sc) {
+ MTLaunchStructReduceNew mtls;
+
+ if (reduceNewMtlsSetup(ains, inLen, aout, sc, &mtls)) {
+ reduceNewKernelSetup(slot, &mtls);
+ RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this);
+ mCtx->launchReduceNew(ains, inLen, aout, &mtls);
+ mCtx->setTLS(oldTLS);
+ }
+}
+
void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls) {
mtls->script = this;
mtls->fep.slot = slot;
mtls->kernel = mScriptExec->getForEachFunction(slot);
rsAssert(mtls->kernel != nullptr);
- mtls->sig = mScriptExec->getForEachSignature(slot);
}
void RsdCpuScriptImpl::reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls) {
@@ -743,6 +826,19 @@
rsAssert(mtls->kernel != nullptr);
}
+void RsdCpuScriptImpl::reduceNewKernelSetup(uint32_t slot, MTLaunchStructReduceNew *mtls) {
+ mtls->script = this;
+ mtls->redp.slot = slot;
+
+ const ReduceNewDescription *desc = mScriptExec->getReduceNewDescription(slot);
+ mtls->accumFunc = desc->accumFunc;
+ mtls->initFunc = desc->initFunc; // might legally be nullptr
+ mtls->outFunc = desc->outFunc; // might legally be nullptr
+ mtls->accumSize = desc->accumSize;
+
+ rsAssert(mtls->accumFunc != nullptr);
+}
+
int RsdCpuScriptImpl::invokeRoot() {
RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
int ret = mRoot();