blob: 6bc98b419af13788478103d34c77b32739fde284 [file] [log] [blame]
#include "rsCpuScriptGroup2.h"

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <algorithm>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#ifndef RS_COMPATIBILITY_LIB
#include "bcc/Config/Config.h"
#include <sys/wait.h>
#endif

#include "cpu_ref/rsCpuCore.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
#include "rsCpuExecutable.h"
#include "rsCpuScript.h"
#include "rsScript.h"
#include "rsScriptGroup2.h"
#include "rsScriptIntrinsic.h"
27
28using std::string;
29using std::vector;
Yang Ni1ffd86b2015-01-07 09:16:40 -080030
31namespace android {
32namespace renderscript {
33
34namespace {
35
Yang Nida0f0692015-01-12 13:03:40 -080036const size_t DefaultKernelArgCount = 2;
Yang Ni1ffd86b2015-01-07 09:16:40 -080037
38void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
39 uint32_t xend, uint32_t outstep) {
Yang Niff2bb542015-02-02 14:33:47 -080040 const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr;
Yang Nieb9aa672015-01-27 14:32:25 -080041 RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
42 const void **oldIns = kparams->ins;
43 uint32_t *oldStrides = kparams->inEStrides;
Yang Ni1ffd86b2015-01-07 09:16:40 -080044
Yang Nieb9aa672015-01-27 14:32:25 -080045 std::vector<const void*> ins(DefaultKernelArgCount);
46 std::vector<uint32_t> strides(DefaultKernelArgCount);
Yang Ni1ffd86b2015-01-07 09:16:40 -080047
Yang Nieb9aa672015-01-27 14:32:25 -080048 for (CPUClosure* cpuClosure : closures) {
49 const Closure* closure = cpuClosure->mClosure;
Yang Ni1ffd86b2015-01-07 09:16:40 -080050
Yang Nieb9aa672015-01-27 14:32:25 -080051 auto in_iter = ins.begin();
52 auto stride_iter = strides.begin();
Yang Ni1ffd86b2015-01-07 09:16:40 -080053
Yang Niff2bb542015-02-02 14:33:47 -080054 for (size_t i = 0; i < closure->mNumArg; i++) {
55 const void* arg = closure->mArgs[i];
Yang Nieb9aa672015-01-27 14:32:25 -080056 const Allocation* a = (const Allocation*)arg;
57 const uint32_t eStride = a->mHal.state.elementSizeBytes;
58 const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
59 eStride * xstart;
60 if (kparams->dimY > 1) {
61 ptr += a->mHal.drvState.lod[0].stride * kparams->y;
62 }
63 *in_iter++ = ptr;
64 *stride_iter++ = eStride;
65 }
66
67 mutable_kparams->ins = &ins[0];
68 mutable_kparams->inEStrides = &strides[0];
69
70 const Allocation* out = closure->mReturnValue;
71 const uint32_t ostep = out->mHal.state.elementSizeBytes;
72 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
73 ostep * xstart;
74 if (kparams->dimY > 1) {
75 ptr += out->mHal.drvState.lod[0].stride * kparams->y;
76 }
77
78 mutable_kparams->out = (void*)ptr;
79
Yang Nieb9aa672015-01-27 14:32:25 -080080 cpuClosure->mFunc(kparams, xstart, xend, ostep);
Yang Ni1ffd86b2015-01-07 09:16:40 -080081 }
82
Yang Nieb9aa672015-01-27 14:32:25 -080083 mutable_kparams->ins = oldIns;
84 mutable_kparams->inEStrides = oldStrides;
Yang Ni1ffd86b2015-01-07 09:16:40 -080085}
86
Yang Nida0f0692015-01-12 13:03:40 -080087} // namespace
88
Yang Ni062c2872015-02-20 15:20:00 -080089Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
90 mGroup(group), mFunc(nullptr) {
91 mName = strndup(name, strlen(name));
92}
93
Yang Nida0f0692015-01-12 13:03:40 -080094Batch::~Batch() {
Yang Nieb9aa672015-01-27 14:32:25 -080095 for (CPUClosure* c : mClosures) {
96 delete c;
97 }
Yang Ni062c2872015-02-20 15:20:00 -080098 free(mName);
Yang Nida0f0692015-01-12 13:03:40 -080099}
100
Yang Niff2bb542015-02-02 14:33:47 -0800101bool Batch::conflict(CPUClosure* cpuClosure) const {
Yang Nieb9aa672015-01-27 14:32:25 -0800102 if (mClosures.empty()) {
103 return false;
104 }
Yang Nida0f0692015-01-12 13:03:40 -0800105
Yang Niff2bb542015-02-02 14:33:47 -0800106 const Closure* closure = cpuClosure->mClosure;
107
Yang Ni062c2872015-02-20 15:20:00 -0800108 if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
Yang Nieb9aa672015-01-27 14:32:25 -0800109 // An invoke should be in a batch by itself, so it conflicts with any other
110 // closure.
Yang Ni1ffd86b2015-01-07 09:16:40 -0800111 return true;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800112 }
Yang Nieb9aa672015-01-27 14:32:25 -0800113
Yang Niff2bb542015-02-02 14:33:47 -0800114 const auto& globalDeps = closure->mGlobalDeps;
115 const auto& argDeps = closure->mArgDeps;
116
117 for (CPUClosure* c : mClosures) {
118 const Closure* batched = c->mClosure;
119 if (globalDeps.find(batched) != globalDeps.end()) {
120 return true;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800121 }
Yang Niff2bb542015-02-02 14:33:47 -0800122 const auto& it = argDeps.find(batched);
123 if (it != argDeps.end()) {
124 const auto& args = (*it).second;
125 for (const auto &p1 : *args) {
126 if (p1.second->get() != nullptr) {
127 return true;
Yang Nieb9aa672015-01-27 14:32:25 -0800128 }
129 }
130 }
131 }
Yang Niff2bb542015-02-02 14:33:47 -0800132
Yang Nieb9aa672015-01-27 14:32:25 -0800133 return false;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800134}
135
Yang Ni1ffd86b2015-01-07 09:16:40 -0800136CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
137 const ScriptGroupBase *sg) :
Yang Ni062c2872015-02-20 15:20:00 -0800138 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
139 mExecutable(nullptr), mScriptObj(nullptr) {
Yang Nieb9aa672015-01-27 14:32:25 -0800140 rsAssert(!mGroup->mClosures.empty());
Yang Ni1ffd86b2015-01-07 09:16:40 -0800141
Yang Ni062c2872015-02-20 15:20:00 -0800142 Batch* batch = new Batch(this, "Batch0");
143 int i = 0;
Yang Nieb9aa672015-01-27 14:32:25 -0800144 for (Closure* closure: mGroup->mClosures) {
Yang Nieb9aa672015-01-27 14:32:25 -0800145 CPUClosure* cc;
Yang Ni062c2872015-02-20 15:20:00 -0800146 const IDBase* funcID = closure->mFunctionID.get();
147 RsdCpuScriptImpl* si =
148 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
149 if (closure->mIsKernel) {
Yang Nieb9aa672015-01-27 14:32:25 -0800150 MTLaunchStruct mtls;
Yang Ni062c2872015-02-20 15:20:00 -0800151 si->forEachKernelSetup(funcID->mSlot, &mtls);
152 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
Yang Nieb9aa672015-01-27 14:32:25 -0800153 } else {
Yang Nieb9aa672015-01-27 14:32:25 -0800154 cc = new CPUClosure(closure, si);
155 }
156
157 if (batch->conflict(cc)) {
158 mBatches.push_back(batch);
Yang Ni062c2872015-02-20 15:20:00 -0800159 std::stringstream ss;
160 ss << "Batch" << ++i;
161 batch = new Batch(this, ss.str().c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800162 }
163
164 batch->mClosures.push_back(cc);
Yang Ni1ffd86b2015-01-07 09:16:40 -0800165 }
Yang Nida0f0692015-01-12 13:03:40 -0800166
Yang Nieb9aa672015-01-27 14:32:25 -0800167 rsAssert(!batch->mClosures.empty());
168 mBatches.push_back(batch);
Yang Nida0f0692015-01-12 13:03:40 -0800169
170#ifndef RS_COMPATIBILITY_LIB
Yang Ni062c2872015-02-20 15:20:00 -0800171 compile(mGroup->mCacheDir);
172 if (mScriptObj != nullptr && mExecutable != nullptr) {
173 for (Batch* batch : mBatches) {
174 batch->resolveFuncPtr(mScriptObj);
175 }
Yang Nieb9aa672015-01-27 14:32:25 -0800176 }
Yang Ni062c2872015-02-20 15:20:00 -0800177#endif // RS_COMPATIBILITY_LIB
178}
179
180void Batch::resolveFuncPtr(void* sharedObj) {
181 std::string funcName(mName);
182 if (mClosures.front()->mClosure->mIsKernel) {
183 funcName.append(".expand");
184 }
185 mFunc = dlsym(sharedObj, funcName.c_str());
186 rsAssert (mFunc != nullptr);
Yang Ni1ffd86b2015-01-07 09:16:40 -0800187}
188
189CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
Yang Nieb9aa672015-01-27 14:32:25 -0800190 for (Batch* batch : mBatches) {
191 delete batch;
192 }
Yang Ni062c2872015-02-20 15:20:00 -0800193 // TODO: move this dlclose into ~ScriptExecutable().
194 if (mScriptObj != nullptr) {
195 dlclose(mScriptObj);
196 }
197 delete mExecutable;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800198}
199
Yang Nida0f0692015-01-12 13:03:40 -0800200namespace {
201
202#ifndef RS_COMPATIBILITY_LIB
203
// Returns the last component of |path|: everything after the final '/' or
// '\\'. A path with no separator is returned unchanged, and a path that ends
// in a separator yields an empty string.
string getFileName(string path) {
  // Keep the full-width position type: narrowing npos to `unsigned` only
  // worked because the truncated value happened to wrap to 0 on `+ 1`.
  const string::size_type lastSeparator = path.find_last_of("/\\");
  if (lastSeparator == string::npos) {
    return path;
  }
  return path.substr(lastSeparator + 1);
}
208
209void setupCompileArguments(
Yang Ni062c2872015-02-20 15:20:00 -0800210 const vector<string>& inputs, const vector<string>& kernelBatches,
211 const vector<string>& invokeBatches,
Yang Nieb9aa672015-01-27 14:32:25 -0800212 const string& output_dir, const string& output_filename,
213 const string& rsLib, vector<const char*>* args) {
214 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
215 args->push_back("-fPIC");
216 args->push_back("-embedRSInfo");
217 args->push_back("-mtriple");
218 args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
219 args->push_back("-bclib");
220 args->push_back(rsLib.c_str());
221 for (const string& input : inputs) {
222 args->push_back(input.c_str());
223 }
Yang Ni062c2872015-02-20 15:20:00 -0800224 for (const string& batch : kernelBatches) {
225 args->push_back("-merge");
226 args->push_back(batch.c_str());
227 }
228 for (const string& batch : invokeBatches) {
229 args->push_back("-invoke");
230 args->push_back(batch.c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800231 }
232 args->push_back("-output_path");
233 args->push_back(output_dir.c_str());
234 args->push_back("-o");
235 args->push_back(output_filename.c_str());
236 args->push_back(nullptr);
Yang Nida0f0692015-01-12 13:03:40 -0800237}
238
Yang Nida0f0692015-01-12 13:03:40 -0800239bool fuseAndCompile(const char** arguments,
240 const string& commandLine) {
Yang Nieb9aa672015-01-27 14:32:25 -0800241 const pid_t pid = fork();
Yang Nida0f0692015-01-12 13:03:40 -0800242
Yang Nieb9aa672015-01-27 14:32:25 -0800243 if (pid == -1) {
244 ALOGE("Couldn't fork for bcc execution");
245 return false;
246 }
Yang Nida0f0692015-01-12 13:03:40 -0800247
Yang Nieb9aa672015-01-27 14:32:25 -0800248 if (pid == 0) {
249 // Child process
250 ALOGV("Invoking BCC with: %s", commandLine.c_str());
251 execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
Yang Nida0f0692015-01-12 13:03:40 -0800252
Yang Nieb9aa672015-01-27 14:32:25 -0800253 ALOGE("execv() failed: %s", strerror(errno));
254 abort();
255 return false;
256 }
Yang Nida0f0692015-01-12 13:03:40 -0800257
Yang Nieb9aa672015-01-27 14:32:25 -0800258 // Parent process
259 int status = 0;
260 const pid_t w = waitpid(pid, &status, 0);
261 if (w == -1) {
262 return false;
263 }
Yang Nida0f0692015-01-12 13:03:40 -0800264
Yang Nieb9aa672015-01-27 14:32:25 -0800265 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
266 ALOGE("bcc terminated unexpectedly");
267 return false;
268 }
Yang Nida0f0692015-01-12 13:03:40 -0800269
Yang Nieb9aa672015-01-27 14:32:25 -0800270 return true;
Yang Nida0f0692015-01-12 13:03:40 -0800271}
Yang Ni062c2872015-02-20 15:20:00 -0800272
273void generateSourceSlot(const Closure& closure,
274 const std::vector<std::string>& inputs,
275 std::stringstream& ss) {
276 const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
277 const Script* script = funcID->mScript;
278
279 rsAssert (!script->isIntrinsic());
280
281 const RsdCpuScriptImpl *cpuScript =
282 (const RsdCpuScriptImpl*)script->mHal.drv;
283 const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
284
285 const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
286 inputs.begin();
287
288 ss << index << "," << funcID->mSlot << ".";
289}
290
#endif // RS_COMPATIBILITY_LIB
Yang Nida0f0692015-01-12 13:03:40 -0800292
293} // anonymous namespace
294
Yang Ni062c2872015-02-20 15:20:00 -0800295void CpuScriptGroup2Impl::compile(const char* cacheDir) {
Yang Nida0f0692015-01-12 13:03:40 -0800296#ifndef RS_COMPATIBILITY_LIB
Yang Ni062c2872015-02-20 15:20:00 -0800297 if (mGroup->mClosures.size() < 2) {
Yang Nieb9aa672015-01-27 14:32:25 -0800298 return;
Yang Nida0f0692015-01-12 13:03:40 -0800299 }
300
Yang Nieb9aa672015-01-27 14:32:25 -0800301 //===--------------------------------------------------------------------===//
302 // Fuse the input kernels and generate native code in an object file
303 //===--------------------------------------------------------------------===//
Yang Nida0f0692015-01-12 13:03:40 -0800304
Yang Ni062c2872015-02-20 15:20:00 -0800305 std::set<string> inputSet;
306 for (Closure* closure : mGroup->mClosures) {
307 const Script* script = closure->mFunctionID.get()->mScript;
Yang Nida0f0692015-01-12 13:03:40 -0800308
Yang Ni062c2872015-02-20 15:20:00 -0800309 // If any script is an intrinsic, give up trying fusing the kernels.
Yang Nieb9aa672015-01-27 14:32:25 -0800310 if (script->isIntrinsic()) {
311 return;
312 }
Yang Nida0f0692015-01-12 13:03:40 -0800313
Yang Nieb9aa672015-01-27 14:32:25 -0800314 const RsdCpuScriptImpl *cpuScript =
315 (const RsdCpuScriptImpl*)script->mHal.drv;
Yang Nieb9aa672015-01-27 14:32:25 -0800316 const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
Yang Ni062c2872015-02-20 15:20:00 -0800317 inputSet.insert(bitcodeFilename);
318 }
Yang Nida0f0692015-01-12 13:03:40 -0800319
Yang Ni062c2872015-02-20 15:20:00 -0800320 std::vector<string> inputs(inputSet.begin(), inputSet.end());
321
322 std::vector<string> kernelBatches;
323 std::vector<string> invokeBatches;
324
325 int i = 0;
326 for (const auto& batch : mBatches) {
327 rsAssert(batch->size() > 0);
328
329 std::stringstream ss;
330 ss << batch->mName << ":";
331
332 if (!batch->mClosures.front()->mClosure->mIsKernel) {
333 rsAssert(batch->size() == 1);
334 generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
335 invokeBatches.push_back(ss.str());
336 } else {
337 for (const auto& cpuClosure : batch->mClosures) {
338 generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
339 }
340 kernelBatches.push_back(ss.str());
341 }
Yang Nieb9aa672015-01-27 14:32:25 -0800342 }
Yang Nida0f0692015-01-12 13:03:40 -0800343
Yabin Cui433558f2015-02-23 18:25:55 -0800344 rsAssert(cacheDir != nullptr);
345 string objFilePath(cacheDir);
346 objFilePath.append("/fusedXXXXXX.o");
347 // Find unique object file name, to make following file names unique.
348 int tempfd = mkstemps(&objFilePath[0], 2);
349 if (tempfd == -1) {
350 return;
351 }
352 TEMP_FAILURE_RETRY(close(tempfd));
353
354 string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
Yang Nieb9aa672015-01-27 14:32:25 -0800355 string rsLibPath(SYSLIBPATH"/libclcore.bc");
356 vector<const char*> arguments;
Yang Ni062c2872015-02-20 15:20:00 -0800357 setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
358 outputFileName, rsLibPath, &arguments);
Yang Ni2abfcc62015-02-17 16:05:19 -0800359 std::unique_ptr<const char> joined(
360 rsuJoinStrings(arguments.size() - 1, arguments.data()));
361 string commandLine (joined.get());
Yang Nida0f0692015-01-12 13:03:40 -0800362
Yang Nieb9aa672015-01-27 14:32:25 -0800363 if (!fuseAndCompile(arguments.data(), commandLine)) {
Yabin Cui433558f2015-02-23 18:25:55 -0800364 unlink(objFilePath.c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800365 return;
366 }
Yang Nida0f0692015-01-12 13:03:40 -0800367
Yang Nieb9aa672015-01-27 14:32:25 -0800368 //===--------------------------------------------------------------------===//
369 // Create and load the shared lib
370 //===--------------------------------------------------------------------===//
371
372 const char* resName = outputFileName.c_str();
373
374 if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
375 ALOGE("Failed to link object file '%s'", resName);
376 return;
377 }
378
Yang Ni062c2872015-02-20 15:20:00 -0800379 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
380 if (mScriptObj == nullptr) {
Yang Nieb9aa672015-01-27 14:32:25 -0800381 ALOGE("Unable to load '%s'", resName);
382 return;
383 }
384
385 mExecutable = ScriptExecutable::createFromSharedObject(
Yang Ni062c2872015-02-20 15:20:00 -0800386 nullptr, // RS context. Unused.
387 mScriptObj);
Yang Nida0f0692015-01-12 13:03:40 -0800388
389#endif // RS_COMPATIBILITY_LIB
390}
391
Yang Ni1ffd86b2015-01-07 09:16:40 -0800392void CpuScriptGroup2Impl::execute() {
Yang Nieb9aa672015-01-27 14:32:25 -0800393 for (auto batch : mBatches) {
394 batch->setGlobalsForBatch();
395 batch->run();
396 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800397}
398
Yang Nida0f0692015-01-12 13:03:40 -0800399void Batch::setGlobalsForBatch() {
Yang Nieb9aa672015-01-27 14:32:25 -0800400 for (CPUClosure* cpuClosure : mClosures) {
401 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800402 const IDBase* funcID = closure->mFunctionID.get();
403 Script* s = funcID->mScript;;
Yang Nieb9aa672015-01-27 14:32:25 -0800404 for (const auto& p : closure->mGlobals) {
405 const void* value = p.second.first;
406 int size = p.second.second;
407 if (value == nullptr && size == 0) {
408 // This indicates the current closure depends on another closure for a
409 // global in their shared module (script). In this case we don't need to
410 // copy the value. For example, an invoke intializes a global variable
411 // which a kernel later reads.
412 continue;
413 }
Yang Niff2bb542015-02-02 14:33:47 -0800414 rsAssert(p.first != nullptr);
415 ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)",
416 closure, p.first, p.first->mScript, p.first->mSlot);
Yang Ni062c2872015-02-20 15:20:00 -0800417 Script* script = p.first->mScript;
418 const RsdCpuScriptImpl *cpuScript =
419 (const RsdCpuScriptImpl*)script->mHal.drv;
420 int slot = p.first->mSlot;
421 ScriptExecutable* exec = mGroup->getExecutable();
422 if (exec != nullptr) {
423 const char* varName = cpuScript->getFieldName(slot);
424 void* addr = exec->getFieldAddress(varName);
425 if (size < 0) {
426 rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
427 (rs_object_base*)addr, (ObjectBase*)value);
428 } else {
429 memcpy(addr, (const void*)&value, size);
430 }
Yang Nieb9aa672015-01-27 14:32:25 -0800431 } else {
Yang Ni062c2872015-02-20 15:20:00 -0800432 // We use -1 size to indicate an ObjectBase rather than a primitive type
433 if (size < 0) {
434 s->setVarObj(slot, (ObjectBase*)value);
435 } else {
436 s->setVar(slot, (const void*)&value, size);
437 }
Yang Nieb9aa672015-01-27 14:32:25 -0800438 }
439 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800440 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800441}
442
Yang Nida0f0692015-01-12 13:03:40 -0800443void Batch::run() {
Yang Ni062c2872015-02-20 15:20:00 -0800444 if (!mClosures.front()->mClosure->mIsKernel) {
445 rsAssert(mClosures.size() == 1);
446
447 // This batch contains a single closure for an invoke function
448 CPUClosure* cc = mClosures.front();
449 const Closure* c = cc->mClosure;
450
451 if (mFunc != nullptr) {
452 // TODO: Need align pointers for x86_64.
453 // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
454 ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
455 } else {
456 const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
457 rsAssert(invokeID != nullptr);
458 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
459 }
460
461 return;
462 }
463
464 if (mFunc != nullptr) {
Yang Nieb9aa672015-01-27 14:32:25 -0800465 MTLaunchStruct mtls;
466 const CPUClosure* firstCpuClosure = mClosures.front();
467 const CPUClosure* lastCpuClosure = mClosures.back();
468
469 firstCpuClosure->mSi->forEachMtlsSetup(
Yang Niff2bb542015-02-02 14:33:47 -0800470 (const Allocation**)firstCpuClosure->mClosure->mArgs,
471 firstCpuClosure->mClosure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800472 lastCpuClosure->mClosure->mReturnValue,
473 nullptr, 0, nullptr, &mtls);
474
475 mtls.script = nullptr;
476 mtls.fep.usr = nullptr;
Yang Ni062c2872015-02-20 15:20:00 -0800477 mtls.kernel = (ForEachFunc_t)mFunc;
Yang Nieb9aa672015-01-27 14:32:25 -0800478
479 mGroup->getCpuRefImpl()->launchThreads(
Yang Niff2bb542015-02-02 14:33:47 -0800480 (const Allocation**)firstCpuClosure->mClosure->mArgs,
481 firstCpuClosure->mClosure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800482 lastCpuClosure->mClosure->mReturnValue,
483 nullptr, &mtls);
484
485 return;
486 }
487
Yang Nieb9aa672015-01-27 14:32:25 -0800488 for (CPUClosure* cpuClosure : mClosures) {
489 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800490 const ScriptKernelID* kernelID =
491 (const ScriptKernelID*)closure->mFunctionID.get();
Yang Nieb9aa672015-01-27 14:32:25 -0800492 cpuClosure->mSi->preLaunch(kernelID->mSlot,
Yang Niff2bb542015-02-02 14:33:47 -0800493 (const Allocation**)closure->mArgs,
494 closure->mNumArg, closure->mReturnValue,
Yang Ni062c2872015-02-20 15:20:00 -0800495 nullptr, 0, nullptr);
Yang Nieb9aa672015-01-27 14:32:25 -0800496 }
497
498 const CPUClosure* cpuClosure = mClosures.front();
499 const Closure* closure = cpuClosure->mClosure;
Yang Nida0f0692015-01-12 13:03:40 -0800500 MTLaunchStruct mtls;
Yang Nida0f0692015-01-12 13:03:40 -0800501
Yang Niff2bb542015-02-02 14:33:47 -0800502 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
503 closure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800504 closure->mReturnValue,
505 nullptr, 0, nullptr, &mtls)) {
Yang Nida0f0692015-01-12 13:03:40 -0800506
Yang Nieb9aa672015-01-27 14:32:25 -0800507 mtls.script = nullptr;
508 mtls.kernel = (void (*)())&groupRoot;
509 mtls.fep.usr = &mClosures;
Yang Nida0f0692015-01-12 13:03:40 -0800510
Yang Nieb9aa672015-01-27 14:32:25 -0800511 mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
512 }
Yang Nida0f0692015-01-12 13:03:40 -0800513
Yang Nieb9aa672015-01-27 14:32:25 -0800514 for (CPUClosure* cpuClosure : mClosures) {
515 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800516 const ScriptKernelID* kernelID =
517 (const ScriptKernelID*)closure->mFunctionID.get();
Yang Nieb9aa672015-01-27 14:32:25 -0800518 cpuClosure->mSi->postLaunch(kernelID->mSlot,
Yang Niff2bb542015-02-02 14:33:47 -0800519 (const Allocation**)closure->mArgs,
520 closure->mNumArg, closure->mReturnValue,
Yang Nieb9aa672015-01-27 14:32:25 -0800521 nullptr, 0, nullptr);
522 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800523}
524
525} // namespace renderscript
526} // namespace android