blob: 2e50ecb9de776996eb5c81691446897f9c32cc88 [file] [log] [blame]
Yang Ni1ffd86b2015-01-07 09:16:40 -08001#include "rsCpuScriptGroup2.h"
2
Yang Nida0f0692015-01-12 13:03:40 -08003#include <dlfcn.h>
Yabin Cui433558f2015-02-23 18:25:55 -08004#include <stdio.h>
5#include <stdlib.h>
6#include <unistd.h>
Yang Nida0f0692015-01-12 13:03:40 -08007
Yang Ni062c2872015-02-20 15:20:00 -08008#include <set>
9#include <sstream>
Yang Nida0f0692015-01-12 13:03:40 -080010#include <string>
11#include <vector>
12
13#ifndef RS_COMPATIBILITY_LIB
14#include "bcc/Config/Config.h"
15#include <sys/wait.h>
16#endif
17
Yang Ni1ffd86b2015-01-07 09:16:40 -080018#include "cpu_ref/rsCpuCore.h"
19#include "rsClosure.h"
20#include "rsContext.h"
21#include "rsCpuCore.h"
Yang Ni2abfcc62015-02-17 16:05:19 -080022#include "rsCpuExecutable.h"
Yang Ni1ffd86b2015-01-07 09:16:40 -080023#include "rsCpuScript.h"
24#include "rsScript.h"
25#include "rsScriptGroup2.h"
Yang Nida0f0692015-01-12 13:03:40 -080026#include "rsScriptIntrinsic.h"
27
28using std::string;
29using std::vector;
Yang Ni1ffd86b2015-01-07 09:16:40 -080030
31namespace android {
32namespace renderscript {
33
34namespace {
35
Yang Nida0f0692015-01-12 13:03:40 -080036const size_t DefaultKernelArgCount = 2;
Yang Ni1ffd86b2015-01-07 09:16:40 -080037
David Grossb0abb142015-03-12 15:23:03 -070038void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
Yang Ni1ffd86b2015-01-07 09:16:40 -080039 uint32_t xend, uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -070040 const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
41 RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
Yang Ni1ffd86b2015-01-07 09:16:40 -080042
David Grossb0abb142015-03-12 15:23:03 -070043 const size_t oldInLen = mutable_kinfo->inLen;
44
45 decltype(mutable_kinfo->inStride) oldInStride;
46 memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
Yang Ni1ffd86b2015-01-07 09:16:40 -080047
Yang Nieb9aa672015-01-27 14:32:25 -080048 for (CPUClosure* cpuClosure : closures) {
49 const Closure* closure = cpuClosure->mClosure;
Yang Ni1ffd86b2015-01-07 09:16:40 -080050
David Grossb0abb142015-03-12 15:23:03 -070051 // There had better be enough space in mutable_kinfo
52 rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
Yang Ni1ffd86b2015-01-07 09:16:40 -080053
Yang Niff2bb542015-02-02 14:33:47 -080054 for (size_t i = 0; i < closure->mNumArg; i++) {
55 const void* arg = closure->mArgs[i];
Yang Nieb9aa672015-01-27 14:32:25 -080056 const Allocation* a = (const Allocation*)arg;
57 const uint32_t eStride = a->mHal.state.elementSizeBytes;
58 const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
59 eStride * xstart;
David Grossb0abb142015-03-12 15:23:03 -070060 if (kinfo->dim.y > 1) {
61 ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
Yang Nieb9aa672015-01-27 14:32:25 -080062 }
David Grossb0abb142015-03-12 15:23:03 -070063 mutable_kinfo->inPtr[i] = ptr;
64 mutable_kinfo->inStride[i] = eStride;
Yang Nieb9aa672015-01-27 14:32:25 -080065 }
David Grossb0abb142015-03-12 15:23:03 -070066 mutable_kinfo->inLen = closure->mNumArg;
Yang Nieb9aa672015-01-27 14:32:25 -080067
68 const Allocation* out = closure->mReturnValue;
69 const uint32_t ostep = out->mHal.state.elementSizeBytes;
70 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
71 ostep * xstart;
David Grossb0abb142015-03-12 15:23:03 -070072 if (kinfo->dim.y > 1) {
73 ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
Yang Nieb9aa672015-01-27 14:32:25 -080074 }
75
David Grossb0abb142015-03-12 15:23:03 -070076 rsAssert(kinfo->outLen <= 1);
77 mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
Yang Nieb9aa672015-01-27 14:32:25 -080078
David Grossb0abb142015-03-12 15:23:03 -070079 cpuClosure->mFunc(kinfo, xstart, xend, ostep);
Yang Ni1ffd86b2015-01-07 09:16:40 -080080 }
81
David Grossb0abb142015-03-12 15:23:03 -070082 mutable_kinfo->inLen = oldInLen;
83 memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
Yang Ni1ffd86b2015-01-07 09:16:40 -080084}
85
Yang Nida0f0692015-01-12 13:03:40 -080086} // namespace
87
Yang Ni062c2872015-02-20 15:20:00 -080088Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
89 mGroup(group), mFunc(nullptr) {
90 mName = strndup(name, strlen(name));
91}
92
Yang Nida0f0692015-01-12 13:03:40 -080093Batch::~Batch() {
Yang Nieb9aa672015-01-27 14:32:25 -080094 for (CPUClosure* c : mClosures) {
95 delete c;
96 }
Yang Ni062c2872015-02-20 15:20:00 -080097 free(mName);
Yang Nida0f0692015-01-12 13:03:40 -080098}
99
Yang Niff2bb542015-02-02 14:33:47 -0800100bool Batch::conflict(CPUClosure* cpuClosure) const {
Yang Nieb9aa672015-01-27 14:32:25 -0800101 if (mClosures.empty()) {
102 return false;
103 }
Yang Nida0f0692015-01-12 13:03:40 -0800104
Yang Niff2bb542015-02-02 14:33:47 -0800105 const Closure* closure = cpuClosure->mClosure;
106
Yang Ni062c2872015-02-20 15:20:00 -0800107 if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
Yang Nieb9aa672015-01-27 14:32:25 -0800108 // An invoke should be in a batch by itself, so it conflicts with any other
109 // closure.
Yang Ni1ffd86b2015-01-07 09:16:40 -0800110 return true;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800111 }
Yang Nieb9aa672015-01-27 14:32:25 -0800112
Yang Niff2bb542015-02-02 14:33:47 -0800113 const auto& globalDeps = closure->mGlobalDeps;
114 const auto& argDeps = closure->mArgDeps;
115
116 for (CPUClosure* c : mClosures) {
117 const Closure* batched = c->mClosure;
118 if (globalDeps.find(batched) != globalDeps.end()) {
119 return true;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800120 }
Yang Niff2bb542015-02-02 14:33:47 -0800121 const auto& it = argDeps.find(batched);
122 if (it != argDeps.end()) {
123 const auto& args = (*it).second;
124 for (const auto &p1 : *args) {
125 if (p1.second->get() != nullptr) {
126 return true;
Yang Nieb9aa672015-01-27 14:32:25 -0800127 }
128 }
129 }
130 }
Yang Niff2bb542015-02-02 14:33:47 -0800131
Yang Nieb9aa672015-01-27 14:32:25 -0800132 return false;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800133}
134
Yang Ni1ffd86b2015-01-07 09:16:40 -0800135CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
136 const ScriptGroupBase *sg) :
Yang Ni062c2872015-02-20 15:20:00 -0800137 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
138 mExecutable(nullptr), mScriptObj(nullptr) {
Yang Nieb9aa672015-01-27 14:32:25 -0800139 rsAssert(!mGroup->mClosures.empty());
Yang Ni1ffd86b2015-01-07 09:16:40 -0800140
Yang Ni062c2872015-02-20 15:20:00 -0800141 Batch* batch = new Batch(this, "Batch0");
142 int i = 0;
Yang Nieb9aa672015-01-27 14:32:25 -0800143 for (Closure* closure: mGroup->mClosures) {
Yang Nieb9aa672015-01-27 14:32:25 -0800144 CPUClosure* cc;
Yang Ni062c2872015-02-20 15:20:00 -0800145 const IDBase* funcID = closure->mFunctionID.get();
146 RsdCpuScriptImpl* si =
147 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
148 if (closure->mIsKernel) {
Yang Nieb9aa672015-01-27 14:32:25 -0800149 MTLaunchStruct mtls;
Yang Ni062c2872015-02-20 15:20:00 -0800150 si->forEachKernelSetup(funcID->mSlot, &mtls);
151 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
Yang Nieb9aa672015-01-27 14:32:25 -0800152 } else {
Yang Nieb9aa672015-01-27 14:32:25 -0800153 cc = new CPUClosure(closure, si);
154 }
155
156 if (batch->conflict(cc)) {
157 mBatches.push_back(batch);
Yang Ni062c2872015-02-20 15:20:00 -0800158 std::stringstream ss;
159 ss << "Batch" << ++i;
160 batch = new Batch(this, ss.str().c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800161 }
162
163 batch->mClosures.push_back(cc);
Yang Ni1ffd86b2015-01-07 09:16:40 -0800164 }
Yang Nida0f0692015-01-12 13:03:40 -0800165
Yang Nieb9aa672015-01-27 14:32:25 -0800166 rsAssert(!batch->mClosures.empty());
167 mBatches.push_back(batch);
Yang Nida0f0692015-01-12 13:03:40 -0800168
169#ifndef RS_COMPATIBILITY_LIB
Yang Ni062c2872015-02-20 15:20:00 -0800170 compile(mGroup->mCacheDir);
171 if (mScriptObj != nullptr && mExecutable != nullptr) {
172 for (Batch* batch : mBatches) {
173 batch->resolveFuncPtr(mScriptObj);
174 }
Yang Nieb9aa672015-01-27 14:32:25 -0800175 }
Yang Ni062c2872015-02-20 15:20:00 -0800176#endif // RS_COMPATIBILITY_LIB
177}
178
179void Batch::resolveFuncPtr(void* sharedObj) {
180 std::string funcName(mName);
181 if (mClosures.front()->mClosure->mIsKernel) {
182 funcName.append(".expand");
183 }
184 mFunc = dlsym(sharedObj, funcName.c_str());
185 rsAssert (mFunc != nullptr);
Yang Ni1ffd86b2015-01-07 09:16:40 -0800186}
187
188CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
Yang Nieb9aa672015-01-27 14:32:25 -0800189 for (Batch* batch : mBatches) {
190 delete batch;
191 }
Yang Ni062c2872015-02-20 15:20:00 -0800192 // TODO: move this dlclose into ~ScriptExecutable().
193 if (mScriptObj != nullptr) {
194 dlclose(mScriptObj);
195 }
196 delete mExecutable;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800197}
198
Yang Nida0f0692015-01-12 13:03:40 -0800199namespace {
200
201#ifndef RS_COMPATIBILITY_LIB
202
Yang Niedf4ea32015-03-11 09:07:15 -0700203string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
204 *coreLibRelaxedPath = "";
205
206 // If we're debugging, use the debug library.
207 if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
208 return SYSLIBPATH"/libclcore_debug.bc";
209 }
210
211 // Check for a platform specific library
212
213#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
214 // NEON-capable ARMv7a devices can use an accelerated math library
215 // for all reduced precision scripts.
216 // ARMv8 does not use NEON, as ASIMD can be used with all precision
217 // levels.
218 *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc";
219#endif
220
221#if defined(__i386__) || defined(__x86_64__)
222 // x86 devices will use an optimized library.
223 return SYSLIBPATH"/libclcore_x86.bc";
224#else
225 return SYSLIBPATH"/libclcore.bc";
226#endif
227}
228
// Returns the last component of `path`, i.e. everything after the final '/'
// or '\\'. A path without any separator is returned unchanged; a path ending
// in a separator yields an empty string.
std::string getFileName(std::string path) {
    // Keep the search result in the string's own size type: narrowing it to
    // `unsigned` truncates npos and only works by accident of wrap-around.
    const std::string::size_type lastSep = path.find_last_of("/\\");
    if (lastSep == std::string::npos) {
        return path;
    }
    return path.substr(lastSep + 1);
}
233
234void setupCompileArguments(
Yang Ni062c2872015-02-20 15:20:00 -0800235 const vector<string>& inputs, const vector<string>& kernelBatches,
236 const vector<string>& invokeBatches,
Yang Nieb9aa672015-01-27 14:32:25 -0800237 const string& output_dir, const string& output_filename,
Yang Niedf4ea32015-03-11 09:07:15 -0700238 const string& coreLibPath, const string& coreLibRelaxedPath,
239 vector<const char*>* args) {
Yang Nieb9aa672015-01-27 14:32:25 -0800240 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
241 args->push_back("-fPIC");
242 args->push_back("-embedRSInfo");
243 args->push_back("-mtriple");
244 args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
245 args->push_back("-bclib");
Yang Niedf4ea32015-03-11 09:07:15 -0700246 args->push_back(coreLibPath.c_str());
247 args->push_back("-bclib_relaxed");
248 args->push_back(coreLibRelaxedPath.c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800249 for (const string& input : inputs) {
250 args->push_back(input.c_str());
251 }
Yang Ni062c2872015-02-20 15:20:00 -0800252 for (const string& batch : kernelBatches) {
253 args->push_back("-merge");
254 args->push_back(batch.c_str());
255 }
256 for (const string& batch : invokeBatches) {
257 args->push_back("-invoke");
258 args->push_back(batch.c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800259 }
260 args->push_back("-output_path");
261 args->push_back(output_dir.c_str());
262 args->push_back("-o");
263 args->push_back(output_filename.c_str());
264 args->push_back(nullptr);
Yang Nida0f0692015-01-12 13:03:40 -0800265}
266
Yang Nida0f0692015-01-12 13:03:40 -0800267bool fuseAndCompile(const char** arguments,
268 const string& commandLine) {
Yang Nieb9aa672015-01-27 14:32:25 -0800269 const pid_t pid = fork();
Yang Nida0f0692015-01-12 13:03:40 -0800270
Yang Nieb9aa672015-01-27 14:32:25 -0800271 if (pid == -1) {
272 ALOGE("Couldn't fork for bcc execution");
273 return false;
274 }
Yang Nida0f0692015-01-12 13:03:40 -0800275
Yang Nieb9aa672015-01-27 14:32:25 -0800276 if (pid == 0) {
277 // Child process
278 ALOGV("Invoking BCC with: %s", commandLine.c_str());
279 execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
Yang Nida0f0692015-01-12 13:03:40 -0800280
Yang Nieb9aa672015-01-27 14:32:25 -0800281 ALOGE("execv() failed: %s", strerror(errno));
282 abort();
283 return false;
284 }
Yang Nida0f0692015-01-12 13:03:40 -0800285
Yang Nieb9aa672015-01-27 14:32:25 -0800286 // Parent process
287 int status = 0;
288 const pid_t w = waitpid(pid, &status, 0);
289 if (w == -1) {
290 return false;
291 }
Yang Nida0f0692015-01-12 13:03:40 -0800292
Yang Nieb9aa672015-01-27 14:32:25 -0800293 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
294 ALOGE("bcc terminated unexpectedly");
295 return false;
296 }
Yang Nida0f0692015-01-12 13:03:40 -0800297
Yang Nieb9aa672015-01-27 14:32:25 -0800298 return true;
Yang Nida0f0692015-01-12 13:03:40 -0800299}
Yang Ni062c2872015-02-20 15:20:00 -0800300
301void generateSourceSlot(const Closure& closure,
302 const std::vector<std::string>& inputs,
303 std::stringstream& ss) {
304 const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
305 const Script* script = funcID->mScript;
306
307 rsAssert (!script->isIntrinsic());
308
309 const RsdCpuScriptImpl *cpuScript =
310 (const RsdCpuScriptImpl*)script->mHal.drv;
311 const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
312
313 const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
314 inputs.begin();
315
316 ss << index << "," << funcID->mSlot << ".";
317}
318
#endif  // RS_COMPATIBILITY_LIB
Yang Nida0f0692015-01-12 13:03:40 -0800320
321} // anonymous namespace
322
Yang Ni062c2872015-02-20 15:20:00 -0800323void CpuScriptGroup2Impl::compile(const char* cacheDir) {
Yang Nida0f0692015-01-12 13:03:40 -0800324#ifndef RS_COMPATIBILITY_LIB
Yang Ni062c2872015-02-20 15:20:00 -0800325 if (mGroup->mClosures.size() < 2) {
Yang Nieb9aa672015-01-27 14:32:25 -0800326 return;
Yang Nida0f0692015-01-12 13:03:40 -0800327 }
328
Yang Nieb9aa672015-01-27 14:32:25 -0800329 //===--------------------------------------------------------------------===//
330 // Fuse the input kernels and generate native code in an object file
331 //===--------------------------------------------------------------------===//
Yang Nida0f0692015-01-12 13:03:40 -0800332
Yang Ni062c2872015-02-20 15:20:00 -0800333 std::set<string> inputSet;
334 for (Closure* closure : mGroup->mClosures) {
335 const Script* script = closure->mFunctionID.get()->mScript;
Yang Nida0f0692015-01-12 13:03:40 -0800336
Yang Ni062c2872015-02-20 15:20:00 -0800337 // If any script is an intrinsic, give up trying fusing the kernels.
Yang Nieb9aa672015-01-27 14:32:25 -0800338 if (script->isIntrinsic()) {
339 return;
340 }
Yang Nida0f0692015-01-12 13:03:40 -0800341
Yang Nieb9aa672015-01-27 14:32:25 -0800342 const RsdCpuScriptImpl *cpuScript =
343 (const RsdCpuScriptImpl*)script->mHal.drv;
Yang Nieb9aa672015-01-27 14:32:25 -0800344 const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
Yang Ni062c2872015-02-20 15:20:00 -0800345 inputSet.insert(bitcodeFilename);
346 }
Yang Nida0f0692015-01-12 13:03:40 -0800347
Yang Ni062c2872015-02-20 15:20:00 -0800348 std::vector<string> inputs(inputSet.begin(), inputSet.end());
349
350 std::vector<string> kernelBatches;
351 std::vector<string> invokeBatches;
352
353 int i = 0;
354 for (const auto& batch : mBatches) {
355 rsAssert(batch->size() > 0);
356
357 std::stringstream ss;
358 ss << batch->mName << ":";
359
360 if (!batch->mClosures.front()->mClosure->mIsKernel) {
361 rsAssert(batch->size() == 1);
362 generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss);
363 invokeBatches.push_back(ss.str());
364 } else {
365 for (const auto& cpuClosure : batch->mClosures) {
366 generateSourceSlot(*cpuClosure->mClosure, inputs, ss);
367 }
368 kernelBatches.push_back(ss.str());
369 }
Yang Nieb9aa672015-01-27 14:32:25 -0800370 }
Yang Nida0f0692015-01-12 13:03:40 -0800371
Yabin Cui433558f2015-02-23 18:25:55 -0800372 rsAssert(cacheDir != nullptr);
373 string objFilePath(cacheDir);
374 objFilePath.append("/fusedXXXXXX.o");
375 // Find unique object file name, to make following file names unique.
376 int tempfd = mkstemps(&objFilePath[0], 2);
377 if (tempfd == -1) {
378 return;
379 }
380 TEMP_FAILURE_RETRY(close(tempfd));
381
382 string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2));
Yang Niedf4ea32015-03-11 09:07:15 -0700383 string coreLibRelaxedPath;
384 const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
385 &coreLibRelaxedPath);
Yang Nieb9aa672015-01-27 14:32:25 -0800386 vector<const char*> arguments;
Yang Ni062c2872015-02-20 15:20:00 -0800387 setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
Yang Niedf4ea32015-03-11 09:07:15 -0700388 outputFileName, coreLibPath, coreLibRelaxedPath, &arguments);
Yang Ni2abfcc62015-02-17 16:05:19 -0800389 std::unique_ptr<const char> joined(
390 rsuJoinStrings(arguments.size() - 1, arguments.data()));
391 string commandLine (joined.get());
Yang Nida0f0692015-01-12 13:03:40 -0800392
Yang Nieb9aa672015-01-27 14:32:25 -0800393 if (!fuseAndCompile(arguments.data(), commandLine)) {
Yabin Cui433558f2015-02-23 18:25:55 -0800394 unlink(objFilePath.c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800395 return;
396 }
Yang Nida0f0692015-01-12 13:03:40 -0800397
Yang Nieb9aa672015-01-27 14:32:25 -0800398 //===--------------------------------------------------------------------===//
399 // Create and load the shared lib
400 //===--------------------------------------------------------------------===//
401
402 const char* resName = outputFileName.c_str();
403
404 if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
405 ALOGE("Failed to link object file '%s'", resName);
406 return;
407 }
408
Yang Ni062c2872015-02-20 15:20:00 -0800409 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
410 if (mScriptObj == nullptr) {
Yang Nieb9aa672015-01-27 14:32:25 -0800411 ALOGE("Unable to load '%s'", resName);
412 return;
413 }
414
415 mExecutable = ScriptExecutable::createFromSharedObject(
Yang Ni062c2872015-02-20 15:20:00 -0800416 nullptr, // RS context. Unused.
417 mScriptObj);
Yang Nida0f0692015-01-12 13:03:40 -0800418
419#endif // RS_COMPATIBILITY_LIB
420}
421
Yang Ni1ffd86b2015-01-07 09:16:40 -0800422void CpuScriptGroup2Impl::execute() {
Yang Nieb9aa672015-01-27 14:32:25 -0800423 for (auto batch : mBatches) {
424 batch->setGlobalsForBatch();
425 batch->run();
426 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800427}
428
Yang Nida0f0692015-01-12 13:03:40 -0800429void Batch::setGlobalsForBatch() {
Yang Nieb9aa672015-01-27 14:32:25 -0800430 for (CPUClosure* cpuClosure : mClosures) {
431 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800432 const IDBase* funcID = closure->mFunctionID.get();
433 Script* s = funcID->mScript;;
Yang Nieb9aa672015-01-27 14:32:25 -0800434 for (const auto& p : closure->mGlobals) {
435 const void* value = p.second.first;
436 int size = p.second.second;
437 if (value == nullptr && size == 0) {
438 // This indicates the current closure depends on another closure for a
439 // global in their shared module (script). In this case we don't need to
440 // copy the value. For example, an invoke intializes a global variable
441 // which a kernel later reads.
442 continue;
443 }
Yang Niff2bb542015-02-02 14:33:47 -0800444 rsAssert(p.first != nullptr);
445 ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)",
446 closure, p.first, p.first->mScript, p.first->mSlot);
Yang Ni062c2872015-02-20 15:20:00 -0800447 Script* script = p.first->mScript;
448 const RsdCpuScriptImpl *cpuScript =
449 (const RsdCpuScriptImpl*)script->mHal.drv;
450 int slot = p.first->mSlot;
451 ScriptExecutable* exec = mGroup->getExecutable();
452 if (exec != nullptr) {
453 const char* varName = cpuScript->getFieldName(slot);
454 void* addr = exec->getFieldAddress(varName);
455 if (size < 0) {
456 rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
457 (rs_object_base*)addr, (ObjectBase*)value);
458 } else {
459 memcpy(addr, (const void*)&value, size);
460 }
Yang Nieb9aa672015-01-27 14:32:25 -0800461 } else {
Yang Ni062c2872015-02-20 15:20:00 -0800462 // We use -1 size to indicate an ObjectBase rather than a primitive type
463 if (size < 0) {
464 s->setVarObj(slot, (ObjectBase*)value);
465 } else {
466 s->setVar(slot, (const void*)&value, size);
467 }
Yang Nieb9aa672015-01-27 14:32:25 -0800468 }
469 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800470 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800471}
472
Yang Nida0f0692015-01-12 13:03:40 -0800473void Batch::run() {
Yang Ni062c2872015-02-20 15:20:00 -0800474 if (!mClosures.front()->mClosure->mIsKernel) {
475 rsAssert(mClosures.size() == 1);
476
477 // This batch contains a single closure for an invoke function
478 CPUClosure* cc = mClosures.front();
479 const Closure* c = cc->mClosure;
480
481 if (mFunc != nullptr) {
482 // TODO: Need align pointers for x86_64.
483 // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
484 ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
485 } else {
486 const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
487 rsAssert(invokeID != nullptr);
488 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
489 }
490
491 return;
492 }
493
494 if (mFunc != nullptr) {
Yang Nieb9aa672015-01-27 14:32:25 -0800495 MTLaunchStruct mtls;
496 const CPUClosure* firstCpuClosure = mClosures.front();
497 const CPUClosure* lastCpuClosure = mClosures.back();
498
499 firstCpuClosure->mSi->forEachMtlsSetup(
Yang Niff2bb542015-02-02 14:33:47 -0800500 (const Allocation**)firstCpuClosure->mClosure->mArgs,
501 firstCpuClosure->mClosure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800502 lastCpuClosure->mClosure->mReturnValue,
503 nullptr, 0, nullptr, &mtls);
504
505 mtls.script = nullptr;
506 mtls.fep.usr = nullptr;
Yang Ni062c2872015-02-20 15:20:00 -0800507 mtls.kernel = (ForEachFunc_t)mFunc;
Yang Nieb9aa672015-01-27 14:32:25 -0800508
509 mGroup->getCpuRefImpl()->launchThreads(
Yang Niff2bb542015-02-02 14:33:47 -0800510 (const Allocation**)firstCpuClosure->mClosure->mArgs,
511 firstCpuClosure->mClosure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800512 lastCpuClosure->mClosure->mReturnValue,
513 nullptr, &mtls);
514
515 return;
516 }
517
Yang Nieb9aa672015-01-27 14:32:25 -0800518 for (CPUClosure* cpuClosure : mClosures) {
519 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800520 const ScriptKernelID* kernelID =
521 (const ScriptKernelID*)closure->mFunctionID.get();
Yang Nieb9aa672015-01-27 14:32:25 -0800522 cpuClosure->mSi->preLaunch(kernelID->mSlot,
Yang Niff2bb542015-02-02 14:33:47 -0800523 (const Allocation**)closure->mArgs,
524 closure->mNumArg, closure->mReturnValue,
Yang Ni062c2872015-02-20 15:20:00 -0800525 nullptr, 0, nullptr);
Yang Nieb9aa672015-01-27 14:32:25 -0800526 }
527
528 const CPUClosure* cpuClosure = mClosures.front();
529 const Closure* closure = cpuClosure->mClosure;
Yang Nida0f0692015-01-12 13:03:40 -0800530 MTLaunchStruct mtls;
Yang Nida0f0692015-01-12 13:03:40 -0800531
Yang Niff2bb542015-02-02 14:33:47 -0800532 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
533 closure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800534 closure->mReturnValue,
535 nullptr, 0, nullptr, &mtls)) {
Yang Nida0f0692015-01-12 13:03:40 -0800536
Yang Nieb9aa672015-01-27 14:32:25 -0800537 mtls.script = nullptr;
538 mtls.kernel = (void (*)())&groupRoot;
539 mtls.fep.usr = &mClosures;
Yang Nida0f0692015-01-12 13:03:40 -0800540
Yang Nieb9aa672015-01-27 14:32:25 -0800541 mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
542 }
Yang Nida0f0692015-01-12 13:03:40 -0800543
Yang Nieb9aa672015-01-27 14:32:25 -0800544 for (CPUClosure* cpuClosure : mClosures) {
545 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800546 const ScriptKernelID* kernelID =
547 (const ScriptKernelID*)closure->mFunctionID.get();
Yang Nieb9aa672015-01-27 14:32:25 -0800548 cpuClosure->mSi->postLaunch(kernelID->mSlot,
Yang Niff2bb542015-02-02 14:33:47 -0800549 (const Allocation**)closure->mArgs,
550 closure->mNumArg, closure->mReturnValue,
Yang Nieb9aa672015-01-27 14:32:25 -0800551 nullptr, 0, nullptr);
552 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800553}
554
555} // namespace renderscript
556} // namespace android