blob: 18c9f73b174a2b8c6e9ac1beaa2c5fa38c62382b [file] [log] [blame]
Yang Ni1ffd86b2015-01-07 09:16:40 -08001#include "rsCpuScriptGroup2.h"
2
Yang Nida0f0692015-01-12 13:03:40 -08003#include <dlfcn.h>
4
5#include <string>
6#include <vector>
7
8#ifndef RS_COMPATIBILITY_LIB
9#include "bcc/Config/Config.h"
10#include <sys/wait.h>
11#endif
12
Yang Ni1ffd86b2015-01-07 09:16:40 -080013#include "cpu_ref/rsCpuCore.h"
14#include "rsClosure.h"
15#include "rsContext.h"
16#include "rsCpuCore.h"
17#include "rsCpuScript.h"
18#include "rsScript.h"
19#include "rsScriptGroup2.h"
Yang Nida0f0692015-01-12 13:03:40 -080020#include "rsScriptIntrinsic.h"
21
22using std::string;
23using std::vector;
Yang Ni1ffd86b2015-01-07 09:16:40 -080024
25namespace android {
26namespace renderscript {
27
28namespace {
29
Yang Nida0f0692015-01-12 13:03:40 -080030const size_t DefaultKernelArgCount = 2;
Yang Ni1ffd86b2015-01-07 09:16:40 -080031
32void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
33 uint32_t xend, uint32_t outstep) {
Yang Nieb9aa672015-01-27 14:32:25 -080034 const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
35 RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
36 const void **oldIns = kparams->ins;
37 uint32_t *oldStrides = kparams->inEStrides;
Yang Ni1ffd86b2015-01-07 09:16:40 -080038
Yang Nieb9aa672015-01-27 14:32:25 -080039 std::vector<const void*> ins(DefaultKernelArgCount);
40 std::vector<uint32_t> strides(DefaultKernelArgCount);
Yang Ni1ffd86b2015-01-07 09:16:40 -080041
Yang Nieb9aa672015-01-27 14:32:25 -080042 for (CPUClosure* cpuClosure : closures) {
43 const Closure* closure = cpuClosure->mClosure;
Yang Ni1ffd86b2015-01-07 09:16:40 -080044
Yang Nieb9aa672015-01-27 14:32:25 -080045 auto in_iter = ins.begin();
46 auto stride_iter = strides.begin();
Yang Ni1ffd86b2015-01-07 09:16:40 -080047
Yang Nieb9aa672015-01-27 14:32:25 -080048 for (const auto& arg : closure->mArgs) {
49 const Allocation* a = (const Allocation*)arg;
50 const uint32_t eStride = a->mHal.state.elementSizeBytes;
51 const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
52 eStride * xstart;
53 if (kparams->dimY > 1) {
54 ptr += a->mHal.drvState.lod[0].stride * kparams->y;
55 }
56 *in_iter++ = ptr;
57 *stride_iter++ = eStride;
58 }
59
60 mutable_kparams->ins = &ins[0];
61 mutable_kparams->inEStrides = &strides[0];
62
63 const Allocation* out = closure->mReturnValue;
64 const uint32_t ostep = out->mHal.state.elementSizeBytes;
65 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
66 ostep * xstart;
67 if (kparams->dimY > 1) {
68 ptr += out->mHal.drvState.lod[0].stride * kparams->y;
69 }
70
71 mutable_kparams->out = (void*)ptr;
72
73 mutable_kparams->usr = cpuClosure->mUsrPtr;
74
75 cpuClosure->mFunc(kparams, xstart, xend, ostep);
Yang Ni1ffd86b2015-01-07 09:16:40 -080076 }
77
Yang Nieb9aa672015-01-27 14:32:25 -080078 mutable_kparams->ins = oldIns;
79 mutable_kparams->inEStrides = oldStrides;
80 mutable_kparams->usr = &closures;
Yang Ni1ffd86b2015-01-07 09:16:40 -080081}
82
Yang Nida0f0692015-01-12 13:03:40 -080083} // namespace
84
85Batch::~Batch() {
Yang Nieb9aa672015-01-27 14:32:25 -080086 for (CPUClosure* c : mClosures) {
87 delete c;
88 }
89 if (mScriptObj) {
90 dlclose(mScriptObj);
91 }
Yang Nida0f0692015-01-12 13:03:40 -080092}
93
94bool Batch::conflict(CPUClosure* closure) const {
Yang Nieb9aa672015-01-27 14:32:25 -080095 if (mClosures.empty()) {
96 return false;
97 }
Yang Nida0f0692015-01-12 13:03:40 -080098
Yang Nieb9aa672015-01-27 14:32:25 -080099 if (closure->mClosure->mKernelID.get() == nullptr ||
100 mClosures.front()->mClosure->mKernelID.get() == nullptr) {
101 // An invoke should be in a batch by itself, so it conflicts with any other
102 // closure.
Yang Ni1ffd86b2015-01-07 09:16:40 -0800103 return true;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800104 }
Yang Nieb9aa672015-01-27 14:32:25 -0800105
106 for (const auto &p : closure->mClosure->mGlobalDeps) {
107 const Closure* dep = p.first;
108 for (CPUClosure* c : mClosures) {
109 if (c->mClosure == dep) {
110 ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global",
111 closure, dep);
112 return true;
113 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800114 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800115 }
Yang Nieb9aa672015-01-27 14:32:25 -0800116 for (const auto &p : closure->mClosure->mArgDeps) {
117 const Closure* dep = p.first;
118 for (CPUClosure* c : mClosures) {
119 if (c->mClosure == dep) {
120 for (const auto &p1 : *p.second) {
121 if (p1.second->get() != nullptr) {
122 ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg",
123 closure, dep);
124 return true;
125 }
126 }
127 }
128 }
129 }
130 return false;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800131}
132
Yang Ni1ffd86b2015-01-07 09:16:40 -0800133CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
134 const ScriptGroupBase *sg) :
135 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
Yang Nieb9aa672015-01-27 14:32:25 -0800136 rsAssert(!mGroup->mClosures.empty());
Yang Ni1ffd86b2015-01-07 09:16:40 -0800137
Yang Nieb9aa672015-01-27 14:32:25 -0800138 Batch* batch = new Batch(this);
139 for (Closure* closure: mGroup->mClosures) {
140 const ScriptKernelID* kernelID = closure->mKernelID.get();
141 RsdCpuScriptImpl* si;
142 CPUClosure* cc;
143 if (kernelID != nullptr) {
144 si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
145 MTLaunchStruct mtls;
146 si->forEachKernelSetup(kernelID->mSlot, &mtls);
147 // TODO: Is mtls.fep.usrLen ever used?
148 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
149 mtls.fep.usr, mtls.fep.usrLen);
150 } else {
151 si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(
152 closure->mInvokeID->mScript);
153 cc = new CPUClosure(closure, si);
154 }
155
156 if (batch->conflict(cc)) {
157 mBatches.push_back(batch);
158 batch = new Batch(this);
159 }
160
161 batch->mClosures.push_back(cc);
Yang Ni1ffd86b2015-01-07 09:16:40 -0800162 }
Yang Nida0f0692015-01-12 13:03:40 -0800163
Yang Nieb9aa672015-01-27 14:32:25 -0800164 rsAssert(!batch->mClosures.empty());
165 mBatches.push_back(batch);
Yang Nida0f0692015-01-12 13:03:40 -0800166
167#ifndef RS_COMPATIBILITY_LIB
Yang Nieb9aa672015-01-27 14:32:25 -0800168 for (Batch* batch : mBatches) {
169 batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str());
170 }
Yang Nida0f0692015-01-12 13:03:40 -0800171#endif
Yang Ni1ffd86b2015-01-07 09:16:40 -0800172}
173
174CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
Yang Nieb9aa672015-01-27 14:32:25 -0800175 for (Batch* batch : mBatches) {
176 delete batch;
177 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800178}
179
Yang Nida0f0692015-01-12 13:03:40 -0800180namespace {
181
182#ifndef RS_COMPATIBILITY_LIB
183
// Returns the part of `path` after its last '/' or '\\' separator.
// A path without any separator is returned unchanged; a path that ends in a
// separator yields an empty string.
std::string getFileName(std::string path) {
    // Keep the full-width size_type: narrowing to `unsigned` would truncate
    // the index and only handled npos through unsigned wraparound.
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    if (lastSeparator == std::string::npos) {
        return path;
    }
    return path.substr(lastSeparator + 1);
}
188
189void setupCompileArguments(
Yang Nieb9aa672015-01-27 14:32:25 -0800190 const vector<string>& inputs, const vector<int>& kernels,
191 const string& output_dir, const string& output_filename,
192 const string& rsLib, vector<const char*>* args) {
193 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
194 args->push_back("-fPIC");
195 args->push_back("-embedRSInfo");
196 args->push_back("-mtriple");
197 args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
198 args->push_back("-bclib");
199 args->push_back(rsLib.c_str());
200 for (const string& input : inputs) {
201 args->push_back(input.c_str());
202 }
203 for (int kernel : kernels) {
204 args->push_back("-k");
205 string strKernel = std::to_string(kernel);
206 args->push_back(strKernel.c_str());
207 }
208 args->push_back("-output_path");
209 args->push_back(output_dir.c_str());
210 args->push_back("-o");
211 args->push_back(output_filename.c_str());
212 args->push_back(nullptr);
Yang Nida0f0692015-01-12 13:03:40 -0800213}
214
// Joins the first `n` C strings of `strs` with single spaces.
// Returns an empty string when `n` is not positive or `strs` is null; every
// one of the first `n` entries must be a valid, non-null C string.
std::string convertListToString(int n, const char* const* strs) {
    std::string ret;
    // Nothing to join: do not touch strs[0], which may not exist.
    if (strs == nullptr || n <= 0) {
        return ret;
    }
    ret.append(strs[0]);
    for (int i = 1; i < n; i++) {
        ret.append(" ");
        ret.append(strs[i]);
    }
    return ret;
}
224
225bool fuseAndCompile(const char** arguments,
226 const string& commandLine) {
Yang Nieb9aa672015-01-27 14:32:25 -0800227 const pid_t pid = fork();
Yang Nida0f0692015-01-12 13:03:40 -0800228
Yang Nieb9aa672015-01-27 14:32:25 -0800229 if (pid == -1) {
230 ALOGE("Couldn't fork for bcc execution");
231 return false;
232 }
Yang Nida0f0692015-01-12 13:03:40 -0800233
Yang Nieb9aa672015-01-27 14:32:25 -0800234 if (pid == 0) {
235 // Child process
236 ALOGV("Invoking BCC with: %s", commandLine.c_str());
237 execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments);
Yang Nida0f0692015-01-12 13:03:40 -0800238
Yang Nieb9aa672015-01-27 14:32:25 -0800239 ALOGE("execv() failed: %s", strerror(errno));
240 abort();
241 return false;
242 }
Yang Nida0f0692015-01-12 13:03:40 -0800243
Yang Nieb9aa672015-01-27 14:32:25 -0800244 // Parent process
245 int status = 0;
246 const pid_t w = waitpid(pid, &status, 0);
247 if (w == -1) {
248 return false;
249 }
Yang Nida0f0692015-01-12 13:03:40 -0800250
Yang Nieb9aa672015-01-27 14:32:25 -0800251 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) {
252 ALOGE("bcc terminated unexpectedly");
253 return false;
254 }
Yang Nida0f0692015-01-12 13:03:40 -0800255
Yang Nieb9aa672015-01-27 14:32:25 -0800256 return true;
Yang Nida0f0692015-01-12 13:03:40 -0800257}
258#endif
259
260} // anonymous namespace
261
262void Batch::tryToCreateFusedKernel(const char *cacheDir) {
263#ifndef RS_COMPATIBILITY_LIB
Yang Nieb9aa672015-01-27 14:32:25 -0800264 if (mClosures.size() < 2) {
265 return;
Yang Nida0f0692015-01-12 13:03:40 -0800266 }
267
Yang Nieb9aa672015-01-27 14:32:25 -0800268 //===--------------------------------------------------------------------===//
269 // Fuse the input kernels and generate native code in an object file
270 //===--------------------------------------------------------------------===//
Yang Nida0f0692015-01-12 13:03:40 -0800271
Yang Nieb9aa672015-01-27 14:32:25 -0800272 std::vector<string> inputFiles;
273 std::vector<int> slots;
Yang Nida0f0692015-01-12 13:03:40 -0800274
Yang Nieb9aa672015-01-27 14:32:25 -0800275 for (CPUClosure* cpuClosure : mClosures) {
276 const Closure* closure = cpuClosure->mClosure;
277 const ScriptKernelID* kernelID = closure->mKernelID.get();
278 const Script* script = kernelID->mScript;
Yang Nida0f0692015-01-12 13:03:40 -0800279
Yang Nieb9aa672015-01-27 14:32:25 -0800280 if (script->isIntrinsic()) {
281 return;
282 }
Yang Nida0f0692015-01-12 13:03:40 -0800283
Yang Nieb9aa672015-01-27 14:32:25 -0800284 const RsdCpuScriptImpl *cpuScript =
285 (const RsdCpuScriptImpl*)script->mHal.drv;
Yang Nida0f0692015-01-12 13:03:40 -0800286
Yang Nieb9aa672015-01-27 14:32:25 -0800287 const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
Yang Nida0f0692015-01-12 13:03:40 -0800288
Yang Nieb9aa672015-01-27 14:32:25 -0800289 inputFiles.push_back(bitcodeFilename);
290 slots.push_back(kernelID->mSlot);
291 }
Yang Nida0f0692015-01-12 13:03:40 -0800292
Yang Nieb9aa672015-01-27 14:32:25 -0800293 string outputPath(tempnam(cacheDir, "fused"));
294 string outputFileName = getFileName(outputPath);
295 string objFilePath(outputPath);
296 objFilePath.append(".o");
297 string rsLibPath(SYSLIBPATH"/libclcore.bc");
298 vector<const char*> arguments;
299 setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath,
300 &arguments);
301 string commandLine =
302 convertListToString(arguments.size() - 1, arguments.data());
Yang Nida0f0692015-01-12 13:03:40 -0800303
Yang Nieb9aa672015-01-27 14:32:25 -0800304 if (!fuseAndCompile(arguments.data(), commandLine)) {
305 return;
306 }
Yang Nida0f0692015-01-12 13:03:40 -0800307
Yang Nieb9aa672015-01-27 14:32:25 -0800308 //===--------------------------------------------------------------------===//
309 // Create and load the shared lib
310 //===--------------------------------------------------------------------===//
311
312 const char* resName = outputFileName.c_str();
313
314 if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) {
315 ALOGE("Failed to link object file '%s'", resName);
316 return;
317 }
318
319 void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
320 if (mSharedObj == nullptr) {
321 ALOGE("Unable to load '%s'", resName);
322 return;
323 }
324
325 mExecutable = ScriptExecutable::createFromSharedObject(
326 nullptr, // RS context. Unused.
327 mSharedObj);
Yang Nida0f0692015-01-12 13:03:40 -0800328
329#endif // RS_COMPATIBILITY_LIB
330}
331
Yang Ni1ffd86b2015-01-07 09:16:40 -0800332void CpuScriptGroup2Impl::execute() {
Yang Nieb9aa672015-01-27 14:32:25 -0800333 for (auto batch : mBatches) {
334 batch->setGlobalsForBatch();
335 batch->run();
336 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800337}
338
Yang Nida0f0692015-01-12 13:03:40 -0800339void Batch::setGlobalsForBatch() {
Yang Nieb9aa672015-01-27 14:32:25 -0800340 for (CPUClosure* cpuClosure : mClosures) {
341 const Closure* closure = cpuClosure->mClosure;
342 const ScriptKernelID* kernelID = closure->mKernelID.get();
343 Script* s;
344 if (kernelID != nullptr) {
345 s = kernelID->mScript;
346 } else {
347 s = cpuClosure->mClosure->mInvokeID->mScript;
348 }
349 for (const auto& p : closure->mGlobals) {
350 const void* value = p.second.first;
351 int size = p.second.second;
352 if (value == nullptr && size == 0) {
353 // This indicates the current closure depends on another closure for a
354 // global in their shared module (script). In this case we don't need to
355 // copy the value. For example, an invoke intializes a global variable
356 // which a kernel later reads.
357 continue;
358 }
359 // We use -1 size to indicate an ObjectBase rather than a primitive type
360 if (size < 0) {
361 s->setVarObj(p.first->mSlot, (ObjectBase*)value);
362 } else {
363 s->setVar(p.first->mSlot, (const void*)&value, size);
364 }
365 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800366 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800367}
368
Yang Nida0f0692015-01-12 13:03:40 -0800369void Batch::run() {
Yang Nieb9aa672015-01-27 14:32:25 -0800370 if (mExecutable != nullptr) {
371 MTLaunchStruct mtls;
372 const CPUClosure* firstCpuClosure = mClosures.front();
373 const CPUClosure* lastCpuClosure = mClosures.back();
374
375 firstCpuClosure->mSi->forEachMtlsSetup(
376 (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
377 firstCpuClosure->mClosure->mArgs.size(),
378 lastCpuClosure->mClosure->mReturnValue,
379 nullptr, 0, nullptr, &mtls);
380
381 mtls.script = nullptr;
382 mtls.fep.usr = nullptr;
383 mtls.kernel = mExecutable->getForEachFunction(0);
384
385 mGroup->getCpuRefImpl()->launchThreads(
386 (const Allocation**)&firstCpuClosure->mClosure->mArgs[0],
387 firstCpuClosure->mClosure->mArgs.size(),
388 lastCpuClosure->mClosure->mReturnValue,
389 nullptr, &mtls);
390
391 return;
392 }
393
394 if (mClosures.size() == 1 &&
395 mClosures.front()->mClosure->mKernelID.get() == nullptr) {
396 // This closure is for an invoke function
397 CPUClosure* cc = mClosures.front();
398 const Closure* c = cc->mClosure;
399 const ScriptInvokeID* invokeID = c->mInvokeID;
400 rsAssert(invokeID != nullptr);
401 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
402 return;
403 }
404
405 for (CPUClosure* cpuClosure : mClosures) {
406 const Closure* closure = cpuClosure->mClosure;
407 const ScriptKernelID* kernelID = closure->mKernelID.get();
408 cpuClosure->mSi->preLaunch(kernelID->mSlot,
409 (const Allocation**)&closure->mArgs[0],
410 closure->mArgs.size(), closure->mReturnValue,
411 cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
412 nullptr);
413 }
414
415 const CPUClosure* cpuClosure = mClosures.front();
416 const Closure* closure = cpuClosure->mClosure;
Yang Nida0f0692015-01-12 13:03:40 -0800417 MTLaunchStruct mtls;
Yang Nida0f0692015-01-12 13:03:40 -0800418
Yang Nieb9aa672015-01-27 14:32:25 -0800419 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
420 closure->mArgs.size(),
421 closure->mReturnValue,
422 nullptr, 0, nullptr, &mtls)) {
Yang Nida0f0692015-01-12 13:03:40 -0800423
Yang Nieb9aa672015-01-27 14:32:25 -0800424 mtls.script = nullptr;
425 mtls.kernel = (void (*)())&groupRoot;
426 mtls.fep.usr = &mClosures;
Yang Nida0f0692015-01-12 13:03:40 -0800427
Yang Nieb9aa672015-01-27 14:32:25 -0800428 mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
429 }
Yang Nida0f0692015-01-12 13:03:40 -0800430
Yang Nieb9aa672015-01-27 14:32:25 -0800431 for (CPUClosure* cpuClosure : mClosures) {
432 const Closure* closure = cpuClosure->mClosure;
433 const ScriptKernelID* kernelID = closure->mKernelID.get();
434 cpuClosure->mSi->postLaunch(kernelID->mSlot,
435 (const Allocation**)&closure->mArgs[0],
436 closure->mArgs.size(), closure->mReturnValue,
437 nullptr, 0, nullptr);
438 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800439}
440
441} // namespace renderscript
442} // namespace android