blob: d624e623611290de84df4ce8a4aefafed46977ed [file] [log] [blame]
Yang Ni1ffd86b2015-01-07 09:16:40 -08001#include "rsCpuScriptGroup2.h"
2
Yang Nida0f0692015-01-12 13:03:40 -08003#include <dlfcn.h>
Yabin Cui433558f2015-02-23 18:25:55 -08004#include <stdio.h>
5#include <stdlib.h>
6#include <unistd.h>
Yang Nida0f0692015-01-12 13:03:40 -08007
Yang Ni062c2872015-02-20 15:20:00 -08008#include <set>
9#include <sstream>
Yang Nida0f0692015-01-12 13:03:40 -080010#include <string>
11#include <vector>
12
13#ifndef RS_COMPATIBILITY_LIB
Jean-Luc Brouillet03fab682017-02-16 21:07:20 -080014#include "bcc/Config.h"
Yang Nida0f0692015-01-12 13:03:40 -080015#endif
16
Yang Ni1ffd86b2015-01-07 09:16:40 -080017#include "cpu_ref/rsCpuCore.h"
18#include "rsClosure.h"
19#include "rsContext.h"
20#include "rsCpuCore.h"
Yang Ni2abfcc62015-02-17 16:05:19 -080021#include "rsCpuExecutable.h"
Yang Ni1ffd86b2015-01-07 09:16:40 -080022#include "rsCpuScript.h"
23#include "rsScript.h"
24#include "rsScriptGroup2.h"
Yang Nida0f0692015-01-12 13:03:40 -080025#include "rsScriptIntrinsic.h"
26
27using std::string;
28using std::vector;
Yang Ni1ffd86b2015-01-07 09:16:40 -080029
30namespace android {
31namespace renderscript {
32
33namespace {
34
// Default number of kernel arguments.
// NOTE(review): no use of this constant is visible in this part of the file;
// confirm whether it is still needed.
constexpr size_t DefaultKernelArgCount = 2;
Yang Ni1ffd86b2015-01-07 09:16:40 -080036
David Grossb0abb142015-03-12 15:23:03 -070037void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
Yang Ni1ffd86b2015-01-07 09:16:40 -080038 uint32_t xend, uint32_t outstep) {
David Grossb0abb142015-03-12 15:23:03 -070039 const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
40 RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
Yang Ni1ffd86b2015-01-07 09:16:40 -080041
David Grossb0abb142015-03-12 15:23:03 -070042 const size_t oldInLen = mutable_kinfo->inLen;
43
44 decltype(mutable_kinfo->inStride) oldInStride;
45 memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
Yang Ni1ffd86b2015-01-07 09:16:40 -080046
Yang Nieb9aa672015-01-27 14:32:25 -080047 for (CPUClosure* cpuClosure : closures) {
48 const Closure* closure = cpuClosure->mClosure;
Yang Ni1ffd86b2015-01-07 09:16:40 -080049
David Grossb0abb142015-03-12 15:23:03 -070050 // There had better be enough space in mutable_kinfo
51 rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
Yang Ni1ffd86b2015-01-07 09:16:40 -080052
Yang Niff2bb542015-02-02 14:33:47 -080053 for (size_t i = 0; i < closure->mNumArg; i++) {
54 const void* arg = closure->mArgs[i];
Yang Nieb9aa672015-01-27 14:32:25 -080055 const Allocation* a = (const Allocation*)arg;
56 const uint32_t eStride = a->mHal.state.elementSizeBytes;
57 const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
58 eStride * xstart;
David Grossb0abb142015-03-12 15:23:03 -070059 if (kinfo->dim.y > 1) {
60 ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
Yang Nieb9aa672015-01-27 14:32:25 -080061 }
David Grossb0abb142015-03-12 15:23:03 -070062 mutable_kinfo->inPtr[i] = ptr;
63 mutable_kinfo->inStride[i] = eStride;
Yang Nieb9aa672015-01-27 14:32:25 -080064 }
David Grossb0abb142015-03-12 15:23:03 -070065 mutable_kinfo->inLen = closure->mNumArg;
Yang Nieb9aa672015-01-27 14:32:25 -080066
67 const Allocation* out = closure->mReturnValue;
68 const uint32_t ostep = out->mHal.state.elementSizeBytes;
69 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
70 ostep * xstart;
David Grossb0abb142015-03-12 15:23:03 -070071 if (kinfo->dim.y > 1) {
72 ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
Yang Nieb9aa672015-01-27 14:32:25 -080073 }
74
David Grossb0abb142015-03-12 15:23:03 -070075 mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
Yang Nieb9aa672015-01-27 14:32:25 -080076
Yang Ni7a106ad2016-03-10 16:06:36 -080077 // The implementation of an intrinsic relies on kinfo->usr being
78 // the "this" pointer to the intrinsic (an RsdCpuScriptIntrinsic object)
79 mutable_kinfo->usr = cpuClosure->mSi;
80
David Grossb0abb142015-03-12 15:23:03 -070081 cpuClosure->mFunc(kinfo, xstart, xend, ostep);
Yang Ni1ffd86b2015-01-07 09:16:40 -080082 }
83
David Grossb0abb142015-03-12 15:23:03 -070084 mutable_kinfo->inLen = oldInLen;
Yang Ni7a106ad2016-03-10 16:06:36 -080085 mutable_kinfo->usr = &closures;
David Grossb0abb142015-03-12 15:23:03 -070086 memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
Yang Ni1ffd86b2015-01-07 09:16:40 -080087}
88
Yang Nida0f0692015-01-12 13:03:40 -080089} // namespace
90
Yang Ni062c2872015-02-20 15:20:00 -080091Batch::Batch(CpuScriptGroup2Impl* group, const char* name) :
92 mGroup(group), mFunc(nullptr) {
93 mName = strndup(name, strlen(name));
94}
95
Yang Nida0f0692015-01-12 13:03:40 -080096Batch::~Batch() {
Yang Nieb9aa672015-01-27 14:32:25 -080097 for (CPUClosure* c : mClosures) {
98 delete c;
99 }
Yang Ni062c2872015-02-20 15:20:00 -0800100 free(mName);
Yang Nida0f0692015-01-12 13:03:40 -0800101}
102
Yang Niff2bb542015-02-02 14:33:47 -0800103bool Batch::conflict(CPUClosure* cpuClosure) const {
Yang Nieb9aa672015-01-27 14:32:25 -0800104 if (mClosures.empty()) {
105 return false;
106 }
Yang Nida0f0692015-01-12 13:03:40 -0800107
Yang Niff2bb542015-02-02 14:33:47 -0800108 const Closure* closure = cpuClosure->mClosure;
109
Yang Ni062c2872015-02-20 15:20:00 -0800110 if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) {
Yang Nieb9aa672015-01-27 14:32:25 -0800111 // An invoke should be in a batch by itself, so it conflicts with any other
112 // closure.
Yang Ni1ffd86b2015-01-07 09:16:40 -0800113 return true;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800114 }
Yang Nieb9aa672015-01-27 14:32:25 -0800115
Yang Niff2bb542015-02-02 14:33:47 -0800116 const auto& globalDeps = closure->mGlobalDeps;
117 const auto& argDeps = closure->mArgDeps;
118
119 for (CPUClosure* c : mClosures) {
120 const Closure* batched = c->mClosure;
121 if (globalDeps.find(batched) != globalDeps.end()) {
122 return true;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800123 }
Yang Niff2bb542015-02-02 14:33:47 -0800124 const auto& it = argDeps.find(batched);
125 if (it != argDeps.end()) {
126 const auto& args = (*it).second;
127 for (const auto &p1 : *args) {
Yang Nibd0af2d2015-03-23 17:14:58 -0700128 if (p1.second.get() != nullptr) {
Yang Niff2bb542015-02-02 14:33:47 -0800129 return true;
Yang Nieb9aa672015-01-27 14:32:25 -0800130 }
131 }
132 }
133 }
Yang Niff2bb542015-02-02 14:33:47 -0800134
Yang Ni1c206672015-06-18 15:57:56 -0700135 // The compiler fusion pass in bcc expects that kernels chained up through
136 // (1st) input and output.
137
138 const Closure* lastBatched = mClosures.back()->mClosure;
139 const auto& it = argDeps.find(lastBatched);
140
141 if (it == argDeps.end()) {
142 return true;
143 }
144
145 const auto& args = (*it).second;
146 for (const auto &p1 : *args) {
147 if (p1.first == 0 && p1.second.get() == nullptr) {
148 // The new closure depends on the last batched closure's return
149 // value (fieldId being nullptr) for its first argument (argument 0)
150 return false;
151 }
152 }
153
154 return true;
Yang Ni1ffd86b2015-01-07 09:16:40 -0800155}
156
Yang Ni1ffd86b2015-01-07 09:16:40 -0800157CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
158 const ScriptGroupBase *sg) :
Yang Ni062c2872015-02-20 15:20:00 -0800159 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)),
160 mExecutable(nullptr), mScriptObj(nullptr) {
Yang Nieb9aa672015-01-27 14:32:25 -0800161 rsAssert(!mGroup->mClosures.empty());
Yang Ni1ffd86b2015-01-07 09:16:40 -0800162
Yang Ni1efae292015-06-27 15:45:18 -0700163 mCpuRefImpl->lockMutex();
Yang Ni062c2872015-02-20 15:20:00 -0800164 Batch* batch = new Batch(this, "Batch0");
165 int i = 0;
Yang Nieb9aa672015-01-27 14:32:25 -0800166 for (Closure* closure: mGroup->mClosures) {
Yang Nieb9aa672015-01-27 14:32:25 -0800167 CPUClosure* cc;
Yang Ni062c2872015-02-20 15:20:00 -0800168 const IDBase* funcID = closure->mFunctionID.get();
169 RsdCpuScriptImpl* si =
170 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript);
171 if (closure->mIsKernel) {
Matt Wala14ce0072015-07-30 17:30:25 -0700172 MTLaunchStructForEach mtls;
Yang Ni062c2872015-02-20 15:20:00 -0800173 si->forEachKernelSetup(funcID->mSlot, &mtls);
174 cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel);
Yang Nieb9aa672015-01-27 14:32:25 -0800175 } else {
Yang Nieb9aa672015-01-27 14:32:25 -0800176 cc = new CPUClosure(closure, si);
177 }
178
179 if (batch->conflict(cc)) {
180 mBatches.push_back(batch);
Yang Ni062c2872015-02-20 15:20:00 -0800181 std::stringstream ss;
182 ss << "Batch" << ++i;
Yang Ni578419f2016-06-27 16:12:25 -0700183 std::string batchStr(ss.str());
184 batch = new Batch(this, batchStr.c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800185 }
186
187 batch->mClosures.push_back(cc);
Yang Ni1ffd86b2015-01-07 09:16:40 -0800188 }
Yang Nida0f0692015-01-12 13:03:40 -0800189
Yang Nieb9aa672015-01-27 14:32:25 -0800190 rsAssert(!batch->mClosures.empty());
191 mBatches.push_back(batch);
Yang Nida0f0692015-01-12 13:03:40 -0800192
193#ifndef RS_COMPATIBILITY_LIB
Yang Ni062c2872015-02-20 15:20:00 -0800194 compile(mGroup->mCacheDir);
195 if (mScriptObj != nullptr && mExecutable != nullptr) {
196 for (Batch* batch : mBatches) {
197 batch->resolveFuncPtr(mScriptObj);
198 }
Yang Nieb9aa672015-01-27 14:32:25 -0800199 }
Yang Ni062c2872015-02-20 15:20:00 -0800200#endif // RS_COMPATIBILITY_LIB
Yang Ni1efae292015-06-27 15:45:18 -0700201 mCpuRefImpl->unlockMutex();
Yang Ni062c2872015-02-20 15:20:00 -0800202}
203
204void Batch::resolveFuncPtr(void* sharedObj) {
205 std::string funcName(mName);
206 if (mClosures.front()->mClosure->mIsKernel) {
207 funcName.append(".expand");
208 }
209 mFunc = dlsym(sharedObj, funcName.c_str());
210 rsAssert (mFunc != nullptr);
Yang Ni1ffd86b2015-01-07 09:16:40 -0800211}
212
213CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
Yang Nieb9aa672015-01-27 14:32:25 -0800214 for (Batch* batch : mBatches) {
215 delete batch;
216 }
Yang Nibd0af2d2015-03-23 17:14:58 -0700217 delete mExecutable;
Yang Ni062c2872015-02-20 15:20:00 -0800218 // TODO: move this dlclose into ~ScriptExecutable().
219 if (mScriptObj != nullptr) {
220 dlclose(mScriptObj);
221 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800222}
223
Yang Nida0f0692015-01-12 13:03:40 -0800224namespace {
225
226#ifndef RS_COMPATIBILITY_LIB
227
Yang Niedf4ea32015-03-11 09:07:15 -0700228string getCoreLibPath(Context* context, string* coreLibRelaxedPath) {
229 *coreLibRelaxedPath = "";
230
231 // If we're debugging, use the debug library.
232 if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
Victor Khimenkocfb1d0b2016-10-28 17:05:22 +0200233 return SYSLIBPATH_BC"/libclcore_debug.bc";
Yang Niedf4ea32015-03-11 09:07:15 -0700234 }
235
236 // Check for a platform specific library
237
238#if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
239 // NEON-capable ARMv7a devices can use an accelerated math library
240 // for all reduced precision scripts.
241 // ARMv8 does not use NEON, as ASIMD can be used with all precision
242 // levels.
Victor Khimenkocfb1d0b2016-10-28 17:05:22 +0200243 *coreLibRelaxedPath = SYSLIBPATH_BC"/libclcore_neon.bc";
Yang Niedf4ea32015-03-11 09:07:15 -0700244#endif
245
246#if defined(__i386__) || defined(__x86_64__)
247 // x86 devices will use an optimized library.
Victor Khimenkocfb1d0b2016-10-28 17:05:22 +0200248 return SYSLIBPATH_BC"/libclcore_x86.bc";
Yang Niedf4ea32015-03-11 09:07:15 -0700249#else
Victor Khimenkocfb1d0b2016-10-28 17:05:22 +0200250 return SYSLIBPATH_BC"/libclcore.bc";
Yang Niedf4ea32015-03-11 09:07:15 -0700251#endif
252}
253
Yang Nida0f0692015-01-12 13:03:40 -0800254void setupCompileArguments(
Yang Nicb170152015-04-16 10:27:02 -0700255 const vector<const char*>& inputs, const vector<string>& kernelBatches,
Yang Ni062c2872015-02-20 15:20:00 -0800256 const vector<string>& invokeBatches,
Yang Nicb170152015-04-16 10:27:02 -0700257 const char* outputDir, const char* outputFileName,
258 const char* coreLibPath, const char* coreLibRelaxedPath,
Yang Ni82376382015-05-13 14:51:10 -0700259 const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant,
verena beckhamf5029802015-05-22 16:51:42 +0100260 int optLevel, vector<const char*>* args) {
Yang Nieb9aa672015-01-27 14:32:25 -0800261 args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH);
262 args->push_back("-fPIC");
263 args->push_back("-embedRSInfo");
Yang Ni82376382015-05-13 14:51:10 -0700264 if (emitGlobalInfo) {
265 args->push_back("-rs-global-info");
266 if (emitGlobalInfoSkipConstant) {
267 args->push_back("-rs-global-info-skip-constant");
268 }
269 }
Yang Nieb9aa672015-01-27 14:32:25 -0800270 args->push_back("-mtriple");
271 args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
272 args->push_back("-bclib");
Yang Nicb170152015-04-16 10:27:02 -0700273 args->push_back(coreLibPath);
Yang Niedf4ea32015-03-11 09:07:15 -0700274 args->push_back("-bclib_relaxed");
Yang Nicb170152015-04-16 10:27:02 -0700275 args->push_back(coreLibRelaxedPath);
276 for (const char* input : inputs) {
277 args->push_back(input);
Yang Nieb9aa672015-01-27 14:32:25 -0800278 }
Yang Ni062c2872015-02-20 15:20:00 -0800279 for (const string& batch : kernelBatches) {
280 args->push_back("-merge");
281 args->push_back(batch.c_str());
282 }
283 for (const string& batch : invokeBatches) {
284 args->push_back("-invoke");
285 args->push_back(batch.c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800286 }
287 args->push_back("-output_path");
Yang Nicb170152015-04-16 10:27:02 -0700288 args->push_back(outputDir);
Yang Ni1efae292015-06-27 15:45:18 -0700289
verena beckhamf5029802015-05-22 16:51:42 +0100290 args->push_back("-O");
Yang Ni7a106ad2016-03-10 16:06:36 -0800291 switch (optLevel) {
292 case 0:
293 args->push_back("0");
294 break;
295 case 3:
296 args->push_back("3");
297 break;
298 default:
299 ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel);
300 args->push_back("3");
301 break;
302 }
verena beckhamf5029802015-05-22 16:51:42 +0100303
Yang Ni1efae292015-06-27 15:45:18 -0700304 // The output filename has to be the last, in case we need to pop it out and
305 // replace with a different name.
Yang Nieb9aa672015-01-27 14:32:25 -0800306 args->push_back("-o");
Yang Nicb170152015-04-16 10:27:02 -0700307 args->push_back(outputFileName);
Yang Nida0f0692015-01-12 13:03:40 -0800308}
309
Yang Nicbff7bc2015-05-26 16:47:30 -0700310void generateSourceSlot(RsdCpuReferenceImpl* ctxt,
311 const Closure& closure,
Yang Nicb170152015-04-16 10:27:02 -0700312 const std::vector<const char*>& inputs,
Yang Ni062c2872015-02-20 15:20:00 -0800313 std::stringstream& ss) {
314 const IDBase* funcID = (const IDBase*)closure.mFunctionID.get();
315 const Script* script = funcID->mScript;
316
317 rsAssert (!script->isIntrinsic());
318
319 const RsdCpuScriptImpl *cpuScript =
Yang Nicbff7bc2015-05-26 16:47:30 -0700320 (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
Yang Ni062c2872015-02-20 15:20:00 -0800321 const string& bitcodeFilename = cpuScript->getBitcodeFilePath();
322
323 const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) -
324 inputs.begin();
325
326 ss << index << "," << funcID->mSlot << ".";
327}
328
#endif  // RS_COMPATIBILITY_LIB
Yang Nida0f0692015-01-12 13:03:40 -0800330
331} // anonymous namespace
332
Luke Drummondb4b603d2017-04-17 11:47:06 -0700333// This function is used by the debugger to inspect ScriptGroup
334// compilations.
335//
336// "__attribute__((noinline))" and "__asm__" are used to prevent the
337// function call from being eliminated as a no-op (see the "noinline"
338// attribute in gcc documentation).
339//
340// "__attribute__((weak))" is used to prevent callers from recognizing
341// that this is guaranteed to be the function definition, recognizing
342// that certain arguments are unused, and optimizing away the passing
343// of those arguments (see the LLVM optimization
344// DeadArgumentElimination). Theoretically, the compiler could get
345// aggressive enough with link-time optimization that even marking the
346// entry point as a weak definition wouldn't solve the problem.
347//
348extern __attribute__((noinline)) __attribute__((weak))
Aidan Dodds1cea94d2016-10-04 11:26:23 +0100349void debugHintScriptGroup2(const char* groupName,
350 const uint32_t groupNameSize,
351 const ExpandFuncTy* kernel,
352 const uint32_t kernelCount) {
353 ALOGV("group name: %d:%s\n", groupNameSize, groupName);
354 for (uint32_t i=0; i < kernelCount; ++i) {
355 const char* f1 = (const char*)(kernel[i]);
Luke Drummondb4b603d2017-04-17 11:47:06 -0700356 __asm__ __volatile__("");
Aidan Dodds1cea94d2016-10-04 11:26:23 +0100357 ALOGV(" closure: %p\n", (const void*)f1);
358 }
359 // do nothing, this is just a hook point for the debugger.
360 return;
361}
362
Yang Ni062c2872015-02-20 15:20:00 -0800363void CpuScriptGroup2Impl::compile(const char* cacheDir) {
Yang Nida0f0692015-01-12 13:03:40 -0800364#ifndef RS_COMPATIBILITY_LIB
Yang Ni062c2872015-02-20 15:20:00 -0800365 if (mGroup->mClosures.size() < 2) {
Yang Nieb9aa672015-01-27 14:32:25 -0800366 return;
Yang Nida0f0692015-01-12 13:03:40 -0800367 }
368
Aidan Dodds1cea94d2016-10-04 11:26:23 +0100369 const int optLevel = getCpuRefImpl()->getContext()->getOptLevel();
370 if (optLevel == 0) {
371 std::vector<ExpandFuncTy> kernels;
372 for (const Batch* b : mBatches)
373 for (const CPUClosure* c : b->mClosures)
374 kernels.push_back(c->mFunc);
375
376 if (kernels.size()) {
377 // pass this information on to the debugger via a hint function.
378 debugHintScriptGroup2(mGroup->mName,
379 strlen(mGroup->mName),
380 kernels.data(),
381 kernels.size());
382 }
383
384 // skip script group compilation forcing the driver to use the fallback
385 // execution path which currently has better support for debugging.
386 return;
387 }
388
Yang Nicb170152015-04-16 10:27:02 -0700389 auto comparator = [](const char* str1, const char* str2) -> bool {
390 return strcmp(str1, str2) < 0;
391 };
392 std::set<const char*, decltype(comparator)> inputSet(comparator);
393
Yang Ni062c2872015-02-20 15:20:00 -0800394 for (Closure* closure : mGroup->mClosures) {
395 const Script* script = closure->mFunctionID.get()->mScript;
Yang Nida0f0692015-01-12 13:03:40 -0800396
Yang Ni062c2872015-02-20 15:20:00 -0800397 // If any script is an intrinsic, give up trying fusing the kernels.
Yang Nieb9aa672015-01-27 14:32:25 -0800398 if (script->isIntrinsic()) {
399 return;
400 }
Yang Nida0f0692015-01-12 13:03:40 -0800401
Yang Nieb9aa672015-01-27 14:32:25 -0800402 const RsdCpuScriptImpl *cpuScript =
Yang Nicbff7bc2015-05-26 16:47:30 -0700403 (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script);
404
Yang Nicb170152015-04-16 10:27:02 -0700405 const char* bitcodeFilename = cpuScript->getBitcodeFilePath();
Yang Ni062c2872015-02-20 15:20:00 -0800406 inputSet.insert(bitcodeFilename);
407 }
Yang Nida0f0692015-01-12 13:03:40 -0800408
Yang Nicb170152015-04-16 10:27:02 -0700409 std::vector<const char*> inputs(inputSet.begin(), inputSet.end());
Yang Ni062c2872015-02-20 15:20:00 -0800410
411 std::vector<string> kernelBatches;
412 std::vector<string> invokeBatches;
413
414 int i = 0;
415 for (const auto& batch : mBatches) {
416 rsAssert(batch->size() > 0);
417
418 std::stringstream ss;
419 ss << batch->mName << ":";
420
421 if (!batch->mClosures.front()->mClosure->mIsKernel) {
422 rsAssert(batch->size() == 1);
Yang Nicbff7bc2015-05-26 16:47:30 -0700423 generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss);
Yang Ni062c2872015-02-20 15:20:00 -0800424 invokeBatches.push_back(ss.str());
425 } else {
426 for (const auto& cpuClosure : batch->mClosures) {
Yang Nicbff7bc2015-05-26 16:47:30 -0700427 generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss);
Yang Ni062c2872015-02-20 15:20:00 -0800428 }
429 kernelBatches.push_back(ss.str());
430 }
Yang Nieb9aa672015-01-27 14:32:25 -0800431 }
Yang Nida0f0692015-01-12 13:03:40 -0800432
Yabin Cui433558f2015-02-23 18:25:55 -0800433 rsAssert(cacheDir != nullptr);
434 string objFilePath(cacheDir);
Yang Nif02a2b02015-04-07 16:00:31 -0700435 objFilePath.append("/");
436 objFilePath.append(mGroup->mName);
437 objFilePath.append(".o");
Yabin Cui433558f2015-02-23 18:25:55 -0800438
Yang Nicb170152015-04-16 10:27:02 -0700439 const char* resName = mGroup->mName;
Yang Niedf4ea32015-03-11 09:07:15 -0700440 string coreLibRelaxedPath;
441 const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(),
442 &coreLibRelaxedPath);
Yang Nif02a2b02015-04-07 16:00:31 -0700443
Yang Nieb9aa672015-01-27 14:32:25 -0800444 vector<const char*> arguments;
Yang Ni82376382015-05-13 14:51:10 -0700445 bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo();
446 bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant();
Yang Nicb170152015-04-16 10:27:02 -0700447 setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir,
448 resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(),
Yang Ni82376382015-05-13 14:51:10 -0700449 emitGlobalInfo, emitGlobalInfoSkipConstant,
verena beckhamf5029802015-05-22 16:51:42 +0100450 optLevel, &arguments);
Yang Nif02a2b02015-04-07 16:00:31 -0700451
452 std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1,
Yang Nicb170152015-04-16 10:27:02 -0700453 arguments.data()));
Yang Nif02a2b02015-04-07 16:00:31 -0700454
Yang Nicb170152015-04-16 10:27:02 -0700455 inputs.push_back(coreLibPath.c_str());
456 inputs.push_back(coreLibRelaxedPath.c_str());
457
458 uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(),
459 inputs.data(), inputs.size());
460
461 if (checksum == 0) {
Yang Nif02a2b02015-04-07 16:00:31 -0700462 return;
463 }
464
Yang Nicb170152015-04-16 10:27:02 -0700465 std::stringstream ss;
466 ss << std::hex << checksum;
Yang Ni578419f2016-06-27 16:12:25 -0700467 std::string checksumStr(ss.str());
Yang Nif02a2b02015-04-07 16:00:31 -0700468
469 //===--------------------------------------------------------------------===//
470 // Try to load a shared lib from code cache matching filename and checksum
471 //===--------------------------------------------------------------------===//
472
Yang Ni1efae292015-06-27 15:45:18 -0700473 bool alreadyLoaded = false;
474 std::string cloneName;
475
Yang Nia845c352017-05-01 15:53:23 -0700476 const bool useRSDebugContext =
477 (mCpuRefImpl->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG);
478 const bool reuse = !is_force_recompile() && !useRSDebugContext;
479 if (reuse) {
480 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr,
481 &alreadyLoaded);
482 }
Yang Nif02a2b02015-04-07 16:00:31 -0700483 if (mScriptObj != nullptr) {
Yang Ni1efae292015-06-27 15:45:18 -0700484 // A shared library named resName is found in code cache directory
485 // cacheDir, and loaded with the handle stored in mScriptObj.
486
Yang Nif02a2b02015-04-07 16:00:31 -0700487 mExecutable = ScriptExecutable::createFromSharedObject(
Yang Niade31372016-04-06 09:34:34 -0700488 mScriptObj, checksum);
Yang Ni1efae292015-06-27 15:45:18 -0700489
Yang Nif02a2b02015-04-07 16:00:31 -0700490 if (mExecutable != nullptr) {
Yang Ni1efae292015-06-27 15:45:18 -0700491 // The loaded shared library in mScriptObj has a matching checksum.
492 // An executable object has been created.
Yang Nicb170152015-04-16 10:27:02 -0700493 return;
Yang Nif02a2b02015-04-07 16:00:31 -0700494 }
Yang Ni1efae292015-06-27 15:45:18 -0700495
496 ALOGV("Failed to create an executable object from so file due to "
497 "mismatching checksum");
498
499 if (alreadyLoaded) {
500 // The shared object found in code cache has already been loaded.
501 // A different file name is needed for the new shared library, to
502 // avoid corrupting the currently loaded instance.
503
504 cloneName.append(resName);
505 cloneName.append("#");
Miao Wang82e135c2017-02-27 23:35:35 -0800506 cloneName.append(SharedLibraryUtils::getRandomString(6).c_str());
Yang Ni1efae292015-06-27 15:45:18 -0700507
508 // The last element in arguments is the output filename.
509 arguments.pop_back();
510 arguments.push_back(cloneName.c_str());
511 }
512
Yang Nif02a2b02015-04-07 16:00:31 -0700513 dlclose(mScriptObj);
514 mScriptObj = nullptr;
515 }
516
517 //===--------------------------------------------------------------------===//
518 // Fuse the input kernels and generate native code in an object file
519 //===--------------------------------------------------------------------===//
520
521 arguments.push_back("-build-checksum");
Yang Ni578419f2016-06-27 16:12:25 -0700522 arguments.push_back(checksumStr.c_str());
Yang Nif02a2b02015-04-07 16:00:31 -0700523 arguments.push_back(nullptr);
Yang Nida0f0692015-01-12 13:03:40 -0800524
Pirama Arumuga Nainar2fa8a232015-03-25 17:21:40 -0700525 bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH,
Yang Nif02a2b02015-04-07 16:00:31 -0700526 arguments.size()-1,
527 arguments.data());
Pirama Arumuga Nainar2fa8a232015-03-25 17:21:40 -0700528 if (!compiled) {
Yang Nieb9aa672015-01-27 14:32:25 -0800529 return;
530 }
Yang Nida0f0692015-01-12 13:03:40 -0800531
Yang Nieb9aa672015-01-27 14:32:25 -0800532 //===--------------------------------------------------------------------===//
533 // Create and load the shared lib
534 //===--------------------------------------------------------------------===//
535
Yang Nia845c352017-05-01 15:53:23 -0700536 std::string SOPath;
537
Stephen Hines4c368af2015-05-06 00:43:02 -0700538 if (!SharedLibraryUtils::createSharedLibrary(
Yang Nia845c352017-05-01 15:53:23 -0700539 getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName,
540 reuse, &SOPath)) {
Yang Nieb9aa672015-01-27 14:32:25 -0800541 ALOGE("Failed to link object file '%s'", resName);
Yang Ni8b942222015-04-02 17:48:28 -0700542 unlink(objFilePath.c_str());
Yang Nieb9aa672015-01-27 14:32:25 -0800543 return;
544 }
545
Yang Ni8b942222015-04-02 17:48:28 -0700546 unlink(objFilePath.c_str());
547
Yang Nia845c352017-05-01 15:53:23 -0700548 if (reuse) {
549 mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
550 } else {
551 mScriptObj = SharedLibraryUtils::loadAndDeleteSharedLibrary(SOPath.c_str());
552 }
Yang Ni062c2872015-02-20 15:20:00 -0800553 if (mScriptObj == nullptr) {
Yang Nieb9aa672015-01-27 14:32:25 -0800554 ALOGE("Unable to load '%s'", resName);
555 return;
556 }
557
Yang Ni1efae292015-06-27 15:45:18 -0700558 if (alreadyLoaded) {
559 // Delete the temporary, random-named file that we created to avoid
560 // interfering with an already loaded shared library.
561 string cloneFilePath(cacheDir);
562 cloneFilePath.append("/");
563 cloneFilePath.append(cloneName.c_str());
564 cloneFilePath.append(".so");
565 unlink(cloneFilePath.c_str());
566 }
567
Yang Niade31372016-04-06 09:34:34 -0700568 mExecutable = ScriptExecutable::createFromSharedObject(mScriptObj);
Yang Nida0f0692015-01-12 13:03:40 -0800569
570#endif // RS_COMPATIBILITY_LIB
571}
572
Yang Ni1ffd86b2015-01-07 09:16:40 -0800573void CpuScriptGroup2Impl::execute() {
Yang Nieb9aa672015-01-27 14:32:25 -0800574 for (auto batch : mBatches) {
575 batch->setGlobalsForBatch();
576 batch->run();
577 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800578}
579
Yang Nida0f0692015-01-12 13:03:40 -0800580void Batch::setGlobalsForBatch() {
Yang Nieb9aa672015-01-27 14:32:25 -0800581 for (CPUClosure* cpuClosure : mClosures) {
582 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800583 const IDBase* funcID = closure->mFunctionID.get();
584 Script* s = funcID->mScript;;
Yang Nieb9aa672015-01-27 14:32:25 -0800585 for (const auto& p : closure->mGlobals) {
Yang Nifef0cd42015-11-11 15:08:16 -0800586 const int64_t value = p.second.first;
Yang Nieb9aa672015-01-27 14:32:25 -0800587 int size = p.second.second;
Yang Nifef0cd42015-11-11 15:08:16 -0800588 if (value == 0 && size == 0) {
Yang Nieb9aa672015-01-27 14:32:25 -0800589 // This indicates the current closure depends on another closure for a
590 // global in their shared module (script). In this case we don't need to
591 // copy the value. For example, an invoke intializes a global variable
592 // which a kernel later reads.
593 continue;
594 }
Yang Niff2bb542015-02-02 14:33:47 -0800595 rsAssert(p.first != nullptr);
Yang Ni062c2872015-02-20 15:20:00 -0800596 Script* script = p.first->mScript;
Yang Ni7a106ad2016-03-10 16:06:36 -0800597 rsAssert(script == s);
Yang Nicbff7bc2015-05-26 16:47:30 -0700598 RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl();
Yang Ni062c2872015-02-20 15:20:00 -0800599 const RsdCpuScriptImpl *cpuScript =
Yang Nicbff7bc2015-05-26 16:47:30 -0700600 (const RsdCpuScriptImpl *)ctxt->lookupScript(script);
Yang Ni062c2872015-02-20 15:20:00 -0800601 int slot = p.first->mSlot;
602 ScriptExecutable* exec = mGroup->getExecutable();
603 if (exec != nullptr) {
604 const char* varName = cpuScript->getFieldName(slot);
605 void* addr = exec->getFieldAddress(varName);
606 if (size < 0) {
607 rsrSetObject(mGroup->getCpuRefImpl()->getContext(),
608 (rs_object_base*)addr, (ObjectBase*)value);
609 } else {
610 memcpy(addr, (const void*)&value, size);
611 }
Yang Nieb9aa672015-01-27 14:32:25 -0800612 } else {
Yang Ni062c2872015-02-20 15:20:00 -0800613 // We use -1 size to indicate an ObjectBase rather than a primitive type
614 if (size < 0) {
615 s->setVarObj(slot, (ObjectBase*)value);
616 } else {
617 s->setVar(slot, (const void*)&value, size);
618 }
Yang Nieb9aa672015-01-27 14:32:25 -0800619 }
620 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800621 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800622}
623
Yang Nida0f0692015-01-12 13:03:40 -0800624void Batch::run() {
Yang Ni062c2872015-02-20 15:20:00 -0800625 if (!mClosures.front()->mClosure->mIsKernel) {
626 rsAssert(mClosures.size() == 1);
627
628 // This batch contains a single closure for an invoke function
629 CPUClosure* cc = mClosures.front();
630 const Closure* c = cc->mClosure;
631
632 if (mFunc != nullptr) {
633 // TODO: Need align pointers for x86_64.
634 // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp
635 ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength);
636 } else {
637 const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get();
638 rsAssert(invokeID != nullptr);
639 cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength);
640 }
641
642 return;
643 }
644
645 if (mFunc != nullptr) {
Matt Wala14ce0072015-07-30 17:30:25 -0700646 MTLaunchStructForEach mtls;
Yang Nieb9aa672015-01-27 14:32:25 -0800647 const CPUClosure* firstCpuClosure = mClosures.front();
648 const CPUClosure* lastCpuClosure = mClosures.back();
649
650 firstCpuClosure->mSi->forEachMtlsSetup(
Yang Niff2bb542015-02-02 14:33:47 -0800651 (const Allocation**)firstCpuClosure->mClosure->mArgs,
652 firstCpuClosure->mClosure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800653 lastCpuClosure->mClosure->mReturnValue,
654 nullptr, 0, nullptr, &mtls);
655
656 mtls.script = nullptr;
657 mtls.fep.usr = nullptr;
Yang Ni062c2872015-02-20 15:20:00 -0800658 mtls.kernel = (ForEachFunc_t)mFunc;
Yang Nieb9aa672015-01-27 14:32:25 -0800659
Matt Wala14ce0072015-07-30 17:30:25 -0700660 mGroup->getCpuRefImpl()->launchForEach(
Yang Niff2bb542015-02-02 14:33:47 -0800661 (const Allocation**)firstCpuClosure->mClosure->mArgs,
662 firstCpuClosure->mClosure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800663 lastCpuClosure->mClosure->mReturnValue,
664 nullptr, &mtls);
665
666 return;
667 }
668
Yang Nieb9aa672015-01-27 14:32:25 -0800669 for (CPUClosure* cpuClosure : mClosures) {
670 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800671 const ScriptKernelID* kernelID =
672 (const ScriptKernelID*)closure->mFunctionID.get();
Yang Nieb9aa672015-01-27 14:32:25 -0800673 cpuClosure->mSi->preLaunch(kernelID->mSlot,
Yang Niff2bb542015-02-02 14:33:47 -0800674 (const Allocation**)closure->mArgs,
675 closure->mNumArg, closure->mReturnValue,
Yang Ni062c2872015-02-20 15:20:00 -0800676 nullptr, 0, nullptr);
Yang Nieb9aa672015-01-27 14:32:25 -0800677 }
678
679 const CPUClosure* cpuClosure = mClosures.front();
680 const Closure* closure = cpuClosure->mClosure;
Matt Wala14ce0072015-07-30 17:30:25 -0700681 MTLaunchStructForEach mtls;
Yang Nida0f0692015-01-12 13:03:40 -0800682
Yang Niff2bb542015-02-02 14:33:47 -0800683 if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs,
684 closure->mNumArg,
Yang Nieb9aa672015-01-27 14:32:25 -0800685 closure->mReturnValue,
686 nullptr, 0, nullptr, &mtls)) {
Yang Nida0f0692015-01-12 13:03:40 -0800687
Yang Nieb9aa672015-01-27 14:32:25 -0800688 mtls.script = nullptr;
Matt Wala14ce0072015-07-30 17:30:25 -0700689 mtls.kernel = &groupRoot;
Yang Nieb9aa672015-01-27 14:32:25 -0800690 mtls.fep.usr = &mClosures;
Yang Nida0f0692015-01-12 13:03:40 -0800691
Matt Wala14ce0072015-07-30 17:30:25 -0700692 mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls);
Yang Nieb9aa672015-01-27 14:32:25 -0800693 }
Yang Nida0f0692015-01-12 13:03:40 -0800694
Yang Nieb9aa672015-01-27 14:32:25 -0800695 for (CPUClosure* cpuClosure : mClosures) {
696 const Closure* closure = cpuClosure->mClosure;
Yang Ni062c2872015-02-20 15:20:00 -0800697 const ScriptKernelID* kernelID =
698 (const ScriptKernelID*)closure->mFunctionID.get();
Yang Nieb9aa672015-01-27 14:32:25 -0800699 cpuClosure->mSi->postLaunch(kernelID->mSlot,
Yang Niff2bb542015-02-02 14:33:47 -0800700 (const Allocation**)closure->mArgs,
701 closure->mNumArg, closure->mReturnValue,
Yang Nieb9aa672015-01-27 14:32:25 -0800702 nullptr, 0, nullptr);
703 }
Yang Ni1ffd86b2015-01-07 09:16:40 -0800704}
705
706} // namespace renderscript
707} // namespace android