Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 1 | #include "rsCpuScriptGroup2.h" |
| 2 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 3 | #include <dlfcn.h> |
| 4 | |
| 5 | #include <string> |
| 6 | #include <vector> |
| 7 | |
| 8 | #ifndef RS_COMPATIBILITY_LIB |
| 9 | #include "bcc/Config/Config.h" |
| 10 | #include <sys/wait.h> |
| 11 | #endif |
| 12 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 13 | #include "cpu_ref/rsCpuCore.h" |
| 14 | #include "rsClosure.h" |
| 15 | #include "rsContext.h" |
| 16 | #include "rsCpuCore.h" |
| 17 | #include "rsCpuScript.h" |
| 18 | #include "rsScript.h" |
| 19 | #include "rsScriptGroup2.h" |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 20 | #include "rsScriptIntrinsic.h" |
| 21 | |
| 22 | using std::string; |
| 23 | using std::vector; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 24 | |
| 25 | namespace android { |
| 26 | namespace renderscript { |
| 27 | |
| 28 | namespace { |
| 29 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 30 | const size_t DefaultKernelArgCount = 2; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 31 | |
| 32 | void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, |
| 33 | uint32_t xend, uint32_t outstep) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 34 | const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr; |
| 35 | RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; |
| 36 | const void **oldIns = kparams->ins; |
| 37 | uint32_t *oldStrides = kparams->inEStrides; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 38 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 39 | std::vector<const void*> ins(DefaultKernelArgCount); |
| 40 | std::vector<uint32_t> strides(DefaultKernelArgCount); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 41 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 42 | for (CPUClosure* cpuClosure : closures) { |
| 43 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 44 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 45 | auto in_iter = ins.begin(); |
| 46 | auto stride_iter = strides.begin(); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 47 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 48 | for (const auto& arg : closure->mArgs) { |
| 49 | const Allocation* a = (const Allocation*)arg; |
| 50 | const uint32_t eStride = a->mHal.state.elementSizeBytes; |
| 51 | const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + |
| 52 | eStride * xstart; |
| 53 | if (kparams->dimY > 1) { |
| 54 | ptr += a->mHal.drvState.lod[0].stride * kparams->y; |
| 55 | } |
| 56 | *in_iter++ = ptr; |
| 57 | *stride_iter++ = eStride; |
| 58 | } |
| 59 | |
| 60 | mutable_kparams->ins = &ins[0]; |
| 61 | mutable_kparams->inEStrides = &strides[0]; |
| 62 | |
| 63 | const Allocation* out = closure->mReturnValue; |
| 64 | const uint32_t ostep = out->mHal.state.elementSizeBytes; |
| 65 | const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + |
| 66 | ostep * xstart; |
| 67 | if (kparams->dimY > 1) { |
| 68 | ptr += out->mHal.drvState.lod[0].stride * kparams->y; |
| 69 | } |
| 70 | |
| 71 | mutable_kparams->out = (void*)ptr; |
| 72 | |
| 73 | mutable_kparams->usr = cpuClosure->mUsrPtr; |
| 74 | |
| 75 | cpuClosure->mFunc(kparams, xstart, xend, ostep); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 76 | } |
| 77 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 78 | mutable_kparams->ins = oldIns; |
| 79 | mutable_kparams->inEStrides = oldStrides; |
| 80 | mutable_kparams->usr = &closures; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 81 | } |
| 82 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 83 | } // namespace |
| 84 | |
| 85 | Batch::~Batch() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 86 | for (CPUClosure* c : mClosures) { |
| 87 | delete c; |
| 88 | } |
| 89 | if (mScriptObj) { |
| 90 | dlclose(mScriptObj); |
| 91 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 92 | } |
| 93 | |
| 94 | bool Batch::conflict(CPUClosure* closure) const { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 95 | if (mClosures.empty()) { |
| 96 | return false; |
| 97 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 98 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 99 | if (closure->mClosure->mKernelID.get() == nullptr || |
| 100 | mClosures.front()->mClosure->mKernelID.get() == nullptr) { |
| 101 | // An invoke should be in a batch by itself, so it conflicts with any other |
| 102 | // closure. |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 103 | return true; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 104 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 105 | |
| 106 | for (const auto &p : closure->mClosure->mGlobalDeps) { |
| 107 | const Closure* dep = p.first; |
| 108 | for (CPUClosure* c : mClosures) { |
| 109 | if (c->mClosure == dep) { |
| 110 | ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its global", |
| 111 | closure, dep); |
| 112 | return true; |
| 113 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 114 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 115 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 116 | for (const auto &p : closure->mClosure->mArgDeps) { |
| 117 | const Closure* dep = p.first; |
| 118 | for (CPUClosure* c : mClosures) { |
| 119 | if (c->mClosure == dep) { |
| 120 | for (const auto &p1 : *p.second) { |
| 121 | if (p1.second->get() != nullptr) { |
| 122 | ALOGV("ScriptGroup2: closure %p conflicting with closure %p via its arg", |
| 123 | closure, dep); |
| 124 | return true; |
| 125 | } |
| 126 | } |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | return false; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 131 | } |
| 132 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 133 | CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, |
| 134 | const ScriptGroupBase *sg) : |
| 135 | mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 136 | rsAssert(!mGroup->mClosures.empty()); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 137 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 138 | Batch* batch = new Batch(this); |
| 139 | for (Closure* closure: mGroup->mClosures) { |
| 140 | const ScriptKernelID* kernelID = closure->mKernelID.get(); |
| 141 | RsdCpuScriptImpl* si; |
| 142 | CPUClosure* cc; |
| 143 | if (kernelID != nullptr) { |
| 144 | si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript); |
| 145 | MTLaunchStruct mtls; |
| 146 | si->forEachKernelSetup(kernelID->mSlot, &mtls); |
| 147 | // TODO: Is mtls.fep.usrLen ever used? |
| 148 | cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel, |
| 149 | mtls.fep.usr, mtls.fep.usrLen); |
| 150 | } else { |
| 151 | si = (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript( |
| 152 | closure->mInvokeID->mScript); |
| 153 | cc = new CPUClosure(closure, si); |
| 154 | } |
| 155 | |
| 156 | if (batch->conflict(cc)) { |
| 157 | mBatches.push_back(batch); |
| 158 | batch = new Batch(this); |
| 159 | } |
| 160 | |
| 161 | batch->mClosures.push_back(cc); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 162 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 163 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 164 | rsAssert(!batch->mClosures.empty()); |
| 165 | mBatches.push_back(batch); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 166 | |
| 167 | #ifndef RS_COMPATIBILITY_LIB |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 168 | for (Batch* batch : mBatches) { |
| 169 | batch->tryToCreateFusedKernel(mGroup->mCacheDir.c_str()); |
| 170 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 171 | #endif |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 172 | } |
| 173 | |
| 174 | CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 175 | for (Batch* batch : mBatches) { |
| 176 | delete batch; |
| 177 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 178 | } |
| 179 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 180 | namespace { |
| 181 | |
| 182 | #ifndef RS_COMPATIBILITY_LIB |
| 183 | |
// Returns the last component of |path|: everything after the final '/' or
// '\\'. A path with no separator is returned unchanged; a path that ends in a
// separator yields an empty string.
std::string getFileName(const std::string& path) {
    // Keep the index in the string's own size type; narrowing it to unsigned
    // would break for npos and for very long paths.
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    if (lastSeparator == std::string::npos) {
        return path;
    }
    return path.substr(lastSeparator + 1);
}
| 188 | |
| 189 | void setupCompileArguments( |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 190 | const vector<string>& inputs, const vector<int>& kernels, |
| 191 | const string& output_dir, const string& output_filename, |
| 192 | const string& rsLib, vector<const char*>* args) { |
| 193 | args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); |
| 194 | args->push_back("-fPIC"); |
| 195 | args->push_back("-embedRSInfo"); |
| 196 | args->push_back("-mtriple"); |
| 197 | args->push_back(DEFAULT_TARGET_TRIPLE_STRING); |
| 198 | args->push_back("-bclib"); |
| 199 | args->push_back(rsLib.c_str()); |
| 200 | for (const string& input : inputs) { |
| 201 | args->push_back(input.c_str()); |
| 202 | } |
| 203 | for (int kernel : kernels) { |
| 204 | args->push_back("-k"); |
| 205 | string strKernel = std::to_string(kernel); |
| 206 | args->push_back(strKernel.c_str()); |
| 207 | } |
| 208 | args->push_back("-output_path"); |
| 209 | args->push_back(output_dir.c_str()); |
| 210 | args->push_back("-o"); |
| 211 | args->push_back(output_filename.c_str()); |
| 212 | args->push_back(nullptr); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 213 | } |
| 214 | |
// Joins the first |n| C strings of |strs| with single spaces.
//
// Returns an empty string when |n| is not positive or |strs| is null. The
// first |n| entries must themselves be non-null.
std::string convertListToString(int n, const char* const* strs) {
    std::string ret;
    if (n <= 0 || strs == nullptr) {
        // Nothing to join; in particular, do not touch strs[0].
        return ret;
    }
    ret.append(strs[0]);
    for (int i = 1; i < n; i++) {
        ret.append(" ");
        ret.append(strs[i]);
    }
    return ret;
}
| 224 | |
| 225 | bool fuseAndCompile(const char** arguments, |
| 226 | const string& commandLine) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 227 | const pid_t pid = fork(); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 228 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 229 | if (pid == -1) { |
| 230 | ALOGE("Couldn't fork for bcc execution"); |
| 231 | return false; |
| 232 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 233 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 234 | if (pid == 0) { |
| 235 | // Child process |
| 236 | ALOGV("Invoking BCC with: %s", commandLine.c_str()); |
| 237 | execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 238 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 239 | ALOGE("execv() failed: %s", strerror(errno)); |
| 240 | abort(); |
| 241 | return false; |
| 242 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 243 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 244 | // Parent process |
| 245 | int status = 0; |
| 246 | const pid_t w = waitpid(pid, &status, 0); |
| 247 | if (w == -1) { |
| 248 | return false; |
| 249 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 250 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 251 | if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { |
| 252 | ALOGE("bcc terminated unexpectedly"); |
| 253 | return false; |
| 254 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 255 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 256 | return true; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 257 | } |
| 258 | #endif |
| 259 | |
| 260 | } // anonymous namespace |
| 261 | |
| 262 | void Batch::tryToCreateFusedKernel(const char *cacheDir) { |
| 263 | #ifndef RS_COMPATIBILITY_LIB |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 264 | if (mClosures.size() < 2) { |
| 265 | return; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 266 | } |
| 267 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 268 | //===--------------------------------------------------------------------===// |
| 269 | // Fuse the input kernels and generate native code in an object file |
| 270 | //===--------------------------------------------------------------------===// |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 271 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 272 | std::vector<string> inputFiles; |
| 273 | std::vector<int> slots; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 274 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 275 | for (CPUClosure* cpuClosure : mClosures) { |
| 276 | const Closure* closure = cpuClosure->mClosure; |
| 277 | const ScriptKernelID* kernelID = closure->mKernelID.get(); |
| 278 | const Script* script = kernelID->mScript; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 279 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 280 | if (script->isIntrinsic()) { |
| 281 | return; |
| 282 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 283 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 284 | const RsdCpuScriptImpl *cpuScript = |
| 285 | (const RsdCpuScriptImpl*)script->mHal.drv; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 286 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 287 | const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 288 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 289 | inputFiles.push_back(bitcodeFilename); |
| 290 | slots.push_back(kernelID->mSlot); |
| 291 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 292 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 293 | string outputPath(tempnam(cacheDir, "fused")); |
| 294 | string outputFileName = getFileName(outputPath); |
| 295 | string objFilePath(outputPath); |
| 296 | objFilePath.append(".o"); |
| 297 | string rsLibPath(SYSLIBPATH"/libclcore.bc"); |
| 298 | vector<const char*> arguments; |
| 299 | setupCompileArguments(inputFiles, slots, cacheDir, outputFileName, rsLibPath, |
| 300 | &arguments); |
| 301 | string commandLine = |
| 302 | convertListToString(arguments.size() - 1, arguments.data()); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 303 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 304 | if (!fuseAndCompile(arguments.data(), commandLine)) { |
| 305 | return; |
| 306 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 307 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 308 | //===--------------------------------------------------------------------===// |
| 309 | // Create and load the shared lib |
| 310 | //===--------------------------------------------------------------------===// |
| 311 | |
| 312 | const char* resName = outputFileName.c_str(); |
| 313 | |
| 314 | if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { |
| 315 | ALOGE("Failed to link object file '%s'", resName); |
| 316 | return; |
| 317 | } |
| 318 | |
| 319 | void* mSharedObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); |
| 320 | if (mSharedObj == nullptr) { |
| 321 | ALOGE("Unable to load '%s'", resName); |
| 322 | return; |
| 323 | } |
| 324 | |
| 325 | mExecutable = ScriptExecutable::createFromSharedObject( |
| 326 | nullptr, // RS context. Unused. |
| 327 | mSharedObj); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 328 | |
| 329 | #endif // RS_COMPATIBILITY_LIB |
| 330 | } |
| 331 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 332 | void CpuScriptGroup2Impl::execute() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 333 | for (auto batch : mBatches) { |
| 334 | batch->setGlobalsForBatch(); |
| 335 | batch->run(); |
| 336 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 337 | } |
| 338 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 339 | void Batch::setGlobalsForBatch() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 340 | for (CPUClosure* cpuClosure : mClosures) { |
| 341 | const Closure* closure = cpuClosure->mClosure; |
| 342 | const ScriptKernelID* kernelID = closure->mKernelID.get(); |
| 343 | Script* s; |
| 344 | if (kernelID != nullptr) { |
| 345 | s = kernelID->mScript; |
| 346 | } else { |
| 347 | s = cpuClosure->mClosure->mInvokeID->mScript; |
| 348 | } |
| 349 | for (const auto& p : closure->mGlobals) { |
| 350 | const void* value = p.second.first; |
| 351 | int size = p.second.second; |
| 352 | if (value == nullptr && size == 0) { |
| 353 | // This indicates the current closure depends on another closure for a |
| 354 | // global in their shared module (script). In this case we don't need to |
| 355 | // copy the value. For example, an invoke intializes a global variable |
| 356 | // which a kernel later reads. |
| 357 | continue; |
| 358 | } |
| 359 | // We use -1 size to indicate an ObjectBase rather than a primitive type |
| 360 | if (size < 0) { |
| 361 | s->setVarObj(p.first->mSlot, (ObjectBase*)value); |
| 362 | } else { |
| 363 | s->setVar(p.first->mSlot, (const void*)&value, size); |
| 364 | } |
| 365 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 366 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 367 | } |
| 368 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 369 | void Batch::run() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 370 | if (mExecutable != nullptr) { |
| 371 | MTLaunchStruct mtls; |
| 372 | const CPUClosure* firstCpuClosure = mClosures.front(); |
| 373 | const CPUClosure* lastCpuClosure = mClosures.back(); |
| 374 | |
| 375 | firstCpuClosure->mSi->forEachMtlsSetup( |
| 376 | (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], |
| 377 | firstCpuClosure->mClosure->mArgs.size(), |
| 378 | lastCpuClosure->mClosure->mReturnValue, |
| 379 | nullptr, 0, nullptr, &mtls); |
| 380 | |
| 381 | mtls.script = nullptr; |
| 382 | mtls.fep.usr = nullptr; |
| 383 | mtls.kernel = mExecutable->getForEachFunction(0); |
| 384 | |
| 385 | mGroup->getCpuRefImpl()->launchThreads( |
| 386 | (const Allocation**)&firstCpuClosure->mClosure->mArgs[0], |
| 387 | firstCpuClosure->mClosure->mArgs.size(), |
| 388 | lastCpuClosure->mClosure->mReturnValue, |
| 389 | nullptr, &mtls); |
| 390 | |
| 391 | return; |
| 392 | } |
| 393 | |
| 394 | if (mClosures.size() == 1 && |
| 395 | mClosures.front()->mClosure->mKernelID.get() == nullptr) { |
| 396 | // This closure is for an invoke function |
| 397 | CPUClosure* cc = mClosures.front(); |
| 398 | const Closure* c = cc->mClosure; |
| 399 | const ScriptInvokeID* invokeID = c->mInvokeID; |
| 400 | rsAssert(invokeID != nullptr); |
| 401 | cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); |
| 402 | return; |
| 403 | } |
| 404 | |
| 405 | for (CPUClosure* cpuClosure : mClosures) { |
| 406 | const Closure* closure = cpuClosure->mClosure; |
| 407 | const ScriptKernelID* kernelID = closure->mKernelID.get(); |
| 408 | cpuClosure->mSi->preLaunch(kernelID->mSlot, |
| 409 | (const Allocation**)&closure->mArgs[0], |
| 410 | closure->mArgs.size(), closure->mReturnValue, |
| 411 | cpuClosure->mUsrPtr, cpuClosure->mUsrSize, |
| 412 | nullptr); |
| 413 | } |
| 414 | |
| 415 | const CPUClosure* cpuClosure = mClosures.front(); |
| 416 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 417 | MTLaunchStruct mtls; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 418 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 419 | if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0], |
| 420 | closure->mArgs.size(), |
| 421 | closure->mReturnValue, |
| 422 | nullptr, 0, nullptr, &mtls)) { |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 423 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 424 | mtls.script = nullptr; |
| 425 | mtls.kernel = (void (*)())&groupRoot; |
| 426 | mtls.fep.usr = &mClosures; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 427 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 428 | mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); |
| 429 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 430 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame^] | 431 | for (CPUClosure* cpuClosure : mClosures) { |
| 432 | const Closure* closure = cpuClosure->mClosure; |
| 433 | const ScriptKernelID* kernelID = closure->mKernelID.get(); |
| 434 | cpuClosure->mSi->postLaunch(kernelID->mSlot, |
| 435 | (const Allocation**)&closure->mArgs[0], |
| 436 | closure->mArgs.size(), closure->mReturnValue, |
| 437 | nullptr, 0, nullptr); |
| 438 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 439 | } |
| 440 | |
| 441 | } // namespace renderscript |
| 442 | } // namespace android |