Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 1 | #include "rsCpuScriptGroup2.h" |
| 2 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 3 | #include <dlfcn.h> |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 4 | #include <stdio.h> |
| 5 | #include <stdlib.h> |
| 6 | #include <unistd.h> |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 7 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 8 | #include <set> |
| 9 | #include <sstream> |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 10 | #include <string> |
| 11 | #include <vector> |
| 12 | |
| 13 | #ifndef RS_COMPATIBILITY_LIB |
| 14 | #include "bcc/Config/Config.h" |
| 15 | #include <sys/wait.h> |
| 16 | #endif |
| 17 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 18 | #include "cpu_ref/rsCpuCore.h" |
| 19 | #include "rsClosure.h" |
| 20 | #include "rsContext.h" |
| 21 | #include "rsCpuCore.h" |
Yang Ni | 2abfcc6 | 2015-02-17 16:05:19 -0800 | [diff] [blame] | 22 | #include "rsCpuExecutable.h" |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 23 | #include "rsCpuScript.h" |
| 24 | #include "rsScript.h" |
| 25 | #include "rsScriptGroup2.h" |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 26 | #include "rsScriptIntrinsic.h" |
| 27 | |
| 28 | using std::string; |
| 29 | using std::vector; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 30 | |
| 31 | namespace android { |
| 32 | namespace renderscript { |
| 33 | |
| 34 | namespace { |
| 35 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 36 | const size_t DefaultKernelArgCount = 2; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 37 | |
| 38 | void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart, |
| 39 | uint32_t xend, uint32_t outstep) { |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 40 | const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 41 | RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams; |
| 42 | const void **oldIns = kparams->ins; |
| 43 | uint32_t *oldStrides = kparams->inEStrides; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 44 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 45 | std::vector<const void*> ins(DefaultKernelArgCount); |
| 46 | std::vector<uint32_t> strides(DefaultKernelArgCount); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 47 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 48 | for (CPUClosure* cpuClosure : closures) { |
| 49 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 50 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 51 | auto in_iter = ins.begin(); |
| 52 | auto stride_iter = strides.begin(); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 53 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 54 | for (size_t i = 0; i < closure->mNumArg; i++) { |
| 55 | const void* arg = closure->mArgs[i]; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 56 | const Allocation* a = (const Allocation*)arg; |
| 57 | const uint32_t eStride = a->mHal.state.elementSizeBytes; |
| 58 | const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + |
| 59 | eStride * xstart; |
| 60 | if (kparams->dimY > 1) { |
| 61 | ptr += a->mHal.drvState.lod[0].stride * kparams->y; |
| 62 | } |
| 63 | *in_iter++ = ptr; |
| 64 | *stride_iter++ = eStride; |
| 65 | } |
| 66 | |
| 67 | mutable_kparams->ins = &ins[0]; |
| 68 | mutable_kparams->inEStrides = &strides[0]; |
| 69 | |
| 70 | const Allocation* out = closure->mReturnValue; |
| 71 | const uint32_t ostep = out->mHal.state.elementSizeBytes; |
| 72 | const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + |
| 73 | ostep * xstart; |
| 74 | if (kparams->dimY > 1) { |
| 75 | ptr += out->mHal.drvState.lod[0].stride * kparams->y; |
| 76 | } |
| 77 | |
| 78 | mutable_kparams->out = (void*)ptr; |
| 79 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 80 | cpuClosure->mFunc(kparams, xstart, xend, ostep); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 81 | } |
| 82 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 83 | mutable_kparams->ins = oldIns; |
| 84 | mutable_kparams->inEStrides = oldStrides; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 85 | } |
| 86 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 87 | } // namespace |
| 88 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 89 | Batch::Batch(CpuScriptGroup2Impl* group, const char* name) : |
| 90 | mGroup(group), mFunc(nullptr) { |
| 91 | mName = strndup(name, strlen(name)); |
| 92 | } |
| 93 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 94 | Batch::~Batch() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 95 | for (CPUClosure* c : mClosures) { |
| 96 | delete c; |
| 97 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 98 | free(mName); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 99 | } |
| 100 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 101 | bool Batch::conflict(CPUClosure* cpuClosure) const { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 102 | if (mClosures.empty()) { |
| 103 | return false; |
| 104 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 105 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 106 | const Closure* closure = cpuClosure->mClosure; |
| 107 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 108 | if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 109 | // An invoke should be in a batch by itself, so it conflicts with any other |
| 110 | // closure. |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 111 | return true; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 112 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 113 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 114 | const auto& globalDeps = closure->mGlobalDeps; |
| 115 | const auto& argDeps = closure->mArgDeps; |
| 116 | |
| 117 | for (CPUClosure* c : mClosures) { |
| 118 | const Closure* batched = c->mClosure; |
| 119 | if (globalDeps.find(batched) != globalDeps.end()) { |
| 120 | return true; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 121 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 122 | const auto& it = argDeps.find(batched); |
| 123 | if (it != argDeps.end()) { |
| 124 | const auto& args = (*it).second; |
| 125 | for (const auto &p1 : *args) { |
| 126 | if (p1.second->get() != nullptr) { |
| 127 | return true; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 128 | } |
| 129 | } |
| 130 | } |
| 131 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 132 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 133 | return false; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 134 | } |
| 135 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 136 | CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, |
| 137 | const ScriptGroupBase *sg) : |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 138 | mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), |
| 139 | mExecutable(nullptr), mScriptObj(nullptr) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 140 | rsAssert(!mGroup->mClosures.empty()); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 141 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 142 | Batch* batch = new Batch(this, "Batch0"); |
| 143 | int i = 0; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 144 | for (Closure* closure: mGroup->mClosures) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 145 | CPUClosure* cc; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 146 | const IDBase* funcID = closure->mFunctionID.get(); |
| 147 | RsdCpuScriptImpl* si = |
| 148 | (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); |
| 149 | if (closure->mIsKernel) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 150 | MTLaunchStruct mtls; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 151 | si->forEachKernelSetup(funcID->mSlot, &mtls); |
| 152 | cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 153 | } else { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 154 | cc = new CPUClosure(closure, si); |
| 155 | } |
| 156 | |
| 157 | if (batch->conflict(cc)) { |
| 158 | mBatches.push_back(batch); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 159 | std::stringstream ss; |
| 160 | ss << "Batch" << ++i; |
| 161 | batch = new Batch(this, ss.str().c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 162 | } |
| 163 | |
| 164 | batch->mClosures.push_back(cc); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 165 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 166 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 167 | rsAssert(!batch->mClosures.empty()); |
| 168 | mBatches.push_back(batch); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 169 | |
| 170 | #ifndef RS_COMPATIBILITY_LIB |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 171 | compile(mGroup->mCacheDir); |
| 172 | if (mScriptObj != nullptr && mExecutable != nullptr) { |
| 173 | for (Batch* batch : mBatches) { |
| 174 | batch->resolveFuncPtr(mScriptObj); |
| 175 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 176 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 177 | #endif // RS_COMPATIBILITY_LIB |
| 178 | } |
| 179 | |
| 180 | void Batch::resolveFuncPtr(void* sharedObj) { |
| 181 | std::string funcName(mName); |
| 182 | if (mClosures.front()->mClosure->mIsKernel) { |
| 183 | funcName.append(".expand"); |
| 184 | } |
| 185 | mFunc = dlsym(sharedObj, funcName.c_str()); |
| 186 | rsAssert (mFunc != nullptr); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 187 | } |
| 188 | |
| 189 | CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 190 | for (Batch* batch : mBatches) { |
| 191 | delete batch; |
| 192 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 193 | // TODO: move this dlclose into ~ScriptExecutable(). |
| 194 | if (mScriptObj != nullptr) { |
| 195 | dlclose(mScriptObj); |
| 196 | } |
| 197 | delete mExecutable; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 198 | } |
| 199 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 200 | namespace { |
| 201 | |
| 202 | #ifndef RS_COMPATIBILITY_LIB |
| 203 | |
// Returns the part of |path| after the last '/' or '\\' separator. When
// |path| contains no separator it is returned unchanged; when it ends with a
// separator the result is empty.
std::string getFileName(std::string path) {
    // Keep the full size_type: narrowing to `unsigned` would truncate the
    // position and only handles npos through wraparound.
    const std::string::size_type lastSeparator = path.find_last_of("/\\");
    if (lastSeparator == std::string::npos) {
        return path;
    }
    return path.substr(lastSeparator + 1);
}
| 208 | |
| 209 | void setupCompileArguments( |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 210 | const vector<string>& inputs, const vector<string>& kernelBatches, |
| 211 | const vector<string>& invokeBatches, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 212 | const string& output_dir, const string& output_filename, |
| 213 | const string& rsLib, vector<const char*>* args) { |
| 214 | args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); |
| 215 | args->push_back("-fPIC"); |
| 216 | args->push_back("-embedRSInfo"); |
| 217 | args->push_back("-mtriple"); |
| 218 | args->push_back(DEFAULT_TARGET_TRIPLE_STRING); |
| 219 | args->push_back("-bclib"); |
| 220 | args->push_back(rsLib.c_str()); |
| 221 | for (const string& input : inputs) { |
| 222 | args->push_back(input.c_str()); |
| 223 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 224 | for (const string& batch : kernelBatches) { |
| 225 | args->push_back("-merge"); |
| 226 | args->push_back(batch.c_str()); |
| 227 | } |
| 228 | for (const string& batch : invokeBatches) { |
| 229 | args->push_back("-invoke"); |
| 230 | args->push_back(batch.c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 231 | } |
| 232 | args->push_back("-output_path"); |
| 233 | args->push_back(output_dir.c_str()); |
| 234 | args->push_back("-o"); |
| 235 | args->push_back(output_filename.c_str()); |
| 236 | args->push_back(nullptr); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 237 | } |
| 238 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 239 | bool fuseAndCompile(const char** arguments, |
| 240 | const string& commandLine) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 241 | const pid_t pid = fork(); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 242 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 243 | if (pid == -1) { |
| 244 | ALOGE("Couldn't fork for bcc execution"); |
| 245 | return false; |
| 246 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 247 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 248 | if (pid == 0) { |
| 249 | // Child process |
| 250 | ALOGV("Invoking BCC with: %s", commandLine.c_str()); |
| 251 | execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 252 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 253 | ALOGE("execv() failed: %s", strerror(errno)); |
| 254 | abort(); |
| 255 | return false; |
| 256 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 257 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 258 | // Parent process |
| 259 | int status = 0; |
| 260 | const pid_t w = waitpid(pid, &status, 0); |
| 261 | if (w == -1) { |
| 262 | return false; |
| 263 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 264 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 265 | if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { |
| 266 | ALOGE("bcc terminated unexpectedly"); |
| 267 | return false; |
| 268 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 269 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 270 | return true; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 271 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 272 | |
| 273 | void generateSourceSlot(const Closure& closure, |
| 274 | const std::vector<std::string>& inputs, |
| 275 | std::stringstream& ss) { |
| 276 | const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); |
| 277 | const Script* script = funcID->mScript; |
| 278 | |
| 279 | rsAssert (!script->isIntrinsic()); |
| 280 | |
| 281 | const RsdCpuScriptImpl *cpuScript = |
| 282 | (const RsdCpuScriptImpl*)script->mHal.drv; |
| 283 | const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); |
| 284 | |
| 285 | const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - |
| 286 | inputs.begin(); |
| 287 | |
| 288 | ss << index << "," << funcID->mSlot << "."; |
| 289 | } |
| 290 | |
#endif // RS_COMPATIBILITY_LIB
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 292 | |
| 293 | } // anonymous namespace |
| 294 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 295 | void CpuScriptGroup2Impl::compile(const char* cacheDir) { |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 296 | #ifndef RS_COMPATIBILITY_LIB |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 297 | if (mGroup->mClosures.size() < 2) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 298 | return; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 299 | } |
| 300 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 301 | //===--------------------------------------------------------------------===// |
| 302 | // Fuse the input kernels and generate native code in an object file |
| 303 | //===--------------------------------------------------------------------===// |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 304 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 305 | std::set<string> inputSet; |
| 306 | for (Closure* closure : mGroup->mClosures) { |
| 307 | const Script* script = closure->mFunctionID.get()->mScript; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 308 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 309 | // If any script is an intrinsic, give up trying fusing the kernels. |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 310 | if (script->isIntrinsic()) { |
| 311 | return; |
| 312 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 313 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 314 | const RsdCpuScriptImpl *cpuScript = |
| 315 | (const RsdCpuScriptImpl*)script->mHal.drv; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 316 | const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 317 | inputSet.insert(bitcodeFilename); |
| 318 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 319 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 320 | std::vector<string> inputs(inputSet.begin(), inputSet.end()); |
| 321 | |
| 322 | std::vector<string> kernelBatches; |
| 323 | std::vector<string> invokeBatches; |
| 324 | |
| 325 | int i = 0; |
| 326 | for (const auto& batch : mBatches) { |
| 327 | rsAssert(batch->size() > 0); |
| 328 | |
| 329 | std::stringstream ss; |
| 330 | ss << batch->mName << ":"; |
| 331 | |
| 332 | if (!batch->mClosures.front()->mClosure->mIsKernel) { |
| 333 | rsAssert(batch->size() == 1); |
| 334 | generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); |
| 335 | invokeBatches.push_back(ss.str()); |
| 336 | } else { |
| 337 | for (const auto& cpuClosure : batch->mClosures) { |
| 338 | generateSourceSlot(*cpuClosure->mClosure, inputs, ss); |
| 339 | } |
| 340 | kernelBatches.push_back(ss.str()); |
| 341 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 342 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 343 | |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 344 | rsAssert(cacheDir != nullptr); |
| 345 | string objFilePath(cacheDir); |
| 346 | objFilePath.append("/fusedXXXXXX.o"); |
| 347 | // Find unique object file name, to make following file names unique. |
| 348 | int tempfd = mkstemps(&objFilePath[0], 2); |
| 349 | if (tempfd == -1) { |
| 350 | return; |
| 351 | } |
| 352 | TEMP_FAILURE_RETRY(close(tempfd)); |
| 353 | |
| 354 | string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 355 | string rsLibPath(SYSLIBPATH"/libclcore.bc"); |
| 356 | vector<const char*> arguments; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 357 | setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, |
| 358 | outputFileName, rsLibPath, &arguments); |
Yang Ni | 2abfcc6 | 2015-02-17 16:05:19 -0800 | [diff] [blame] | 359 | std::unique_ptr<const char> joined( |
| 360 | rsuJoinStrings(arguments.size() - 1, arguments.data())); |
| 361 | string commandLine (joined.get()); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 362 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 363 | if (!fuseAndCompile(arguments.data(), commandLine)) { |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 364 | unlink(objFilePath.c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 365 | return; |
| 366 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 367 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 368 | //===--------------------------------------------------------------------===// |
| 369 | // Create and load the shared lib |
| 370 | //===--------------------------------------------------------------------===// |
| 371 | |
| 372 | const char* resName = outputFileName.c_str(); |
| 373 | |
| 374 | if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { |
| 375 | ALOGE("Failed to link object file '%s'", resName); |
| 376 | return; |
| 377 | } |
| 378 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 379 | mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); |
| 380 | if (mScriptObj == nullptr) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 381 | ALOGE("Unable to load '%s'", resName); |
| 382 | return; |
| 383 | } |
| 384 | |
| 385 | mExecutable = ScriptExecutable::createFromSharedObject( |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 386 | nullptr, // RS context. Unused. |
| 387 | mScriptObj); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 388 | |
| 389 | #endif // RS_COMPATIBILITY_LIB |
| 390 | } |
| 391 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 392 | void CpuScriptGroup2Impl::execute() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 393 | for (auto batch : mBatches) { |
| 394 | batch->setGlobalsForBatch(); |
| 395 | batch->run(); |
| 396 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 397 | } |
| 398 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 399 | void Batch::setGlobalsForBatch() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 400 | for (CPUClosure* cpuClosure : mClosures) { |
| 401 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 402 | const IDBase* funcID = closure->mFunctionID.get(); |
| 403 | Script* s = funcID->mScript;; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 404 | for (const auto& p : closure->mGlobals) { |
| 405 | const void* value = p.second.first; |
| 406 | int size = p.second.second; |
| 407 | if (value == nullptr && size == 0) { |
| 408 | // This indicates the current closure depends on another closure for a |
| 409 | // global in their shared module (script). In this case we don't need to |
| 410 | // copy the value. For example, an invoke intializes a global variable |
| 411 | // which a kernel later reads. |
| 412 | continue; |
| 413 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 414 | rsAssert(p.first != nullptr); |
| 415 | ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)", |
| 416 | closure, p.first, p.first->mScript, p.first->mSlot); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 417 | Script* script = p.first->mScript; |
| 418 | const RsdCpuScriptImpl *cpuScript = |
| 419 | (const RsdCpuScriptImpl*)script->mHal.drv; |
| 420 | int slot = p.first->mSlot; |
| 421 | ScriptExecutable* exec = mGroup->getExecutable(); |
| 422 | if (exec != nullptr) { |
| 423 | const char* varName = cpuScript->getFieldName(slot); |
| 424 | void* addr = exec->getFieldAddress(varName); |
| 425 | if (size < 0) { |
| 426 | rsrSetObject(mGroup->getCpuRefImpl()->getContext(), |
| 427 | (rs_object_base*)addr, (ObjectBase*)value); |
| 428 | } else { |
| 429 | memcpy(addr, (const void*)&value, size); |
| 430 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 431 | } else { |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 432 | // We use -1 size to indicate an ObjectBase rather than a primitive type |
| 433 | if (size < 0) { |
| 434 | s->setVarObj(slot, (ObjectBase*)value); |
| 435 | } else { |
| 436 | s->setVar(slot, (const void*)&value, size); |
| 437 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 438 | } |
| 439 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 440 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 441 | } |
| 442 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 443 | void Batch::run() { |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 444 | if (!mClosures.front()->mClosure->mIsKernel) { |
| 445 | rsAssert(mClosures.size() == 1); |
| 446 | |
| 447 | // This batch contains a single closure for an invoke function |
| 448 | CPUClosure* cc = mClosures.front(); |
| 449 | const Closure* c = cc->mClosure; |
| 450 | |
| 451 | if (mFunc != nullptr) { |
| 452 | // TODO: Need align pointers for x86_64. |
| 453 | // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp |
| 454 | ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); |
| 455 | } else { |
| 456 | const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); |
| 457 | rsAssert(invokeID != nullptr); |
| 458 | cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); |
| 459 | } |
| 460 | |
| 461 | return; |
| 462 | } |
| 463 | |
| 464 | if (mFunc != nullptr) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 465 | MTLaunchStruct mtls; |
| 466 | const CPUClosure* firstCpuClosure = mClosures.front(); |
| 467 | const CPUClosure* lastCpuClosure = mClosures.back(); |
| 468 | |
| 469 | firstCpuClosure->mSi->forEachMtlsSetup( |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 470 | (const Allocation**)firstCpuClosure->mClosure->mArgs, |
| 471 | firstCpuClosure->mClosure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 472 | lastCpuClosure->mClosure->mReturnValue, |
| 473 | nullptr, 0, nullptr, &mtls); |
| 474 | |
| 475 | mtls.script = nullptr; |
| 476 | mtls.fep.usr = nullptr; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 477 | mtls.kernel = (ForEachFunc_t)mFunc; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 478 | |
| 479 | mGroup->getCpuRefImpl()->launchThreads( |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 480 | (const Allocation**)firstCpuClosure->mClosure->mArgs, |
| 481 | firstCpuClosure->mClosure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 482 | lastCpuClosure->mClosure->mReturnValue, |
| 483 | nullptr, &mtls); |
| 484 | |
| 485 | return; |
| 486 | } |
| 487 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 488 | for (CPUClosure* cpuClosure : mClosures) { |
| 489 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 490 | const ScriptKernelID* kernelID = |
| 491 | (const ScriptKernelID*)closure->mFunctionID.get(); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 492 | cpuClosure->mSi->preLaunch(kernelID->mSlot, |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 493 | (const Allocation**)closure->mArgs, |
| 494 | closure->mNumArg, closure->mReturnValue, |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 495 | nullptr, 0, nullptr); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 496 | } |
| 497 | |
| 498 | const CPUClosure* cpuClosure = mClosures.front(); |
| 499 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 500 | MTLaunchStruct mtls; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 501 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 502 | if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, |
| 503 | closure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 504 | closure->mReturnValue, |
| 505 | nullptr, 0, nullptr, &mtls)) { |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 506 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 507 | mtls.script = nullptr; |
| 508 | mtls.kernel = (void (*)())&groupRoot; |
| 509 | mtls.fep.usr = &mClosures; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 510 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 511 | mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); |
| 512 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 513 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 514 | for (CPUClosure* cpuClosure : mClosures) { |
| 515 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame^] | 516 | const ScriptKernelID* kernelID = |
| 517 | (const ScriptKernelID*)closure->mFunctionID.get(); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 518 | cpuClosure->mSi->postLaunch(kernelID->mSlot, |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 519 | (const Allocation**)closure->mArgs, |
| 520 | closure->mNumArg, closure->mReturnValue, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 521 | nullptr, 0, nullptr); |
| 522 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 523 | } |
| 524 | |
| 525 | } // namespace renderscript |
| 526 | } // namespace android |