Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 1 | #include "rsCpuScriptGroup2.h" |
| 2 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 3 | #include <dlfcn.h> |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 4 | #include <stdio.h> |
| 5 | #include <stdlib.h> |
| 6 | #include <unistd.h> |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 7 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 8 | #include <set> |
| 9 | #include <sstream> |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 10 | #include <string> |
| 11 | #include <vector> |
| 12 | |
| 13 | #ifndef RS_COMPATIBILITY_LIB |
| 14 | #include "bcc/Config/Config.h" |
| 15 | #include <sys/wait.h> |
| 16 | #endif |
| 17 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 18 | #include "cpu_ref/rsCpuCore.h" |
| 19 | #include "rsClosure.h" |
| 20 | #include "rsContext.h" |
| 21 | #include "rsCpuCore.h" |
Yang Ni | 2abfcc6 | 2015-02-17 16:05:19 -0800 | [diff] [blame] | 22 | #include "rsCpuExecutable.h" |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 23 | #include "rsCpuScript.h" |
| 24 | #include "rsScript.h" |
| 25 | #include "rsScriptGroup2.h" |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 26 | #include "rsScriptIntrinsic.h" |
| 27 | |
| 28 | using std::string; |
| 29 | using std::vector; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 30 | |
| 31 | namespace android { |
| 32 | namespace renderscript { |
| 33 | |
| 34 | namespace { |
| 35 | |
// File-local constant; it is not referenced anywhere in the visible part of
// this file.
constexpr size_t DefaultKernelArgCount = 2;
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 37 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 38 | void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 39 | uint32_t xend, uint32_t outstep) { |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 40 | const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; |
| 41 | RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 42 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 43 | const size_t oldInLen = mutable_kinfo->inLen; |
| 44 | |
| 45 | decltype(mutable_kinfo->inStride) oldInStride; |
| 46 | memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 47 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 48 | for (CPUClosure* cpuClosure : closures) { |
| 49 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 50 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 51 | // There had better be enough space in mutable_kinfo |
| 52 | rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 53 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 54 | for (size_t i = 0; i < closure->mNumArg; i++) { |
| 55 | const void* arg = closure->mArgs[i]; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 56 | const Allocation* a = (const Allocation*)arg; |
| 57 | const uint32_t eStride = a->mHal.state.elementSizeBytes; |
| 58 | const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + |
| 59 | eStride * xstart; |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 60 | if (kinfo->dim.y > 1) { |
| 61 | ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 62 | } |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 63 | mutable_kinfo->inPtr[i] = ptr; |
| 64 | mutable_kinfo->inStride[i] = eStride; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 65 | } |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 66 | mutable_kinfo->inLen = closure->mNumArg; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 67 | |
| 68 | const Allocation* out = closure->mReturnValue; |
| 69 | const uint32_t ostep = out->mHal.state.elementSizeBytes; |
| 70 | const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + |
| 71 | ostep * xstart; |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 72 | if (kinfo->dim.y > 1) { |
| 73 | ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 74 | } |
| 75 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 76 | rsAssert(kinfo->outLen <= 1); |
| 77 | mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 78 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 79 | cpuClosure->mFunc(kinfo, xstart, xend, ostep); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 80 | } |
| 81 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 82 | mutable_kinfo->inLen = oldInLen; |
| 83 | memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 84 | } |
| 85 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 86 | } // namespace |
| 87 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 88 | Batch::Batch(CpuScriptGroup2Impl* group, const char* name) : |
| 89 | mGroup(group), mFunc(nullptr) { |
| 90 | mName = strndup(name, strlen(name)); |
| 91 | } |
| 92 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 93 | Batch::~Batch() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 94 | for (CPUClosure* c : mClosures) { |
| 95 | delete c; |
| 96 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 97 | free(mName); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 98 | } |
| 99 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 100 | bool Batch::conflict(CPUClosure* cpuClosure) const { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 101 | if (mClosures.empty()) { |
| 102 | return false; |
| 103 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 104 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 105 | const Closure* closure = cpuClosure->mClosure; |
| 106 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 107 | if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 108 | // An invoke should be in a batch by itself, so it conflicts with any other |
| 109 | // closure. |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 110 | return true; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 111 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 112 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 113 | const auto& globalDeps = closure->mGlobalDeps; |
| 114 | const auto& argDeps = closure->mArgDeps; |
| 115 | |
| 116 | for (CPUClosure* c : mClosures) { |
| 117 | const Closure* batched = c->mClosure; |
| 118 | if (globalDeps.find(batched) != globalDeps.end()) { |
| 119 | return true; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 120 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 121 | const auto& it = argDeps.find(batched); |
| 122 | if (it != argDeps.end()) { |
| 123 | const auto& args = (*it).second; |
| 124 | for (const auto &p1 : *args) { |
| 125 | if (p1.second->get() != nullptr) { |
| 126 | return true; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 127 | } |
| 128 | } |
| 129 | } |
| 130 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 131 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 132 | return false; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 133 | } |
| 134 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 135 | CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, |
| 136 | const ScriptGroupBase *sg) : |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 137 | mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), |
| 138 | mExecutable(nullptr), mScriptObj(nullptr) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 139 | rsAssert(!mGroup->mClosures.empty()); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 140 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 141 | Batch* batch = new Batch(this, "Batch0"); |
| 142 | int i = 0; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 143 | for (Closure* closure: mGroup->mClosures) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 144 | CPUClosure* cc; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 145 | const IDBase* funcID = closure->mFunctionID.get(); |
| 146 | RsdCpuScriptImpl* si = |
| 147 | (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); |
| 148 | if (closure->mIsKernel) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 149 | MTLaunchStruct mtls; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 150 | si->forEachKernelSetup(funcID->mSlot, &mtls); |
| 151 | cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 152 | } else { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 153 | cc = new CPUClosure(closure, si); |
| 154 | } |
| 155 | |
| 156 | if (batch->conflict(cc)) { |
| 157 | mBatches.push_back(batch); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 158 | std::stringstream ss; |
| 159 | ss << "Batch" << ++i; |
| 160 | batch = new Batch(this, ss.str().c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 161 | } |
| 162 | |
| 163 | batch->mClosures.push_back(cc); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 164 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 165 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 166 | rsAssert(!batch->mClosures.empty()); |
| 167 | mBatches.push_back(batch); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 168 | |
| 169 | #ifndef RS_COMPATIBILITY_LIB |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 170 | compile(mGroup->mCacheDir); |
| 171 | if (mScriptObj != nullptr && mExecutable != nullptr) { |
| 172 | for (Batch* batch : mBatches) { |
| 173 | batch->resolveFuncPtr(mScriptObj); |
| 174 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 175 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 176 | #endif // RS_COMPATIBILITY_LIB |
| 177 | } |
| 178 | |
| 179 | void Batch::resolveFuncPtr(void* sharedObj) { |
| 180 | std::string funcName(mName); |
| 181 | if (mClosures.front()->mClosure->mIsKernel) { |
| 182 | funcName.append(".expand"); |
| 183 | } |
| 184 | mFunc = dlsym(sharedObj, funcName.c_str()); |
| 185 | rsAssert (mFunc != nullptr); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 186 | } |
| 187 | |
| 188 | CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 189 | for (Batch* batch : mBatches) { |
| 190 | delete batch; |
| 191 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 192 | // TODO: move this dlclose into ~ScriptExecutable(). |
| 193 | if (mScriptObj != nullptr) { |
| 194 | dlclose(mScriptObj); |
| 195 | } |
| 196 | delete mExecutable; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 197 | } |
| 198 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 199 | namespace { |
| 200 | |
| 201 | #ifndef RS_COMPATIBILITY_LIB |
| 202 | |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 203 | string getCoreLibPath(Context* context, string* coreLibRelaxedPath) { |
| 204 | *coreLibRelaxedPath = ""; |
| 205 | |
| 206 | // If we're debugging, use the debug library. |
| 207 | if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { |
| 208 | return SYSLIBPATH"/libclcore_debug.bc"; |
| 209 | } |
| 210 | |
| 211 | // Check for a platform specific library |
| 212 | |
| 213 | #if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) |
| 214 | // NEON-capable ARMv7a devices can use an accelerated math library |
| 215 | // for all reduced precision scripts. |
| 216 | // ARMv8 does not use NEON, as ASIMD can be used with all precision |
| 217 | // levels. |
| 218 | *coreLibRelaxedPath = SYSLIBPATH"/libclcore_neon.bc"; |
| 219 | #endif |
| 220 | |
| 221 | #if defined(__i386__) || defined(__x86_64__) |
| 222 | // x86 devices will use an optimized library. |
| 223 | return SYSLIBPATH"/libclcore_x86.bc"; |
| 224 | #else |
| 225 | return SYSLIBPATH"/libclcore.bc"; |
| 226 | #endif |
| 227 | } |
| 228 | |
// Returns the last component of |path|, i.e. everything after the final '/'
// or '\\'. A path without any separator is returned unchanged; a path that
// ends in a separator yields an empty string.
std::string getFileName(const std::string& path) {
    // Keep the full-width size_type: narrowing the result to `unsigned` only
    // worked through wraparound of npos + 1.
    const std::string::size_type lastSep = path.find_last_of("/\\");
    if (lastSep == std::string::npos) {
        return path;
    }
    return path.substr(lastSep + 1);
}
| 233 | |
| 234 | void setupCompileArguments( |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 235 | const vector<string>& inputs, const vector<string>& kernelBatches, |
| 236 | const vector<string>& invokeBatches, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 237 | const string& output_dir, const string& output_filename, |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 238 | const string& coreLibPath, const string& coreLibRelaxedPath, |
| 239 | vector<const char*>* args) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 240 | args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); |
| 241 | args->push_back("-fPIC"); |
| 242 | args->push_back("-embedRSInfo"); |
| 243 | args->push_back("-mtriple"); |
| 244 | args->push_back(DEFAULT_TARGET_TRIPLE_STRING); |
| 245 | args->push_back("-bclib"); |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 246 | args->push_back(coreLibPath.c_str()); |
| 247 | args->push_back("-bclib_relaxed"); |
| 248 | args->push_back(coreLibRelaxedPath.c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 249 | for (const string& input : inputs) { |
| 250 | args->push_back(input.c_str()); |
| 251 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 252 | for (const string& batch : kernelBatches) { |
| 253 | args->push_back("-merge"); |
| 254 | args->push_back(batch.c_str()); |
| 255 | } |
| 256 | for (const string& batch : invokeBatches) { |
| 257 | args->push_back("-invoke"); |
| 258 | args->push_back(batch.c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 259 | } |
| 260 | args->push_back("-output_path"); |
| 261 | args->push_back(output_dir.c_str()); |
| 262 | args->push_back("-o"); |
| 263 | args->push_back(output_filename.c_str()); |
| 264 | args->push_back(nullptr); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 265 | } |
| 266 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 267 | bool fuseAndCompile(const char** arguments, |
| 268 | const string& commandLine) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 269 | const pid_t pid = fork(); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 270 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 271 | if (pid == -1) { |
| 272 | ALOGE("Couldn't fork for bcc execution"); |
| 273 | return false; |
| 274 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 275 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 276 | if (pid == 0) { |
| 277 | // Child process |
| 278 | ALOGV("Invoking BCC with: %s", commandLine.c_str()); |
| 279 | execv(RsdCpuScriptImpl::BCC_EXE_PATH, (char* const*)arguments); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 280 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 281 | ALOGE("execv() failed: %s", strerror(errno)); |
| 282 | abort(); |
| 283 | return false; |
| 284 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 285 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 286 | // Parent process |
| 287 | int status = 0; |
| 288 | const pid_t w = waitpid(pid, &status, 0); |
| 289 | if (w == -1) { |
| 290 | return false; |
| 291 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 292 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 293 | if (!WIFEXITED(status) || WEXITSTATUS(status) != 0 ) { |
| 294 | ALOGE("bcc terminated unexpectedly"); |
| 295 | return false; |
| 296 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 297 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 298 | return true; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 299 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 300 | |
| 301 | void generateSourceSlot(const Closure& closure, |
| 302 | const std::vector<std::string>& inputs, |
| 303 | std::stringstream& ss) { |
| 304 | const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); |
| 305 | const Script* script = funcID->mScript; |
| 306 | |
| 307 | rsAssert (!script->isIntrinsic()); |
| 308 | |
| 309 | const RsdCpuScriptImpl *cpuScript = |
| 310 | (const RsdCpuScriptImpl*)script->mHal.drv; |
| 311 | const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); |
| 312 | |
| 313 | const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - |
| 314 | inputs.begin(); |
| 315 | |
| 316 | ss << index << "," << funcID->mSlot << "."; |
| 317 | } |
| 318 | |
| 319 | #endif // RS_COMPATIBILITY_LIB |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 320 | |
| 321 | } // anonymous namespace |
| 322 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 323 | void CpuScriptGroup2Impl::compile(const char* cacheDir) { |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 324 | #ifndef RS_COMPATIBILITY_LIB |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 325 | if (mGroup->mClosures.size() < 2) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 326 | return; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 327 | } |
| 328 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 329 | //===--------------------------------------------------------------------===// |
| 330 | // Fuse the input kernels and generate native code in an object file |
| 331 | //===--------------------------------------------------------------------===// |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 332 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 333 | std::set<string> inputSet; |
| 334 | for (Closure* closure : mGroup->mClosures) { |
| 335 | const Script* script = closure->mFunctionID.get()->mScript; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 336 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 337 | // If any script is an intrinsic, give up trying fusing the kernels. |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 338 | if (script->isIntrinsic()) { |
| 339 | return; |
| 340 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 341 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 342 | const RsdCpuScriptImpl *cpuScript = |
| 343 | (const RsdCpuScriptImpl*)script->mHal.drv; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 344 | const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 345 | inputSet.insert(bitcodeFilename); |
| 346 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 347 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 348 | std::vector<string> inputs(inputSet.begin(), inputSet.end()); |
| 349 | |
| 350 | std::vector<string> kernelBatches; |
| 351 | std::vector<string> invokeBatches; |
| 352 | |
| 353 | int i = 0; |
| 354 | for (const auto& batch : mBatches) { |
| 355 | rsAssert(batch->size() > 0); |
| 356 | |
| 357 | std::stringstream ss; |
| 358 | ss << batch->mName << ":"; |
| 359 | |
| 360 | if (!batch->mClosures.front()->mClosure->mIsKernel) { |
| 361 | rsAssert(batch->size() == 1); |
| 362 | generateSourceSlot(*batch->mClosures.front()->mClosure, inputs, ss); |
| 363 | invokeBatches.push_back(ss.str()); |
| 364 | } else { |
| 365 | for (const auto& cpuClosure : batch->mClosures) { |
| 366 | generateSourceSlot(*cpuClosure->mClosure, inputs, ss); |
| 367 | } |
| 368 | kernelBatches.push_back(ss.str()); |
| 369 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 370 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 371 | |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 372 | rsAssert(cacheDir != nullptr); |
| 373 | string objFilePath(cacheDir); |
| 374 | objFilePath.append("/fusedXXXXXX.o"); |
| 375 | // Find unique object file name, to make following file names unique. |
| 376 | int tempfd = mkstemps(&objFilePath[0], 2); |
| 377 | if (tempfd == -1) { |
| 378 | return; |
| 379 | } |
| 380 | TEMP_FAILURE_RETRY(close(tempfd)); |
| 381 | |
| 382 | string outputFileName = getFileName(objFilePath.substr(0, objFilePath.size() - 2)); |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 383 | string coreLibRelaxedPath; |
| 384 | const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), |
| 385 | &coreLibRelaxedPath); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 386 | vector<const char*> arguments; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 387 | setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 388 | outputFileName, coreLibPath, coreLibRelaxedPath, &arguments); |
Yang Ni | 2abfcc6 | 2015-02-17 16:05:19 -0800 | [diff] [blame] | 389 | std::unique_ptr<const char> joined( |
| 390 | rsuJoinStrings(arguments.size() - 1, arguments.data())); |
| 391 | string commandLine (joined.get()); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 392 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 393 | if (!fuseAndCompile(arguments.data(), commandLine)) { |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 394 | unlink(objFilePath.c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 395 | return; |
| 396 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 397 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 398 | //===--------------------------------------------------------------------===// |
| 399 | // Create and load the shared lib |
| 400 | //===--------------------------------------------------------------------===// |
| 401 | |
| 402 | const char* resName = outputFileName.c_str(); |
| 403 | |
| 404 | if (!SharedLibraryUtils::createSharedLibrary(cacheDir, resName)) { |
| 405 | ALOGE("Failed to link object file '%s'", resName); |
| 406 | return; |
| 407 | } |
| 408 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 409 | mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); |
| 410 | if (mScriptObj == nullptr) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 411 | ALOGE("Unable to load '%s'", resName); |
| 412 | return; |
| 413 | } |
| 414 | |
| 415 | mExecutable = ScriptExecutable::createFromSharedObject( |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 416 | nullptr, // RS context. Unused. |
| 417 | mScriptObj); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 418 | |
| 419 | #endif // RS_COMPATIBILITY_LIB |
| 420 | } |
| 421 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 422 | void CpuScriptGroup2Impl::execute() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 423 | for (auto batch : mBatches) { |
| 424 | batch->setGlobalsForBatch(); |
| 425 | batch->run(); |
| 426 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 427 | } |
| 428 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 429 | void Batch::setGlobalsForBatch() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 430 | for (CPUClosure* cpuClosure : mClosures) { |
| 431 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 432 | const IDBase* funcID = closure->mFunctionID.get(); |
| 433 | Script* s = funcID->mScript;; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 434 | for (const auto& p : closure->mGlobals) { |
| 435 | const void* value = p.second.first; |
| 436 | int size = p.second.second; |
| 437 | if (value == nullptr && size == 0) { |
| 438 | // This indicates the current closure depends on another closure for a |
| 439 | // global in their shared module (script). In this case we don't need to |
| 440 | // copy the value. For example, an invoke intializes a global variable |
| 441 | // which a kernel later reads. |
| 442 | continue; |
| 443 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 444 | rsAssert(p.first != nullptr); |
| 445 | ALOGV("Evaluating closure %p, setting field %p (Script %p, slot: %d)", |
| 446 | closure, p.first, p.first->mScript, p.first->mSlot); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 447 | Script* script = p.first->mScript; |
| 448 | const RsdCpuScriptImpl *cpuScript = |
| 449 | (const RsdCpuScriptImpl*)script->mHal.drv; |
| 450 | int slot = p.first->mSlot; |
| 451 | ScriptExecutable* exec = mGroup->getExecutable(); |
| 452 | if (exec != nullptr) { |
| 453 | const char* varName = cpuScript->getFieldName(slot); |
| 454 | void* addr = exec->getFieldAddress(varName); |
| 455 | if (size < 0) { |
| 456 | rsrSetObject(mGroup->getCpuRefImpl()->getContext(), |
| 457 | (rs_object_base*)addr, (ObjectBase*)value); |
| 458 | } else { |
| 459 | memcpy(addr, (const void*)&value, size); |
| 460 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 461 | } else { |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 462 | // We use -1 size to indicate an ObjectBase rather than a primitive type |
| 463 | if (size < 0) { |
| 464 | s->setVarObj(slot, (ObjectBase*)value); |
| 465 | } else { |
| 466 | s->setVar(slot, (const void*)&value, size); |
| 467 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 468 | } |
| 469 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 470 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 471 | } |
| 472 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 473 | void Batch::run() { |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 474 | if (!mClosures.front()->mClosure->mIsKernel) { |
| 475 | rsAssert(mClosures.size() == 1); |
| 476 | |
| 477 | // This batch contains a single closure for an invoke function |
| 478 | CPUClosure* cc = mClosures.front(); |
| 479 | const Closure* c = cc->mClosure; |
| 480 | |
| 481 | if (mFunc != nullptr) { |
| 482 | // TODO: Need align pointers for x86_64. |
| 483 | // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp |
| 484 | ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); |
| 485 | } else { |
| 486 | const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); |
| 487 | rsAssert(invokeID != nullptr); |
| 488 | cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); |
| 489 | } |
| 490 | |
| 491 | return; |
| 492 | } |
| 493 | |
| 494 | if (mFunc != nullptr) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 495 | MTLaunchStruct mtls; |
| 496 | const CPUClosure* firstCpuClosure = mClosures.front(); |
| 497 | const CPUClosure* lastCpuClosure = mClosures.back(); |
| 498 | |
| 499 | firstCpuClosure->mSi->forEachMtlsSetup( |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 500 | (const Allocation**)firstCpuClosure->mClosure->mArgs, |
| 501 | firstCpuClosure->mClosure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 502 | lastCpuClosure->mClosure->mReturnValue, |
| 503 | nullptr, 0, nullptr, &mtls); |
| 504 | |
| 505 | mtls.script = nullptr; |
| 506 | mtls.fep.usr = nullptr; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 507 | mtls.kernel = (ForEachFunc_t)mFunc; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 508 | |
| 509 | mGroup->getCpuRefImpl()->launchThreads( |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 510 | (const Allocation**)firstCpuClosure->mClosure->mArgs, |
| 511 | firstCpuClosure->mClosure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 512 | lastCpuClosure->mClosure->mReturnValue, |
| 513 | nullptr, &mtls); |
| 514 | |
| 515 | return; |
| 516 | } |
| 517 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 518 | for (CPUClosure* cpuClosure : mClosures) { |
| 519 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 520 | const ScriptKernelID* kernelID = |
| 521 | (const ScriptKernelID*)closure->mFunctionID.get(); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 522 | cpuClosure->mSi->preLaunch(kernelID->mSlot, |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 523 | (const Allocation**)closure->mArgs, |
| 524 | closure->mNumArg, closure->mReturnValue, |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 525 | nullptr, 0, nullptr); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 526 | } |
| 527 | |
| 528 | const CPUClosure* cpuClosure = mClosures.front(); |
| 529 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 530 | MTLaunchStruct mtls; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 531 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 532 | if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, |
| 533 | closure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 534 | closure->mReturnValue, |
| 535 | nullptr, 0, nullptr, &mtls)) { |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 536 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 537 | mtls.script = nullptr; |
| 538 | mtls.kernel = (void (*)())&groupRoot; |
| 539 | mtls.fep.usr = &mClosures; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 540 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 541 | mGroup->getCpuRefImpl()->launchThreads(nullptr, 0, nullptr, nullptr, &mtls); |
| 542 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 543 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 544 | for (CPUClosure* cpuClosure : mClosures) { |
| 545 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 546 | const ScriptKernelID* kernelID = |
| 547 | (const ScriptKernelID*)closure->mFunctionID.get(); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 548 | cpuClosure->mSi->postLaunch(kernelID->mSlot, |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 549 | (const Allocation**)closure->mArgs, |
| 550 | closure->mNumArg, closure->mReturnValue, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 551 | nullptr, 0, nullptr); |
| 552 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 553 | } |
| 554 | |
| 555 | } // namespace renderscript |
| 556 | } // namespace android |