Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 1 | #include "rsCpuScriptGroup2.h" |
| 2 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 3 | #include <dlfcn.h> |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 4 | #include <stdio.h> |
| 5 | #include <stdlib.h> |
| 6 | #include <unistd.h> |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 7 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 8 | #include <set> |
| 9 | #include <sstream> |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 10 | #include <string> |
| 11 | #include <vector> |
| 12 | |
| 13 | #ifndef RS_COMPATIBILITY_LIB |
Jean-Luc Brouillet | 03fab68 | 2017-02-16 21:07:20 -0800 | [diff] [blame] | 14 | #include "bcc/Config.h" |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 15 | #endif |
| 16 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 17 | #include "cpu_ref/rsCpuCore.h" |
| 18 | #include "rsClosure.h" |
| 19 | #include "rsContext.h" |
| 20 | #include "rsCpuCore.h" |
Yang Ni | 2abfcc6 | 2015-02-17 16:05:19 -0800 | [diff] [blame] | 21 | #include "rsCpuExecutable.h" |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 22 | #include "rsCpuScript.h" |
| 23 | #include "rsScript.h" |
| 24 | #include "rsScriptGroup2.h" |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 25 | #include "rsScriptIntrinsic.h" |
| 26 | |
| 27 | using std::string; |
| 28 | using std::vector; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 29 | |
| 30 | namespace android { |
| 31 | namespace renderscript { |
| 32 | |
| 33 | namespace { |
| 34 | |
// File-local constant: default kernel argument count (value 2).
constexpr size_t DefaultKernelArgCount = 2;
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 36 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 37 | void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart, |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 38 | uint32_t xend, uint32_t outstep) { |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 39 | const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr; |
| 40 | RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 41 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 42 | const size_t oldInLen = mutable_kinfo->inLen; |
| 43 | |
| 44 | decltype(mutable_kinfo->inStride) oldInStride; |
| 45 | memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride)); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 46 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 47 | for (CPUClosure* cpuClosure : closures) { |
| 48 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 49 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 50 | // There had better be enough space in mutable_kinfo |
| 51 | rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 52 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 53 | for (size_t i = 0; i < closure->mNumArg; i++) { |
| 54 | const void* arg = closure->mArgs[i]; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 55 | const Allocation* a = (const Allocation*)arg; |
| 56 | const uint32_t eStride = a->mHal.state.elementSizeBytes; |
| 57 | const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) + |
| 58 | eStride * xstart; |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 59 | if (kinfo->dim.y > 1) { |
| 60 | ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 61 | } |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 62 | mutable_kinfo->inPtr[i] = ptr; |
| 63 | mutable_kinfo->inStride[i] = eStride; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 64 | } |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 65 | mutable_kinfo->inLen = closure->mNumArg; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 66 | |
| 67 | const Allocation* out = closure->mReturnValue; |
| 68 | const uint32_t ostep = out->mHal.state.elementSizeBytes; |
| 69 | const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) + |
| 70 | ostep * xstart; |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 71 | if (kinfo->dim.y > 1) { |
| 72 | ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 73 | } |
| 74 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 75 | mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 76 | |
Yang Ni | 7a106ad | 2016-03-10 16:06:36 -0800 | [diff] [blame] | 77 | // The implementation of an intrinsic relies on kinfo->usr being |
| 78 | // the "this" pointer to the intrinsic (an RsdCpuScriptIntrinsic object) |
| 79 | mutable_kinfo->usr = cpuClosure->mSi; |
| 80 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 81 | cpuClosure->mFunc(kinfo, xstart, xend, ostep); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 82 | } |
| 83 | |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 84 | mutable_kinfo->inLen = oldInLen; |
Yang Ni | 7a106ad | 2016-03-10 16:06:36 -0800 | [diff] [blame] | 85 | mutable_kinfo->usr = &closures; |
David Gross | b0abb14 | 2015-03-12 15:23:03 -0700 | [diff] [blame] | 86 | memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride)); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 87 | } |
| 88 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 89 | } // namespace |
| 90 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 91 | Batch::Batch(CpuScriptGroup2Impl* group, const char* name) : |
| 92 | mGroup(group), mFunc(nullptr) { |
| 93 | mName = strndup(name, strlen(name)); |
| 94 | } |
| 95 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 96 | Batch::~Batch() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 97 | for (CPUClosure* c : mClosures) { |
| 98 | delete c; |
| 99 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 100 | free(mName); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 101 | } |
| 102 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 103 | bool Batch::conflict(CPUClosure* cpuClosure) const { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 104 | if (mClosures.empty()) { |
| 105 | return false; |
| 106 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 107 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 108 | const Closure* closure = cpuClosure->mClosure; |
| 109 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 110 | if (!closure->mIsKernel || !mClosures.front()->mClosure->mIsKernel) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 111 | // An invoke should be in a batch by itself, so it conflicts with any other |
| 112 | // closure. |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 113 | return true; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 114 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 115 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 116 | const auto& globalDeps = closure->mGlobalDeps; |
| 117 | const auto& argDeps = closure->mArgDeps; |
| 118 | |
| 119 | for (CPUClosure* c : mClosures) { |
| 120 | const Closure* batched = c->mClosure; |
| 121 | if (globalDeps.find(batched) != globalDeps.end()) { |
| 122 | return true; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 123 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 124 | const auto& it = argDeps.find(batched); |
| 125 | if (it != argDeps.end()) { |
| 126 | const auto& args = (*it).second; |
| 127 | for (const auto &p1 : *args) { |
Yang Ni | bd0af2d | 2015-03-23 17:14:58 -0700 | [diff] [blame] | 128 | if (p1.second.get() != nullptr) { |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 129 | return true; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 130 | } |
| 131 | } |
| 132 | } |
| 133 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 134 | |
Yang Ni | 1c20667 | 2015-06-18 15:57:56 -0700 | [diff] [blame] | 135 | // The compiler fusion pass in bcc expects that kernels chained up through |
| 136 | // (1st) input and output. |
| 137 | |
| 138 | const Closure* lastBatched = mClosures.back()->mClosure; |
| 139 | const auto& it = argDeps.find(lastBatched); |
| 140 | |
| 141 | if (it == argDeps.end()) { |
| 142 | return true; |
| 143 | } |
| 144 | |
| 145 | const auto& args = (*it).second; |
| 146 | for (const auto &p1 : *args) { |
| 147 | if (p1.first == 0 && p1.second.get() == nullptr) { |
| 148 | // The new closure depends on the last batched closure's return |
| 149 | // value (fieldId being nullptr) for its first argument (argument 0) |
| 150 | return false; |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | return true; |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 155 | } |
| 156 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 157 | CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl, |
| 158 | const ScriptGroupBase *sg) : |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 159 | mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)), |
| 160 | mExecutable(nullptr), mScriptObj(nullptr) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 161 | rsAssert(!mGroup->mClosures.empty()); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 162 | |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 163 | mCpuRefImpl->lockMutex(); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 164 | Batch* batch = new Batch(this, "Batch0"); |
| 165 | int i = 0; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 166 | for (Closure* closure: mGroup->mClosures) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 167 | CPUClosure* cc; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 168 | const IDBase* funcID = closure->mFunctionID.get(); |
| 169 | RsdCpuScriptImpl* si = |
| 170 | (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(funcID->mScript); |
| 171 | if (closure->mIsKernel) { |
Matt Wala | 14ce007 | 2015-07-30 17:30:25 -0700 | [diff] [blame] | 172 | MTLaunchStructForEach mtls; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 173 | si->forEachKernelSetup(funcID->mSlot, &mtls); |
| 174 | cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 175 | } else { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 176 | cc = new CPUClosure(closure, si); |
| 177 | } |
| 178 | |
| 179 | if (batch->conflict(cc)) { |
| 180 | mBatches.push_back(batch); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 181 | std::stringstream ss; |
| 182 | ss << "Batch" << ++i; |
Yang Ni | 578419f | 2016-06-27 16:12:25 -0700 | [diff] [blame] | 183 | std::string batchStr(ss.str()); |
| 184 | batch = new Batch(this, batchStr.c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 185 | } |
| 186 | |
| 187 | batch->mClosures.push_back(cc); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 188 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 189 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 190 | rsAssert(!batch->mClosures.empty()); |
| 191 | mBatches.push_back(batch); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 192 | |
| 193 | #ifndef RS_COMPATIBILITY_LIB |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 194 | compile(mGroup->mCacheDir); |
| 195 | if (mScriptObj != nullptr && mExecutable != nullptr) { |
| 196 | for (Batch* batch : mBatches) { |
| 197 | batch->resolveFuncPtr(mScriptObj); |
| 198 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 199 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 200 | #endif // RS_COMPATIBILITY_LIB |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 201 | mCpuRefImpl->unlockMutex(); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 202 | } |
| 203 | |
| 204 | void Batch::resolveFuncPtr(void* sharedObj) { |
| 205 | std::string funcName(mName); |
| 206 | if (mClosures.front()->mClosure->mIsKernel) { |
| 207 | funcName.append(".expand"); |
| 208 | } |
| 209 | mFunc = dlsym(sharedObj, funcName.c_str()); |
| 210 | rsAssert (mFunc != nullptr); |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 211 | } |
| 212 | |
| 213 | CpuScriptGroup2Impl::~CpuScriptGroup2Impl() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 214 | for (Batch* batch : mBatches) { |
| 215 | delete batch; |
| 216 | } |
Yang Ni | bd0af2d | 2015-03-23 17:14:58 -0700 | [diff] [blame] | 217 | delete mExecutable; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 218 | // TODO: move this dlclose into ~ScriptExecutable(). |
| 219 | if (mScriptObj != nullptr) { |
| 220 | dlclose(mScriptObj); |
| 221 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 222 | } |
| 223 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 224 | namespace { |
| 225 | |
| 226 | #ifndef RS_COMPATIBILITY_LIB |
| 227 | |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 228 | string getCoreLibPath(Context* context, string* coreLibRelaxedPath) { |
| 229 | *coreLibRelaxedPath = ""; |
| 230 | |
| 231 | // If we're debugging, use the debug library. |
| 232 | if (context->getContextType() == RS_CONTEXT_TYPE_DEBUG) { |
Victor Khimenko | cfb1d0b | 2016-10-28 17:05:22 +0200 | [diff] [blame] | 233 | return SYSLIBPATH_BC"/libclcore_debug.bc"; |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 234 | } |
| 235 | |
| 236 | // Check for a platform specific library |
| 237 | |
| 238 | #if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) |
| 239 | // NEON-capable ARMv7a devices can use an accelerated math library |
| 240 | // for all reduced precision scripts. |
| 241 | // ARMv8 does not use NEON, as ASIMD can be used with all precision |
| 242 | // levels. |
Victor Khimenko | cfb1d0b | 2016-10-28 17:05:22 +0200 | [diff] [blame] | 243 | *coreLibRelaxedPath = SYSLIBPATH_BC"/libclcore_neon.bc"; |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 244 | #endif |
| 245 | |
| 246 | #if defined(__i386__) || defined(__x86_64__) |
| 247 | // x86 devices will use an optimized library. |
Victor Khimenko | cfb1d0b | 2016-10-28 17:05:22 +0200 | [diff] [blame] | 248 | return SYSLIBPATH_BC"/libclcore_x86.bc"; |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 249 | #else |
Victor Khimenko | cfb1d0b | 2016-10-28 17:05:22 +0200 | [diff] [blame] | 250 | return SYSLIBPATH_BC"/libclcore.bc"; |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 251 | #endif |
| 252 | } |
| 253 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 254 | void setupCompileArguments( |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 255 | const vector<const char*>& inputs, const vector<string>& kernelBatches, |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 256 | const vector<string>& invokeBatches, |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 257 | const char* outputDir, const char* outputFileName, |
| 258 | const char* coreLibPath, const char* coreLibRelaxedPath, |
Yang Ni | 8237638 | 2015-05-13 14:51:10 -0700 | [diff] [blame] | 259 | const bool emitGlobalInfo, const bool emitGlobalInfoSkipConstant, |
verena beckham | f502980 | 2015-05-22 16:51:42 +0100 | [diff] [blame] | 260 | int optLevel, vector<const char*>* args) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 261 | args->push_back(RsdCpuScriptImpl::BCC_EXE_PATH); |
| 262 | args->push_back("-fPIC"); |
| 263 | args->push_back("-embedRSInfo"); |
Yang Ni | 8237638 | 2015-05-13 14:51:10 -0700 | [diff] [blame] | 264 | if (emitGlobalInfo) { |
| 265 | args->push_back("-rs-global-info"); |
| 266 | if (emitGlobalInfoSkipConstant) { |
| 267 | args->push_back("-rs-global-info-skip-constant"); |
| 268 | } |
| 269 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 270 | args->push_back("-mtriple"); |
| 271 | args->push_back(DEFAULT_TARGET_TRIPLE_STRING); |
| 272 | args->push_back("-bclib"); |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 273 | args->push_back(coreLibPath); |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 274 | args->push_back("-bclib_relaxed"); |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 275 | args->push_back(coreLibRelaxedPath); |
| 276 | for (const char* input : inputs) { |
| 277 | args->push_back(input); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 278 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 279 | for (const string& batch : kernelBatches) { |
| 280 | args->push_back("-merge"); |
| 281 | args->push_back(batch.c_str()); |
| 282 | } |
| 283 | for (const string& batch : invokeBatches) { |
| 284 | args->push_back("-invoke"); |
| 285 | args->push_back(batch.c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 286 | } |
| 287 | args->push_back("-output_path"); |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 288 | args->push_back(outputDir); |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 289 | |
verena beckham | f502980 | 2015-05-22 16:51:42 +0100 | [diff] [blame] | 290 | args->push_back("-O"); |
Yang Ni | 7a106ad | 2016-03-10 16:06:36 -0800 | [diff] [blame] | 291 | switch (optLevel) { |
| 292 | case 0: |
| 293 | args->push_back("0"); |
| 294 | break; |
| 295 | case 3: |
| 296 | args->push_back("3"); |
| 297 | break; |
| 298 | default: |
| 299 | ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel); |
| 300 | args->push_back("3"); |
| 301 | break; |
| 302 | } |
verena beckham | f502980 | 2015-05-22 16:51:42 +0100 | [diff] [blame] | 303 | |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 304 | // The output filename has to be the last, in case we need to pop it out and |
| 305 | // replace with a different name. |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 306 | args->push_back("-o"); |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 307 | args->push_back(outputFileName); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 308 | } |
| 309 | |
Yang Ni | cbff7bc | 2015-05-26 16:47:30 -0700 | [diff] [blame] | 310 | void generateSourceSlot(RsdCpuReferenceImpl* ctxt, |
| 311 | const Closure& closure, |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 312 | const std::vector<const char*>& inputs, |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 313 | std::stringstream& ss) { |
| 314 | const IDBase* funcID = (const IDBase*)closure.mFunctionID.get(); |
| 315 | const Script* script = funcID->mScript; |
| 316 | |
| 317 | rsAssert (!script->isIntrinsic()); |
| 318 | |
| 319 | const RsdCpuScriptImpl *cpuScript = |
Yang Ni | cbff7bc | 2015-05-26 16:47:30 -0700 | [diff] [blame] | 320 | (const RsdCpuScriptImpl *)ctxt->lookupScript(script); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 321 | const string& bitcodeFilename = cpuScript->getBitcodeFilePath(); |
| 322 | |
| 323 | const int index = find(inputs.begin(), inputs.end(), bitcodeFilename) - |
| 324 | inputs.begin(); |
| 325 | |
| 326 | ss << index << "," << funcID->mSlot << "."; |
| 327 | } |
| 328 | |
#endif // RS_COMPATIBILITY_LIB
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 330 | |
| 331 | } // anonymous namespace |
| 332 | |
Luke Drummond | b4b603d | 2017-04-17 11:47:06 -0700 | [diff] [blame] | 333 | // This function is used by the debugger to inspect ScriptGroup |
| 334 | // compilations. |
| 335 | // |
| 336 | // "__attribute__((noinline))" and "__asm__" are used to prevent the |
| 337 | // function call from being eliminated as a no-op (see the "noinline" |
| 338 | // attribute in gcc documentation). |
| 339 | // |
| 340 | // "__attribute__((weak))" is used to prevent callers from recognizing |
| 341 | // that this is guaranteed to be the function definition, recognizing |
| 342 | // that certain arguments are unused, and optimizing away the passing |
| 343 | // of those arguments (see the LLVM optimization |
| 344 | // DeadArgumentElimination). Theoretically, the compiler could get |
| 345 | // aggressive enough with link-time optimization that even marking the |
| 346 | // entry point as a weak definition wouldn't solve the problem. |
| 347 | // |
| 348 | extern __attribute__((noinline)) __attribute__((weak)) |
Aidan Dodds | 1cea94d | 2016-10-04 11:26:23 +0100 | [diff] [blame] | 349 | void debugHintScriptGroup2(const char* groupName, |
| 350 | const uint32_t groupNameSize, |
| 351 | const ExpandFuncTy* kernel, |
| 352 | const uint32_t kernelCount) { |
| 353 | ALOGV("group name: %d:%s\n", groupNameSize, groupName); |
| 354 | for (uint32_t i=0; i < kernelCount; ++i) { |
| 355 | const char* f1 = (const char*)(kernel[i]); |
Luke Drummond | b4b603d | 2017-04-17 11:47:06 -0700 | [diff] [blame] | 356 | __asm__ __volatile__(""); |
Aidan Dodds | 1cea94d | 2016-10-04 11:26:23 +0100 | [diff] [blame] | 357 | ALOGV(" closure: %p\n", (const void*)f1); |
| 358 | } |
| 359 | // do nothing, this is just a hook point for the debugger. |
| 360 | return; |
| 361 | } |
| 362 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 363 | void CpuScriptGroup2Impl::compile(const char* cacheDir) { |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 364 | #ifndef RS_COMPATIBILITY_LIB |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 365 | if (mGroup->mClosures.size() < 2) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 366 | return; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 367 | } |
| 368 | |
Aidan Dodds | 1cea94d | 2016-10-04 11:26:23 +0100 | [diff] [blame] | 369 | const int optLevel = getCpuRefImpl()->getContext()->getOptLevel(); |
| 370 | if (optLevel == 0) { |
| 371 | std::vector<ExpandFuncTy> kernels; |
| 372 | for (const Batch* b : mBatches) |
| 373 | for (const CPUClosure* c : b->mClosures) |
| 374 | kernels.push_back(c->mFunc); |
| 375 | |
| 376 | if (kernels.size()) { |
| 377 | // pass this information on to the debugger via a hint function. |
| 378 | debugHintScriptGroup2(mGroup->mName, |
| 379 | strlen(mGroup->mName), |
| 380 | kernels.data(), |
| 381 | kernels.size()); |
| 382 | } |
| 383 | |
| 384 | // skip script group compilation forcing the driver to use the fallback |
| 385 | // execution path which currently has better support for debugging. |
| 386 | return; |
| 387 | } |
| 388 | |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 389 | auto comparator = [](const char* str1, const char* str2) -> bool { |
| 390 | return strcmp(str1, str2) < 0; |
| 391 | }; |
| 392 | std::set<const char*, decltype(comparator)> inputSet(comparator); |
| 393 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 394 | for (Closure* closure : mGroup->mClosures) { |
| 395 | const Script* script = closure->mFunctionID.get()->mScript; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 396 | |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 397 | // If any script is an intrinsic, give up trying fusing the kernels. |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 398 | if (script->isIntrinsic()) { |
| 399 | return; |
| 400 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 401 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 402 | const RsdCpuScriptImpl *cpuScript = |
Yang Ni | cbff7bc | 2015-05-26 16:47:30 -0700 | [diff] [blame] | 403 | (const RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(script); |
| 404 | |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 405 | const char* bitcodeFilename = cpuScript->getBitcodeFilePath(); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 406 | inputSet.insert(bitcodeFilename); |
| 407 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 408 | |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 409 | std::vector<const char*> inputs(inputSet.begin(), inputSet.end()); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 410 | |
| 411 | std::vector<string> kernelBatches; |
| 412 | std::vector<string> invokeBatches; |
| 413 | |
| 414 | int i = 0; |
| 415 | for (const auto& batch : mBatches) { |
| 416 | rsAssert(batch->size() > 0); |
| 417 | |
| 418 | std::stringstream ss; |
| 419 | ss << batch->mName << ":"; |
| 420 | |
| 421 | if (!batch->mClosures.front()->mClosure->mIsKernel) { |
| 422 | rsAssert(batch->size() == 1); |
Yang Ni | cbff7bc | 2015-05-26 16:47:30 -0700 | [diff] [blame] | 423 | generateSourceSlot(mCpuRefImpl, *batch->mClosures.front()->mClosure, inputs, ss); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 424 | invokeBatches.push_back(ss.str()); |
| 425 | } else { |
| 426 | for (const auto& cpuClosure : batch->mClosures) { |
Yang Ni | cbff7bc | 2015-05-26 16:47:30 -0700 | [diff] [blame] | 427 | generateSourceSlot(mCpuRefImpl, *cpuClosure->mClosure, inputs, ss); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 428 | } |
| 429 | kernelBatches.push_back(ss.str()); |
| 430 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 431 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 432 | |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 433 | rsAssert(cacheDir != nullptr); |
| 434 | string objFilePath(cacheDir); |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 435 | objFilePath.append("/"); |
| 436 | objFilePath.append(mGroup->mName); |
| 437 | objFilePath.append(".o"); |
Yabin Cui | 433558f | 2015-02-23 18:25:55 -0800 | [diff] [blame] | 438 | |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 439 | const char* resName = mGroup->mName; |
Yang Ni | edf4ea3 | 2015-03-11 09:07:15 -0700 | [diff] [blame] | 440 | string coreLibRelaxedPath; |
| 441 | const string& coreLibPath = getCoreLibPath(getCpuRefImpl()->getContext(), |
| 442 | &coreLibRelaxedPath); |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 443 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 444 | vector<const char*> arguments; |
Yang Ni | 8237638 | 2015-05-13 14:51:10 -0700 | [diff] [blame] | 445 | bool emitGlobalInfo = getCpuRefImpl()->getEmbedGlobalInfo(); |
| 446 | bool emitGlobalInfoSkipConstant = getCpuRefImpl()->getEmbedGlobalInfoSkipConstant(); |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 447 | setupCompileArguments(inputs, kernelBatches, invokeBatches, cacheDir, |
| 448 | resName, coreLibPath.c_str(), coreLibRelaxedPath.c_str(), |
Yang Ni | 8237638 | 2015-05-13 14:51:10 -0700 | [diff] [blame] | 449 | emitGlobalInfo, emitGlobalInfoSkipConstant, |
verena beckham | f502980 | 2015-05-22 16:51:42 +0100 | [diff] [blame] | 450 | optLevel, &arguments); |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 451 | |
| 452 | std::unique_ptr<const char> cmdLine(rsuJoinStrings(arguments.size() - 1, |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 453 | arguments.data())); |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 454 | |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 455 | inputs.push_back(coreLibPath.c_str()); |
| 456 | inputs.push_back(coreLibRelaxedPath.c_str()); |
| 457 | |
| 458 | uint32_t checksum = constructBuildChecksum(nullptr, 0, cmdLine.get(), |
| 459 | inputs.data(), inputs.size()); |
| 460 | |
| 461 | if (checksum == 0) { |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 462 | return; |
| 463 | } |
| 464 | |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 465 | std::stringstream ss; |
| 466 | ss << std::hex << checksum; |
Yang Ni | 578419f | 2016-06-27 16:12:25 -0700 | [diff] [blame] | 467 | std::string checksumStr(ss.str()); |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 468 | |
| 469 | //===--------------------------------------------------------------------===// |
| 470 | // Try to load a shared lib from code cache matching filename and checksum |
| 471 | //===--------------------------------------------------------------------===// |
| 472 | |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 473 | bool alreadyLoaded = false; |
| 474 | std::string cloneName; |
| 475 | |
Yang Ni | a845c35 | 2017-05-01 15:53:23 -0700 | [diff] [blame] | 476 | const bool useRSDebugContext = |
| 477 | (mCpuRefImpl->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG); |
| 478 | const bool reuse = !is_force_recompile() && !useRSDebugContext; |
| 479 | if (reuse) { |
| 480 | mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr, |
| 481 | &alreadyLoaded); |
| 482 | } |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 483 | if (mScriptObj != nullptr) { |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 484 | // A shared library named resName is found in code cache directory |
| 485 | // cacheDir, and loaded with the handle stored in mScriptObj. |
| 486 | |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 487 | mExecutable = ScriptExecutable::createFromSharedObject( |
Yang Ni | ade3137 | 2016-04-06 09:34:34 -0700 | [diff] [blame] | 488 | mScriptObj, checksum); |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 489 | |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 490 | if (mExecutable != nullptr) { |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 491 | // The loaded shared library in mScriptObj has a matching checksum. |
| 492 | // An executable object has been created. |
Yang Ni | cb17015 | 2015-04-16 10:27:02 -0700 | [diff] [blame] | 493 | return; |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 494 | } |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 495 | |
| 496 | ALOGV("Failed to create an executable object from so file due to " |
| 497 | "mismatching checksum"); |
| 498 | |
| 499 | if (alreadyLoaded) { |
| 500 | // The shared object found in code cache has already been loaded. |
| 501 | // A different file name is needed for the new shared library, to |
| 502 | // avoid corrupting the currently loaded instance. |
| 503 | |
| 504 | cloneName.append(resName); |
| 505 | cloneName.append("#"); |
Miao Wang | 82e135c | 2017-02-27 23:35:35 -0800 | [diff] [blame] | 506 | cloneName.append(SharedLibraryUtils::getRandomString(6).c_str()); |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 507 | |
| 508 | // The last element in arguments is the output filename. |
| 509 | arguments.pop_back(); |
| 510 | arguments.push_back(cloneName.c_str()); |
| 511 | } |
| 512 | |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 513 | dlclose(mScriptObj); |
| 514 | mScriptObj = nullptr; |
| 515 | } |
| 516 | |
| 517 | //===--------------------------------------------------------------------===// |
| 518 | // Fuse the input kernels and generate native code in an object file |
| 519 | //===--------------------------------------------------------------------===// |
| 520 | |
| 521 | arguments.push_back("-build-checksum"); |
Yang Ni | 578419f | 2016-06-27 16:12:25 -0700 | [diff] [blame] | 522 | arguments.push_back(checksumStr.c_str()); |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 523 | arguments.push_back(nullptr); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 524 | |
Pirama Arumuga Nainar | 2fa8a23 | 2015-03-25 17:21:40 -0700 | [diff] [blame] | 525 | bool compiled = rsuExecuteCommand(RsdCpuScriptImpl::BCC_EXE_PATH, |
Yang Ni | f02a2b0 | 2015-04-07 16:00:31 -0700 | [diff] [blame] | 526 | arguments.size()-1, |
| 527 | arguments.data()); |
Pirama Arumuga Nainar | 2fa8a23 | 2015-03-25 17:21:40 -0700 | [diff] [blame] | 528 | if (!compiled) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 529 | return; |
| 530 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 531 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 532 | //===--------------------------------------------------------------------===// |
| 533 | // Create and load the shared lib |
| 534 | //===--------------------------------------------------------------------===// |
| 535 | |
Yang Ni | a845c35 | 2017-05-01 15:53:23 -0700 | [diff] [blame] | 536 | std::string SOPath; |
| 537 | |
Stephen Hines | 4c368af | 2015-05-06 00:43:02 -0700 | [diff] [blame] | 538 | if (!SharedLibraryUtils::createSharedLibrary( |
Yang Ni | a845c35 | 2017-05-01 15:53:23 -0700 | [diff] [blame] | 539 | getCpuRefImpl()->getContext()->getDriverName(), cacheDir, resName, |
| 540 | reuse, &SOPath)) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 541 | ALOGE("Failed to link object file '%s'", resName); |
Yang Ni | 8b94222 | 2015-04-02 17:48:28 -0700 | [diff] [blame] | 542 | unlink(objFilePath.c_str()); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 543 | return; |
| 544 | } |
| 545 | |
Yang Ni | 8b94222 | 2015-04-02 17:48:28 -0700 | [diff] [blame] | 546 | unlink(objFilePath.c_str()); |
| 547 | |
Yang Ni | a845c35 | 2017-05-01 15:53:23 -0700 | [diff] [blame] | 548 | if (reuse) { |
| 549 | mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); |
| 550 | } else { |
| 551 | mScriptObj = SharedLibraryUtils::loadAndDeleteSharedLibrary(SOPath.c_str()); |
| 552 | } |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 553 | if (mScriptObj == nullptr) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 554 | ALOGE("Unable to load '%s'", resName); |
| 555 | return; |
| 556 | } |
| 557 | |
Yang Ni | 1efae29 | 2015-06-27 15:45:18 -0700 | [diff] [blame] | 558 | if (alreadyLoaded) { |
| 559 | // Delete the temporary, random-named file that we created to avoid |
| 560 | // interfering with an already loaded shared library. |
| 561 | string cloneFilePath(cacheDir); |
| 562 | cloneFilePath.append("/"); |
| 563 | cloneFilePath.append(cloneName.c_str()); |
| 564 | cloneFilePath.append(".so"); |
| 565 | unlink(cloneFilePath.c_str()); |
| 566 | } |
| 567 | |
Yang Ni | ade3137 | 2016-04-06 09:34:34 -0700 | [diff] [blame] | 568 | mExecutable = ScriptExecutable::createFromSharedObject(mScriptObj); |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 569 | |
| 570 | #endif // RS_COMPATIBILITY_LIB |
| 571 | } |
| 572 | |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 573 | void CpuScriptGroup2Impl::execute() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 574 | for (auto batch : mBatches) { |
| 575 | batch->setGlobalsForBatch(); |
| 576 | batch->run(); |
| 577 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 578 | } |
| 579 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 580 | void Batch::setGlobalsForBatch() { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 581 | for (CPUClosure* cpuClosure : mClosures) { |
| 582 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 583 | const IDBase* funcID = closure->mFunctionID.get(); |
| 584 | Script* s = funcID->mScript;; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 585 | for (const auto& p : closure->mGlobals) { |
Yang Ni | fef0cd4 | 2015-11-11 15:08:16 -0800 | [diff] [blame] | 586 | const int64_t value = p.second.first; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 587 | int size = p.second.second; |
Yang Ni | fef0cd4 | 2015-11-11 15:08:16 -0800 | [diff] [blame] | 588 | if (value == 0 && size == 0) { |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 589 | // This indicates the current closure depends on another closure for a |
| 590 | // global in their shared module (script). In this case we don't need to |
| 591 | // copy the value. For example, an invoke intializes a global variable |
| 592 | // which a kernel later reads. |
| 593 | continue; |
| 594 | } |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 595 | rsAssert(p.first != nullptr); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 596 | Script* script = p.first->mScript; |
Yang Ni | 7a106ad | 2016-03-10 16:06:36 -0800 | [diff] [blame] | 597 | rsAssert(script == s); |
Yang Ni | cbff7bc | 2015-05-26 16:47:30 -0700 | [diff] [blame] | 598 | RsdCpuReferenceImpl* ctxt = mGroup->getCpuRefImpl(); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 599 | const RsdCpuScriptImpl *cpuScript = |
Yang Ni | cbff7bc | 2015-05-26 16:47:30 -0700 | [diff] [blame] | 600 | (const RsdCpuScriptImpl *)ctxt->lookupScript(script); |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 601 | int slot = p.first->mSlot; |
| 602 | ScriptExecutable* exec = mGroup->getExecutable(); |
| 603 | if (exec != nullptr) { |
| 604 | const char* varName = cpuScript->getFieldName(slot); |
| 605 | void* addr = exec->getFieldAddress(varName); |
| 606 | if (size < 0) { |
| 607 | rsrSetObject(mGroup->getCpuRefImpl()->getContext(), |
| 608 | (rs_object_base*)addr, (ObjectBase*)value); |
| 609 | } else { |
| 610 | memcpy(addr, (const void*)&value, size); |
| 611 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 612 | } else { |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 613 | // We use -1 size to indicate an ObjectBase rather than a primitive type |
| 614 | if (size < 0) { |
| 615 | s->setVarObj(slot, (ObjectBase*)value); |
| 616 | } else { |
| 617 | s->setVar(slot, (const void*)&value, size); |
| 618 | } |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 619 | } |
| 620 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 621 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 622 | } |
| 623 | |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 624 | void Batch::run() { |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 625 | if (!mClosures.front()->mClosure->mIsKernel) { |
| 626 | rsAssert(mClosures.size() == 1); |
| 627 | |
| 628 | // This batch contains a single closure for an invoke function |
| 629 | CPUClosure* cc = mClosures.front(); |
| 630 | const Closure* c = cc->mClosure; |
| 631 | |
| 632 | if (mFunc != nullptr) { |
| 633 | // TODO: Need align pointers for x86_64. |
| 634 | // See RsdCpuScriptImpl::invokeFunction in rsCpuScript.cpp |
| 635 | ((InvokeFuncTy)mFunc)(c->mParams, c->mParamLength); |
| 636 | } else { |
| 637 | const ScriptInvokeID* invokeID = (const ScriptInvokeID*)c->mFunctionID.get(); |
| 638 | rsAssert(invokeID != nullptr); |
| 639 | cc->mSi->invokeFunction(invokeID->mSlot, c->mParams, c->mParamLength); |
| 640 | } |
| 641 | |
| 642 | return; |
| 643 | } |
| 644 | |
| 645 | if (mFunc != nullptr) { |
Matt Wala | 14ce007 | 2015-07-30 17:30:25 -0700 | [diff] [blame] | 646 | MTLaunchStructForEach mtls; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 647 | const CPUClosure* firstCpuClosure = mClosures.front(); |
| 648 | const CPUClosure* lastCpuClosure = mClosures.back(); |
| 649 | |
| 650 | firstCpuClosure->mSi->forEachMtlsSetup( |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 651 | (const Allocation**)firstCpuClosure->mClosure->mArgs, |
| 652 | firstCpuClosure->mClosure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 653 | lastCpuClosure->mClosure->mReturnValue, |
| 654 | nullptr, 0, nullptr, &mtls); |
| 655 | |
| 656 | mtls.script = nullptr; |
| 657 | mtls.fep.usr = nullptr; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 658 | mtls.kernel = (ForEachFunc_t)mFunc; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 659 | |
Matt Wala | 14ce007 | 2015-07-30 17:30:25 -0700 | [diff] [blame] | 660 | mGroup->getCpuRefImpl()->launchForEach( |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 661 | (const Allocation**)firstCpuClosure->mClosure->mArgs, |
| 662 | firstCpuClosure->mClosure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 663 | lastCpuClosure->mClosure->mReturnValue, |
| 664 | nullptr, &mtls); |
| 665 | |
| 666 | return; |
| 667 | } |
| 668 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 669 | for (CPUClosure* cpuClosure : mClosures) { |
| 670 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 671 | const ScriptKernelID* kernelID = |
| 672 | (const ScriptKernelID*)closure->mFunctionID.get(); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 673 | cpuClosure->mSi->preLaunch(kernelID->mSlot, |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 674 | (const Allocation**)closure->mArgs, |
| 675 | closure->mNumArg, closure->mReturnValue, |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 676 | nullptr, 0, nullptr); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 677 | } |
| 678 | |
| 679 | const CPUClosure* cpuClosure = mClosures.front(); |
| 680 | const Closure* closure = cpuClosure->mClosure; |
Matt Wala | 14ce007 | 2015-07-30 17:30:25 -0700 | [diff] [blame] | 681 | MTLaunchStructForEach mtls; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 682 | |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 683 | if (cpuClosure->mSi->forEachMtlsSetup((const Allocation**)closure->mArgs, |
| 684 | closure->mNumArg, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 685 | closure->mReturnValue, |
| 686 | nullptr, 0, nullptr, &mtls)) { |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 687 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 688 | mtls.script = nullptr; |
Matt Wala | 14ce007 | 2015-07-30 17:30:25 -0700 | [diff] [blame] | 689 | mtls.kernel = &groupRoot; |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 690 | mtls.fep.usr = &mClosures; |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 691 | |
Matt Wala | 14ce007 | 2015-07-30 17:30:25 -0700 | [diff] [blame] | 692 | mGroup->getCpuRefImpl()->launchForEach(nullptr, 0, nullptr, nullptr, &mtls); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 693 | } |
Yang Ni | da0f069 | 2015-01-12 13:03:40 -0800 | [diff] [blame] | 694 | |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 695 | for (CPUClosure* cpuClosure : mClosures) { |
| 696 | const Closure* closure = cpuClosure->mClosure; |
Yang Ni | 062c287 | 2015-02-20 15:20:00 -0800 | [diff] [blame] | 697 | const ScriptKernelID* kernelID = |
| 698 | (const ScriptKernelID*)closure->mFunctionID.get(); |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 699 | cpuClosure->mSi->postLaunch(kernelID->mSlot, |
Yang Ni | ff2bb54 | 2015-02-02 14:33:47 -0800 | [diff] [blame] | 700 | (const Allocation**)closure->mArgs, |
| 701 | closure->mNumArg, closure->mReturnValue, |
Yang Ni | eb9aa67 | 2015-01-27 14:32:25 -0800 | [diff] [blame] | 702 | nullptr, 0, nullptr); |
| 703 | } |
Yang Ni | 1ffd86b | 2015-01-07 09:16:40 -0800 | [diff] [blame] | 704 | } |
| 705 | |
| 706 | } // namespace renderscript |
| 707 | } // namespace android |