| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2011 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "rsCpuCore.h" |
| 18 | #include "rsCpuScript.h" |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 19 | #include "rsScriptGroup.h" |
| 20 | #include "rsCpuScriptGroup.h" |
| 21 | //#include "rsdBcc.h" |
| 22 | //#include "rsdAllocation.h" |
| 23 | |
| 24 | using namespace android; |
| 25 | using namespace android::renderscript; |
| 26 | |
| 27 | CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) { |
| 28 | mCtx = ctx; |
| 29 | mSG = sg; |
| 30 | } |
| 31 | |
| 32 | CpuScriptGroupImpl::~CpuScriptGroupImpl() { |
| 33 | |
| 34 | } |
| 35 | |
| 36 | bool CpuScriptGroupImpl::init() { |
| 37 | return true; |
| 38 | } |
| 39 | |
| 40 | void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { |
| 41 | } |
| 42 | |
| 43 | void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { |
| 44 | } |
| 45 | |
| 46 | |
| 47 | typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p, |
| 48 | uint32_t xstart, uint32_t xend, |
| 49 | uint32_t instep, uint32_t outstep); |
| 50 | |
| 51 | void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p, |
| 52 | uint32_t xstart, uint32_t xend, |
| 53 | uint32_t instep, uint32_t outstep) { |
| 54 | |
| 55 | |
| 56 | const ScriptList *sl = (const ScriptList *)p->usr; |
| 57 | RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p; |
| 58 | const void *oldUsr = p->usr; |
| 59 | |
| 60 | for(size_t ct=0; ct < sl->count; ct++) { |
| 61 | ScriptGroupRootFunc_t func; |
| 62 | func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; |
| 63 | mp->usr = sl->usrPtrs[ct]; |
| 64 | |
| 65 | mp->ptrIn = NULL; |
| 66 | mp->in = NULL; |
| 67 | mp->ptrOut = NULL; |
| 68 | mp->out = NULL; |
| 69 | |
| Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 70 | uint32_t istep = 0; |
| 71 | uint32_t ostep = 0; |
| 72 | |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 73 | if (sl->ins[ct]) { |
| 74 | mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; |
| Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 75 | istep = sl->ins[ct]->mHal.state.elementSizeBytes; |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 76 | mp->in = mp->ptrIn; |
| 77 | if (sl->inExts[ct]) { |
| 78 | mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y; |
| 79 | } else { |
| 80 | if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) { |
| 81 | mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid; |
| 82 | } |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | if (sl->outs[ct]) { |
| 87 | mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; |
| 88 | mp->out = mp->ptrOut; |
| Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 89 | ostep = sl->outs[ct]->mHal.state.elementSizeBytes; |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 90 | if (sl->outExts[ct]) { |
| 91 | mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y; |
| 92 | } else { |
| 93 | if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) { |
| 94 | mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid; |
| 95 | } |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); |
| Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 100 | func(p, xstart, xend, istep, ostep); |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 101 | } |
| 102 | //ALOGE("script group root"); |
| 103 | |
| 104 | //ConvolveParams *cp = (ConvolveParams *)p->usr; |
| 105 | |
| 106 | mp->usr = oldUsr; |
| 107 | } |
| 108 | |
| 109 | |
| 110 | |
| 111 | void CpuScriptGroupImpl::execute() { |
| 112 | Vector<Allocation *> ins; |
| 113 | Vector<bool> inExts; |
| 114 | Vector<Allocation *> outs; |
| 115 | Vector<bool> outExts; |
| 116 | Vector<const ScriptKernelID *> kernels; |
| 117 | bool fieldDep = false; |
| 118 | |
| 119 | for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { |
| 120 | ScriptGroup::Node *n = mSG->mNodes[ct]; |
| 121 | Script *s = n->mKernels[0]->mScript; |
| Stephen Hines | c78839b | 2013-09-10 17:40:41 -0700 | [diff] [blame] | 122 | if (s->hasObjectSlots()) { |
| 123 | // Disable the ScriptGroup optimization if we have global RS |
| 124 | // objects that might interfere between kernels. |
| 125 | fieldDep = true; |
| 126 | } |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 127 | |
| 128 | //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); |
| 129 | |
| 130 | for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { |
| 131 | if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { |
| 132 | //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); |
| 133 | s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); |
| 134 | } |
| 135 | } |
| 136 | |
| 137 | for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { |
| 138 | const ScriptKernelID *k = n->mKernels[ct2]; |
| 139 | Allocation *ain = NULL; |
| 140 | Allocation *aout = NULL; |
| 141 | bool inExt = false; |
| 142 | bool outExt = false; |
| 143 | |
| Stephen Hines | c78839b | 2013-09-10 17:40:41 -0700 | [diff] [blame] | 144 | if (k->mScript->hasObjectSlots()) { |
| 145 | // Disable the ScriptGroup optimization if we have global RS |
| 146 | // objects that might interfere between kernels. |
| 147 | fieldDep = true; |
| 148 | } |
| 149 | |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 150 | for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { |
| 151 | if (n->mInputs[ct3]->mDstKernel.get() == k) { |
| 152 | ain = n->mInputs[ct3]->mAlloc.get(); |
| 153 | //ALOGE(" link in %p", ain); |
| 154 | } |
| 155 | } |
| 156 | for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { |
| 157 | if (mSG->mInputs[ct3]->mKernel == k) { |
| 158 | ain = mSG->mInputs[ct3]->mAlloc.get(); |
| 159 | inExt = true; |
| 160 | //ALOGE(" io in %p", ain); |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { |
| 165 | if (n->mOutputs[ct3]->mSource.get() == k) { |
| 166 | aout = n->mOutputs[ct3]->mAlloc.get(); |
| 167 | if(n->mOutputs[ct3]->mDstField.get() != NULL) { |
| 168 | fieldDep = true; |
| 169 | } |
| 170 | //ALOGE(" link out %p", aout); |
| 171 | } |
| 172 | } |
| 173 | for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { |
| 174 | if (mSG->mOutputs[ct3]->mKernel == k) { |
| 175 | aout = mSG->mOutputs[ct3]->mAlloc.get(); |
| 176 | outExt = true; |
| 177 | //ALOGE(" io out %p", aout); |
| 178 | } |
| 179 | } |
| 180 | |
| 181 | if ((k->mHasKernelOutput == (aout != NULL)) && |
| 182 | (k->mHasKernelInput == (ain != NULL))) { |
| 183 | ins.add(ain); |
| 184 | inExts.add(inExt); |
| 185 | outs.add(aout); |
| 186 | outExts.add(outExt); |
| 187 | kernels.add(k); |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | } |
| 192 | |
| 193 | MTLaunchStruct mtls; |
| 194 | |
| 195 | if(fieldDep) { |
| 196 | for (size_t ct=0; ct < ins.size(); ct++) { |
| 197 | Script *s = kernels[ct]->mScript; |
| 198 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
| 199 | uint32_t slot = kernels[ct]->mSlot; |
| 200 | |
| 201 | si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls); |
| 202 | si->forEachKernelSetup(slot, &mtls); |
| Stephen Hines | c78839b | 2013-09-10 17:40:41 -0700 | [diff] [blame] | 203 | si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 204 | mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls); |
| Stephen Hines | c78839b | 2013-09-10 17:40:41 -0700 | [diff] [blame] | 205 | si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL); |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 206 | } |
| 207 | } else { |
| 208 | ScriptList sl; |
| 209 | sl.ins = ins.array(); |
| 210 | sl.outs = outs.array(); |
| 211 | sl.kernels = kernels.array(); |
| 212 | sl.count = kernels.size(); |
| 213 | |
| 214 | Vector<const void *> usrPtrs; |
| 215 | Vector<const void *> fnPtrs; |
| 216 | Vector<uint32_t> sigs; |
| 217 | for (size_t ct=0; ct < kernels.size(); ct++) { |
| 218 | Script *s = kernels[ct]->mScript; |
| 219 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
| 220 | |
| 221 | si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); |
| 222 | fnPtrs.add((void *)mtls.kernel); |
| 223 | usrPtrs.add(mtls.fep.usr); |
| 224 | sigs.add(mtls.fep.usrLen); |
| Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 225 | si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 226 | } |
| 227 | sl.sigs = sigs.array(); |
| 228 | sl.usrPtrs = usrPtrs.array(); |
| 229 | sl.fnPtrs = fnPtrs.array(); |
| 230 | sl.inExts = inExts.array(); |
| 231 | sl.outExts = outExts.array(); |
| 232 | |
| 233 | Script *s = kernels[0]->mScript; |
| 234 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
| 235 | si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls); |
| 236 | mtls.script = NULL; |
| 237 | mtls.kernel = (void (*)())&scriptGroupRoot; |
| 238 | mtls.fep.usr = &sl; |
| 239 | mCtx->launchThreads(ins[0], outs[0], NULL, &mtls); |
| Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 240 | |
| 241 | for (size_t ct=0; ct < kernels.size(); ct++) { |
| 242 | Script *s = kernels[ct]->mScript; |
| 243 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
| 244 | si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL); |
| 245 | } |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 246 | } |
| 247 | } |
| 248 | |
| Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 249 | |