Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2011 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "rsCpuCore.h" |
| 18 | #include "rsCpuScript.h" |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 19 | #include "rsScriptGroup.h" |
| 20 | #include "rsCpuScriptGroup.h" |
| 21 | //#include "rsdBcc.h" |
| 22 | //#include "rsdAllocation.h" |
| 23 | |
| 24 | using namespace android; |
| 25 | using namespace android::renderscript; |
| 26 | |
| 27 | CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) { |
| 28 | mCtx = ctx; |
| 29 | mSG = sg; |
| 30 | } |
| 31 | |
| 32 | CpuScriptGroupImpl::~CpuScriptGroupImpl() { |
| 33 | |
| 34 | } |
| 35 | |
| 36 | bool CpuScriptGroupImpl::init() { |
| 37 | return true; |
| 38 | } |
| 39 | |
| 40 | void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { |
| 41 | } |
| 42 | |
| 43 | void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { |
| 44 | } |
| 45 | |
| 46 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 47 | typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams, |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 48 | uint32_t xstart, uint32_t xend, |
| 49 | uint32_t instep, uint32_t outstep); |
| 50 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 51 | void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams, |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 52 | uint32_t xstart, uint32_t xend, |
| 53 | uint32_t instep, uint32_t outstep) { |
| 54 | |
| 55 | |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 56 | const ScriptList *sl = (const ScriptList *)kparams->usr; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 57 | RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 58 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 59 | for (size_t ct = 0; ct < sl->count; ct++) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 60 | ScriptGroupRootFunc_t func; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 61 | func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; |
| 62 | mkparams->usr = sl->usrPtrs[ct]; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 63 | |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 64 | mkparams->in = NULL; |
| 65 | mkparams->out = NULL; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 66 | |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 67 | uint32_t istep = 0; |
| 68 | uint32_t ostep = 0; |
| 69 | |
| 70 | if (sl->ins[ct]) { |
| 71 | mkparams->in = |
| 72 | (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; |
| 73 | |
| 74 | istep = sl->ins[ct]->mHal.state.elementSizeBytes; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 75 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 76 | if (sl->inExts[ct]) { |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 77 | mkparams->in = |
| 78 | (const uint8_t *)mkparams->in + |
| 79 | sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 80 | |
| 81 | } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) { |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 82 | mkparams->in = |
| 83 | (const uint8_t *)mkparams->in + |
| 84 | sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 85 | } |
| 86 | } |
| 87 | |
| 88 | if (sl->outs[ct]) { |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 89 | mkparams->out = |
| 90 | (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; |
| 91 | |
Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 92 | ostep = sl->outs[ct]->mHal.state.elementSizeBytes; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 93 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 94 | if (sl->outExts[ct]) { |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 95 | mkparams->out = |
| 96 | (uint8_t *)mkparams->out + |
| 97 | sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y; |
| 98 | |
| 99 | } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) { |
| 100 | mkparams->out = |
| 101 | (uint8_t *)mkparams->out + |
| 102 | sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 103 | } |
| 104 | } |
| 105 | |
| 106 | //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 107 | func(kparams, xstart, xend, istep, ostep); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 108 | } |
| 109 | //ALOGE("script group root"); |
| 110 | |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 111 | mkparams->usr = sl; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 112 | } |
| 113 | |
| 114 | |
| 115 | |
| 116 | void CpuScriptGroupImpl::execute() { |
| 117 | Vector<Allocation *> ins; |
| 118 | Vector<bool> inExts; |
| 119 | Vector<Allocation *> outs; |
| 120 | Vector<bool> outExts; |
| 121 | Vector<const ScriptKernelID *> kernels; |
| 122 | bool fieldDep = false; |
| 123 | |
| 124 | for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { |
| 125 | ScriptGroup::Node *n = mSG->mNodes[ct]; |
| 126 | Script *s = n->mKernels[0]->mScript; |
Stephen Hines | c78839b | 2013-09-10 17:40:41 -0700 | [diff] [blame] | 127 | if (s->hasObjectSlots()) { |
| 128 | // Disable the ScriptGroup optimization if we have global RS |
| 129 | // objects that might interfere between kernels. |
| 130 | fieldDep = true; |
| 131 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 132 | |
| 133 | //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); |
| 134 | |
| 135 | for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { |
| 136 | if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { |
| 137 | //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); |
| 138 | s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { |
| 143 | const ScriptKernelID *k = n->mKernels[ct2]; |
| 144 | Allocation *ain = NULL; |
| 145 | Allocation *aout = NULL; |
| 146 | bool inExt = false; |
| 147 | bool outExt = false; |
| 148 | |
| 149 | for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { |
| 150 | if (n->mInputs[ct3]->mDstKernel.get() == k) { |
| 151 | ain = n->mInputs[ct3]->mAlloc.get(); |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 152 | break; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 153 | } |
| 154 | } |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 155 | if (ain == NULL) { |
| 156 | for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { |
| 157 | if (mSG->mInputs[ct3]->mKernel == k) { |
| 158 | ain = mSG->mInputs[ct3]->mAlloc.get(); |
| 159 | inExt = true; |
| 160 | break; |
| 161 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 162 | } |
| 163 | } |
| 164 | |
| 165 | for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { |
| 166 | if (n->mOutputs[ct3]->mSource.get() == k) { |
| 167 | aout = n->mOutputs[ct3]->mAlloc.get(); |
| 168 | if(n->mOutputs[ct3]->mDstField.get() != NULL) { |
| 169 | fieldDep = true; |
| 170 | } |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 171 | break; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 172 | } |
| 173 | } |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 174 | if (aout == NULL) { |
| 175 | for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { |
| 176 | if (mSG->mOutputs[ct3]->mKernel == k) { |
| 177 | aout = mSG->mOutputs[ct3]->mAlloc.get(); |
| 178 | outExt = true; |
| 179 | break; |
| 180 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 181 | } |
| 182 | } |
| 183 | |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 184 | rsAssert((k->mHasKernelOutput == (aout != NULL)) && |
| 185 | (k->mHasKernelInput == (ain != NULL))); |
| 186 | |
| 187 | ins.add(ain); |
| 188 | inExts.add(inExt); |
| 189 | outs.add(aout); |
| 190 | outExts.add(outExt); |
| 191 | kernels.add(k); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 192 | } |
| 193 | |
| 194 | } |
| 195 | |
| 196 | MTLaunchStruct mtls; |
| 197 | |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 198 | if(fieldDep) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 199 | for (size_t ct=0; ct < ins.size(); ct++) { |
| 200 | Script *s = kernels[ct]->mScript; |
| 201 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
| 202 | uint32_t slot = kernels[ct]->mSlot; |
| 203 | |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 204 | si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 205 | si->forEachKernelSetup(slot, &mtls); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 206 | si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); |
| 207 | mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls); |
| 208 | si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 209 | } |
| 210 | } else { |
| 211 | ScriptList sl; |
| 212 | sl.ins = ins.array(); |
| 213 | sl.outs = outs.array(); |
| 214 | sl.kernels = kernels.array(); |
| 215 | sl.count = kernels.size(); |
| 216 | |
| 217 | Vector<const void *> usrPtrs; |
| 218 | Vector<const void *> fnPtrs; |
| 219 | Vector<uint32_t> sigs; |
| 220 | for (size_t ct=0; ct < kernels.size(); ct++) { |
| 221 | Script *s = kernels[ct]->mScript; |
| 222 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
| 223 | |
| 224 | si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); |
| 225 | fnPtrs.add((void *)mtls.kernel); |
| 226 | usrPtrs.add(mtls.fep.usr); |
| 227 | sigs.add(mtls.fep.usrLen); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 228 | si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 229 | } |
| 230 | sl.sigs = sigs.array(); |
| 231 | sl.usrPtrs = usrPtrs.array(); |
| 232 | sl.fnPtrs = fnPtrs.array(); |
| 233 | sl.inExts = inExts.array(); |
| 234 | sl.outExts = outExts.array(); |
| 235 | |
| 236 | Script *s = kernels[0]->mScript; |
| 237 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 238 | si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 239 | mtls.script = NULL; |
| 240 | mtls.kernel = (void (*)())&scriptGroupRoot; |
| 241 | mtls.fep.usr = &sl; |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 242 | mCtx->launchThreads(ins[0], outs[0], NULL, &mtls); |
Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 243 | |
| 244 | for (size_t ct=0; ct < kernels.size(); ct++) { |
| 245 | Script *s = kernels[ct]->mScript; |
| 246 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame^] | 247 | si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL); |
Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 248 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 249 | } |
| 250 | } |