Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2011 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "rsCpuCore.h" |
| 18 | #include "rsCpuScript.h" |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 19 | #include "rsScriptGroup.h" |
| 20 | #include "rsCpuScriptGroup.h" |
| 21 | //#include "rsdBcc.h" |
| 22 | //#include "rsdAllocation.h" |
| 23 | |
| 24 | using namespace android; |
| 25 | using namespace android::renderscript; |
| 26 | |
| 27 | CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) { |
| 28 | mCtx = ctx; |
| 29 | mSG = sg; |
| 30 | } |
| 31 | |
| 32 | CpuScriptGroupImpl::~CpuScriptGroupImpl() { |
| 33 | |
| 34 | } |
| 35 | |
| 36 | bool CpuScriptGroupImpl::init() { |
| 37 | return true; |
| 38 | } |
| 39 | |
| 40 | void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { |
| 41 | } |
| 42 | |
| 43 | void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { |
| 44 | } |
| 45 | |
| 46 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 47 | typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams, |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 48 | uint32_t xstart, uint32_t xend, |
| 49 | uint32_t instep, uint32_t outstep); |
| 50 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 51 | void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams, |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 52 | uint32_t xstart, uint32_t xend, |
| 53 | uint32_t instep, uint32_t outstep) { |
| 54 | |
| 55 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 56 | const ScriptList *sl = (const ScriptList *)kparams->usr; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 57 | RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 58 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 59 | const void **oldIns = mkparams->ins; |
| 60 | uint32_t *oldStrides = mkparams->inEStrides; |
| 61 | |
| 62 | void *localIns[1]; |
| 63 | uint32_t localStride[1]; |
| 64 | |
| 65 | mkparams->ins = (const void**)localIns; |
| 66 | mkparams->inEStrides = localStride; |
| 67 | |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 68 | for (size_t ct = 0; ct < sl->count; ct++) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 69 | ScriptGroupRootFunc_t func; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 70 | func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; |
| 71 | mkparams->usr = sl->usrPtrs[ct]; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 72 | |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 73 | if (sl->ins[ct]) { |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 74 | localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; |
Stephen Hines | 4b2bea3 | 2014-08-13 17:32:10 +0000 | [diff] [blame] | 75 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 76 | localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 77 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 78 | if (sl->inExts[ct]) { |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 79 | localIns[0] = (void*) |
| 80 | ((const uint8_t *)localIns[0] + |
| 81 | sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y); |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 82 | |
| 83 | } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) { |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 84 | localIns[0] = (void*) |
| 85 | ((const uint8_t *)localIns[0] + |
| 86 | sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 87 | } |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 88 | |
| 89 | } else { |
| 90 | localIns[0] = NULL; |
| 91 | localStride[0] = 0; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 92 | } |
| 93 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 94 | uint32_t ostep; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 95 | if (sl->outs[ct]) { |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 96 | mkparams->out = |
| 97 | (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; |
| 98 | |
Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 99 | ostep = sl->outs[ct]->mHal.state.elementSizeBytes; |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 100 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 101 | if (sl->outExts[ct]) { |
Chris Wailes | 80ef693 | 2014-07-08 11:22:18 -0700 | [diff] [blame] | 102 | mkparams->out = |
| 103 | (uint8_t *)mkparams->out + |
| 104 | sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y; |
| 105 | |
| 106 | } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) { |
| 107 | mkparams->out = |
| 108 | (uint8_t *)mkparams->out + |
| 109 | sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 110 | } |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 111 | } else { |
| 112 | mkparams->out = NULL; |
| 113 | ostep = 0; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 114 | } |
| 115 | |
| 116 | //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 117 | /* |
| 118 | * The fourth argument is zero here because kernels get their stride |
| 119 | * information from a member of p that points to an array. |
| 120 | */ |
| 121 | func(kparams, xstart, xend, 0, ostep); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 122 | } |
| 123 | //ALOGE("script group root"); |
| 124 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 125 | mkparams->ins = oldIns; |
| 126 | mkparams->inEStrides = oldStrides; |
| 127 | mkparams->usr = sl; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 128 | } |
| 129 | |
| 130 | |
| 131 | |
| 132 | void CpuScriptGroupImpl::execute() { |
| 133 | Vector<Allocation *> ins; |
| 134 | Vector<bool> inExts; |
| 135 | Vector<Allocation *> outs; |
| 136 | Vector<bool> outExts; |
| 137 | Vector<const ScriptKernelID *> kernels; |
| 138 | bool fieldDep = false; |
| 139 | |
| 140 | for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { |
| 141 | ScriptGroup::Node *n = mSG->mNodes[ct]; |
| 142 | Script *s = n->mKernels[0]->mScript; |
Stephen Hines | c78839b | 2013-09-10 17:40:41 -0700 | [diff] [blame] | 143 | if (s->hasObjectSlots()) { |
| 144 | // Disable the ScriptGroup optimization if we have global RS |
| 145 | // objects that might interfere between kernels. |
| 146 | fieldDep = true; |
| 147 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 148 | |
| 149 | //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); |
| 150 | |
| 151 | for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { |
| 152 | if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { |
| 153 | //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); |
| 154 | s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { |
| 159 | const ScriptKernelID *k = n->mKernels[ct2]; |
| 160 | Allocation *ain = NULL; |
| 161 | Allocation *aout = NULL; |
| 162 | bool inExt = false; |
| 163 | bool outExt = false; |
| 164 | |
| 165 | for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { |
| 166 | if (n->mInputs[ct3]->mDstKernel.get() == k) { |
| 167 | ain = n->mInputs[ct3]->mAlloc.get(); |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 168 | break; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 169 | } |
| 170 | } |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 171 | if (ain == NULL) { |
| 172 | for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { |
| 173 | if (mSG->mInputs[ct3]->mKernel == k) { |
| 174 | ain = mSG->mInputs[ct3]->mAlloc.get(); |
| 175 | inExt = true; |
| 176 | break; |
| 177 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 178 | } |
| 179 | } |
| 180 | |
| 181 | for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { |
| 182 | if (n->mOutputs[ct3]->mSource.get() == k) { |
| 183 | aout = n->mOutputs[ct3]->mAlloc.get(); |
| 184 | if(n->mOutputs[ct3]->mDstField.get() != NULL) { |
| 185 | fieldDep = true; |
| 186 | } |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 187 | break; |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 188 | } |
| 189 | } |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 190 | if (aout == NULL) { |
| 191 | for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { |
| 192 | if (mSG->mOutputs[ct3]->mKernel == k) { |
| 193 | aout = mSG->mOutputs[ct3]->mAlloc.get(); |
| 194 | outExt = true; |
| 195 | break; |
| 196 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 197 | } |
| 198 | } |
| 199 | |
Yang Ni | 5f6f16f | 2014-07-25 13:51:09 -0700 | [diff] [blame] | 200 | rsAssert((k->mHasKernelOutput == (aout != NULL)) && |
| 201 | (k->mHasKernelInput == (ain != NULL))); |
| 202 | |
| 203 | ins.add(ain); |
| 204 | inExts.add(inExt); |
| 205 | outs.add(aout); |
| 206 | outExts.add(outExt); |
| 207 | kernels.add(k); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 208 | } |
| 209 | |
| 210 | } |
| 211 | |
| 212 | MTLaunchStruct mtls; |
| 213 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 214 | if (fieldDep) { |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 215 | for (size_t ct=0; ct < ins.size(); ct++) { |
| 216 | Script *s = kernels[ct]->mScript; |
| 217 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
| 218 | uint32_t slot = kernels[ct]->mSlot; |
| 219 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 220 | uint32_t inLen; |
| 221 | const Allocation **ains; |
| 222 | |
| 223 | if (ins[ct] == NULL) { |
| 224 | inLen = 0; |
| 225 | ains = NULL; |
| 226 | |
| 227 | } else { |
| 228 | inLen = 1; |
| 229 | ains = const_cast<const Allocation**>(&ins[ct]); |
| 230 | } |
| 231 | |
| 232 | si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls); |
| 233 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 234 | si->forEachKernelSetup(slot, &mtls); |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 235 | si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr, |
| 236 | mtls.fep.usrLen, NULL); |
| 237 | |
| 238 | mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls); |
| 239 | |
| 240 | si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 241 | } |
| 242 | } else { |
| 243 | ScriptList sl; |
| 244 | sl.ins = ins.array(); |
| 245 | sl.outs = outs.array(); |
| 246 | sl.kernels = kernels.array(); |
| 247 | sl.count = kernels.size(); |
| 248 | |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 249 | uint32_t inLen; |
| 250 | const Allocation **ains; |
| 251 | |
| 252 | if (ins[0] == NULL) { |
| 253 | inLen = 0; |
| 254 | ains = NULL; |
| 255 | |
| 256 | } else { |
| 257 | inLen = 1; |
| 258 | ains = const_cast<const Allocation**>(&ins[0]); |
| 259 | } |
| 260 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 261 | Vector<const void *> usrPtrs; |
| 262 | Vector<const void *> fnPtrs; |
| 263 | Vector<uint32_t> sigs; |
| 264 | for (size_t ct=0; ct < kernels.size(); ct++) { |
| 265 | Script *s = kernels[ct]->mScript; |
| 266 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
| 267 | |
| 268 | si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); |
| 269 | fnPtrs.add((void *)mtls.kernel); |
| 270 | usrPtrs.add(mtls.fep.usr); |
| 271 | sigs.add(mtls.fep.usrLen); |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 272 | si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], |
| 273 | mtls.fep.usr, mtls.fep.usrLen, NULL); |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 274 | } |
| 275 | sl.sigs = sigs.array(); |
| 276 | sl.usrPtrs = usrPtrs.array(); |
| 277 | sl.fnPtrs = fnPtrs.array(); |
| 278 | sl.inExts = inExts.array(); |
| 279 | sl.outExts = outExts.array(); |
| 280 | |
| 281 | Script *s = kernels[0]->mScript; |
| 282 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 283 | |
| 284 | si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls); |
| 285 | |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 286 | mtls.script = NULL; |
| 287 | mtls.kernel = (void (*)())&scriptGroupRoot; |
| 288 | mtls.fep.usr = &sl; |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 289 | |
| 290 | mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls); |
Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 291 | |
| 292 | for (size_t ct=0; ct < kernels.size(); ct++) { |
| 293 | Script *s = kernels[ct]->mScript; |
| 294 | RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); |
Chris Wailes | f371213 | 2014-07-16 15:18:30 -0700 | [diff] [blame^] | 295 | si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], NULL, 0, |
| 296 | NULL); |
Jason Sams | 17e3cdc | 2013-09-09 17:32:16 -0700 | [diff] [blame] | 297 | } |
Jason Sams | 709a097 | 2012-11-15 18:18:04 -0800 | [diff] [blame] | 298 | } |
| 299 | } |