blob: 3d32a5129956f35767da6c2c6d79f5359ad87e9d [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
Jason Sams709a0972012-11-15 18:18:04 -080019#include "rsScriptGroup.h"
20#include "rsCpuScriptGroup.h"
Jason Sams709a0972012-11-15 18:18:04 -080021
22using namespace android;
23using namespace android::renderscript;
24
Yang Ni1ffd86b2015-01-07 09:16:40 -080025CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
Jason Sams709a0972012-11-15 18:18:04 -080026 mCtx = ctx;
Yang Ni1ffd86b2015-01-07 09:16:40 -080027 mSG = (ScriptGroup*)sg;
Jason Sams709a0972012-11-15 18:18:04 -080028}
29
30CpuScriptGroupImpl::~CpuScriptGroupImpl() {
31
32}
33
34bool CpuScriptGroupImpl::init() {
35 return true;
36}
37
38void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
39}
40
41void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
42}
43
44
Chris Wailes80ef6932014-07-08 11:22:18 -070045typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
Jason Sams709a0972012-11-15 18:18:04 -080046 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070047 uint32_t outstep);
Jason Sams709a0972012-11-15 18:18:04 -080048
Chris Wailes80ef6932014-07-08 11:22:18 -070049void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
Jason Sams709a0972012-11-15 18:18:04 -080050 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070051 uint32_t outstep) {
Jason Sams709a0972012-11-15 18:18:04 -080052
53
Chris Wailesf3712132014-07-16 15:18:30 -070054 const ScriptList *sl = (const ScriptList *)kparams->usr;
Chris Wailes80ef6932014-07-08 11:22:18 -070055 RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
Jason Sams709a0972012-11-15 18:18:04 -080056
Chris Wailesf3712132014-07-16 15:18:30 -070057 const void **oldIns = mkparams->ins;
58 uint32_t *oldStrides = mkparams->inEStrides;
59
60 void *localIns[1];
61 uint32_t localStride[1];
62
63 mkparams->ins = (const void**)localIns;
64 mkparams->inEStrides = localStride;
65
Chris Wailes80ef6932014-07-08 11:22:18 -070066 for (size_t ct = 0; ct < sl->count; ct++) {
Jason Sams709a0972012-11-15 18:18:04 -080067 ScriptGroupRootFunc_t func;
Chris Wailes80ef6932014-07-08 11:22:18 -070068 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
69 mkparams->usr = sl->usrPtrs[ct];
Jason Sams709a0972012-11-15 18:18:04 -080070
Stephen Hines4b2bea32014-08-13 17:32:10 +000071 if (sl->ins[ct]) {
Chris Wailesf3712132014-07-16 15:18:30 -070072 localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
Stephen Hines4b2bea32014-08-13 17:32:10 +000073
Chris Wailesf3712132014-07-16 15:18:30 -070074 localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
Chris Wailes80ef6932014-07-08 11:22:18 -070075
Jason Sams709a0972012-11-15 18:18:04 -080076 if (sl->inExts[ct]) {
Chris Wailesf3712132014-07-16 15:18:30 -070077 localIns[0] = (void*)
78 ((const uint8_t *)localIns[0] +
79 sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y);
Chris Wailes80ef6932014-07-08 11:22:18 -070080
81 } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
Chris Wailesf3712132014-07-16 15:18:30 -070082 localIns[0] = (void*)
83 ((const uint8_t *)localIns[0] +
84 sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid);
Jason Sams709a0972012-11-15 18:18:04 -080085 }
Chris Wailesf3712132014-07-16 15:18:30 -070086
87 } else {
Chris Wailes44bef6f2014-08-12 13:51:10 -070088 localIns[0] = nullptr;
Chris Wailesf3712132014-07-16 15:18:30 -070089 localStride[0] = 0;
Jason Sams709a0972012-11-15 18:18:04 -080090 }
91
Chris Wailesf3712132014-07-16 15:18:30 -070092 uint32_t ostep;
Jason Sams709a0972012-11-15 18:18:04 -080093 if (sl->outs[ct]) {
Chris Wailes80ef6932014-07-08 11:22:18 -070094 mkparams->out =
95 (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
96
Jason Sams17e3cdc2013-09-09 17:32:16 -070097 ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
Chris Wailes80ef6932014-07-08 11:22:18 -070098
Jason Sams709a0972012-11-15 18:18:04 -080099 if (sl->outExts[ct]) {
Chris Wailes80ef6932014-07-08 11:22:18 -0700100 mkparams->out =
101 (uint8_t *)mkparams->out +
102 sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y;
103
104 } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
105 mkparams->out =
106 (uint8_t *)mkparams->out +
107 sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
Jason Sams709a0972012-11-15 18:18:04 -0800108 }
Chris Wailesf3712132014-07-16 15:18:30 -0700109 } else {
Chris Wailes44bef6f2014-08-12 13:51:10 -0700110 mkparams->out = nullptr;
Chris Wailesf3712132014-07-16 15:18:30 -0700111 ostep = 0;
Jason Sams709a0972012-11-15 18:18:04 -0800112 }
113
114 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
Chris Wailes9ed79102014-07-25 15:53:28 -0700115 func(kparams, xstart, xend, ostep);
Jason Sams709a0972012-11-15 18:18:04 -0800116 }
117 //ALOGE("script group root");
118
Chris Wailesf3712132014-07-16 15:18:30 -0700119 mkparams->ins = oldIns;
120 mkparams->inEStrides = oldStrides;
121 mkparams->usr = sl;
Jason Sams709a0972012-11-15 18:18:04 -0800122}
123
124
125
126void CpuScriptGroupImpl::execute() {
Chris Wailes93d6bc82014-07-28 16:54:38 -0700127 std::vector<Allocation *> ins;
128 std::vector<char> inExts;
129 std::vector<Allocation *> outs;
130 std::vector<char> outExts;
131 std::vector<const ScriptKernelID *> kernels;
Jason Sams709a0972012-11-15 18:18:04 -0800132 bool fieldDep = false;
133
134 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
135 ScriptGroup::Node *n = mSG->mNodes[ct];
136 Script *s = n->mKernels[0]->mScript;
Stephen Hinesc78839b2013-09-10 17:40:41 -0700137 if (s->hasObjectSlots()) {
138 // Disable the ScriptGroup optimization if we have global RS
139 // objects that might interfere between kernels.
140 fieldDep = true;
141 }
Jason Sams709a0972012-11-15 18:18:04 -0800142
143 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
144
145 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
146 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
147 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
148 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
149 }
150 }
151
152 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
153 const ScriptKernelID *k = n->mKernels[ct2];
Chris Wailes44bef6f2014-08-12 13:51:10 -0700154 Allocation *ain = nullptr;
155 Allocation *aout = nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800156 bool inExt = false;
157 bool outExt = false;
158
159 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
160 if (n->mInputs[ct3]->mDstKernel.get() == k) {
161 ain = n->mInputs[ct3]->mAlloc.get();
Yang Ni5f6f16f2014-07-25 13:51:09 -0700162 break;
Jason Sams709a0972012-11-15 18:18:04 -0800163 }
164 }
Chris Wailes44bef6f2014-08-12 13:51:10 -0700165 if (ain == nullptr) {
Yang Ni5f6f16f2014-07-25 13:51:09 -0700166 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
167 if (mSG->mInputs[ct3]->mKernel == k) {
168 ain = mSG->mInputs[ct3]->mAlloc.get();
169 inExt = true;
170 break;
171 }
Jason Sams709a0972012-11-15 18:18:04 -0800172 }
173 }
174
175 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
176 if (n->mOutputs[ct3]->mSource.get() == k) {
177 aout = n->mOutputs[ct3]->mAlloc.get();
Chris Wailes44bef6f2014-08-12 13:51:10 -0700178 if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
Jason Sams709a0972012-11-15 18:18:04 -0800179 fieldDep = true;
180 }
Yang Ni5f6f16f2014-07-25 13:51:09 -0700181 break;
Jason Sams709a0972012-11-15 18:18:04 -0800182 }
183 }
Chris Wailes44bef6f2014-08-12 13:51:10 -0700184 if (aout == nullptr) {
Yang Ni5f6f16f2014-07-25 13:51:09 -0700185 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
186 if (mSG->mOutputs[ct3]->mKernel == k) {
187 aout = mSG->mOutputs[ct3]->mAlloc.get();
188 outExt = true;
189 break;
190 }
Jason Sams709a0972012-11-15 18:18:04 -0800191 }
192 }
193
Chris Wailes44bef6f2014-08-12 13:51:10 -0700194 rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
195 (k->mHasKernelInput == (ain != nullptr)));
Yang Ni5f6f16f2014-07-25 13:51:09 -0700196
Chris Wailes93d6bc82014-07-28 16:54:38 -0700197 ins.push_back(ain);
198 inExts.push_back(inExt);
199 outs.push_back(aout);
200 outExts.push_back(outExt);
201 kernels.push_back(k);
Jason Sams709a0972012-11-15 18:18:04 -0800202 }
203
204 }
205
206 MTLaunchStruct mtls;
207
Chris Wailesf3712132014-07-16 15:18:30 -0700208 if (fieldDep) {
Jason Sams709a0972012-11-15 18:18:04 -0800209 for (size_t ct=0; ct < ins.size(); ct++) {
210 Script *s = kernels[ct]->mScript;
211 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
212 uint32_t slot = kernels[ct]->mSlot;
213
Chris Wailesf3712132014-07-16 15:18:30 -0700214 uint32_t inLen;
215 const Allocation **ains;
216
Chris Wailes44bef6f2014-08-12 13:51:10 -0700217 if (ins[ct] == nullptr) {
Chris Wailesf3712132014-07-16 15:18:30 -0700218 inLen = 0;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700219 ains = nullptr;
Chris Wailesf3712132014-07-16 15:18:30 -0700220
221 } else {
222 inLen = 1;
223 ains = const_cast<const Allocation**>(&ins[ct]);
224 }
225
Chris Wailes44bef6f2014-08-12 13:51:10 -0700226 si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
Chris Wailesf3712132014-07-16 15:18:30 -0700227
Jason Sams709a0972012-11-15 18:18:04 -0800228 si->forEachKernelSetup(slot, &mtls);
Chris Wailesf3712132014-07-16 15:18:30 -0700229 si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
Chris Wailes44bef6f2014-08-12 13:51:10 -0700230 mtls.fep.usrLen, nullptr);
Chris Wailesf3712132014-07-16 15:18:30 -0700231
Chris Wailes44bef6f2014-08-12 13:51:10 -0700232 mCtx->launchThreads(ains, inLen, outs[ct], nullptr, &mtls);
Chris Wailesf3712132014-07-16 15:18:30 -0700233
Chris Wailes44bef6f2014-08-12 13:51:10 -0700234 si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
Jason Sams709a0972012-11-15 18:18:04 -0800235 }
236 } else {
237 ScriptList sl;
Chris Wailes93d6bc82014-07-28 16:54:38 -0700238
239 /*
240 * TODO: This is a hacky way of doing this and should be replaced by a
241 * call to std::vector's data() member once we have a C++11
242 * version of the STL.
243 */
244 sl.ins = &ins.front();
245 sl.outs = &outs.front();
246 sl.kernels = &kernels.front();
247 sl.count = kernels.size();
Jason Sams709a0972012-11-15 18:18:04 -0800248
Chris Wailesf3712132014-07-16 15:18:30 -0700249 uint32_t inLen;
250 const Allocation **ains;
251
Chris Wailes44bef6f2014-08-12 13:51:10 -0700252 if (ins[0] == nullptr) {
Chris Wailesf3712132014-07-16 15:18:30 -0700253 inLen = 0;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700254 ains = nullptr;
Chris Wailesf3712132014-07-16 15:18:30 -0700255
256 } else {
257 inLen = 1;
258 ains = const_cast<const Allocation**>(&ins[0]);
259 }
260
Chris Wailes93d6bc82014-07-28 16:54:38 -0700261 std::vector<const void *> usrPtrs;
262 std::vector<const void *> fnPtrs;
263 std::vector<uint32_t> sigs;
Jason Sams709a0972012-11-15 18:18:04 -0800264 for (size_t ct=0; ct < kernels.size(); ct++) {
265 Script *s = kernels[ct]->mScript;
266 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
267
268 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
Chris Wailes93d6bc82014-07-28 16:54:38 -0700269 fnPtrs.push_back((void *)mtls.kernel);
270 usrPtrs.push_back(mtls.fep.usr);
271 sigs.push_back(mtls.fep.usrLen);
Chris Wailesf3712132014-07-16 15:18:30 -0700272 si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
Chris Wailes44bef6f2014-08-12 13:51:10 -0700273 mtls.fep.usr, mtls.fep.usrLen, nullptr);
Jason Sams709a0972012-11-15 18:18:04 -0800274 }
Chris Wailes93d6bc82014-07-28 16:54:38 -0700275
276 sl.sigs = &sigs.front();
277 sl.usrPtrs = &usrPtrs.front();
278 sl.fnPtrs = &fnPtrs.front();
279
280 sl.inExts = (bool*)&inExts.front();
281 sl.outExts = (bool*)&outExts.front();
Jason Sams709a0972012-11-15 18:18:04 -0800282
283 Script *s = kernels[0]->mScript;
284 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
Chris Wailesf3712132014-07-16 15:18:30 -0700285
Chris Wailes44bef6f2014-08-12 13:51:10 -0700286 si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls);
Chris Wailesf3712132014-07-16 15:18:30 -0700287
Chris Wailes44bef6f2014-08-12 13:51:10 -0700288 mtls.script = nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800289 mtls.kernel = (void (*)())&scriptGroupRoot;
290 mtls.fep.usr = &sl;
Chris Wailesf3712132014-07-16 15:18:30 -0700291
Chris Wailes44bef6f2014-08-12 13:51:10 -0700292 mCtx->launchThreads(ains, inLen, outs[0], nullptr, &mtls);
Jason Sams17e3cdc2013-09-09 17:32:16 -0700293
294 for (size_t ct=0; ct < kernels.size(); ct++) {
295 Script *s = kernels[ct]->mScript;
296 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
Chris Wailes44bef6f2014-08-12 13:51:10 -0700297 si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
298 nullptr);
Jason Sams17e3cdc2013-09-09 17:32:16 -0700299 }
Jason Sams709a0972012-11-15 18:18:04 -0800300 }
301}