blob: cc2933c13179e8fc9bf86c71fa1461c67ac98c29 [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
Jason Sams709a0972012-11-15 18:18:04 -080019#include "rsScriptGroup.h"
20#include "rsCpuScriptGroup.h"
Jason Sams709a0972012-11-15 18:18:04 -080021
Miao Wang82e135c2017-02-27 23:35:35 -080022#include <vector>
23
Chih-Hung Hsieh462de212016-11-16 11:33:57 -080024namespace android {
25namespace renderscript {
Jason Sams709a0972012-11-15 18:18:04 -080026
Yang Ni1ffd86b2015-01-07 09:16:40 -080027CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
Jason Sams709a0972012-11-15 18:18:04 -080028 mCtx = ctx;
Yang Ni1ffd86b2015-01-07 09:16:40 -080029 mSG = (ScriptGroup*)sg;
Jason Sams709a0972012-11-15 18:18:04 -080030}
31
32CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33
34}
35
36bool CpuScriptGroupImpl::init() {
37 return true;
38}
39
40void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41}
42
43void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44}
45
46
David Grossb0abb142015-03-12 15:23:03 -070047typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
Jason Sams709a0972012-11-15 18:18:04 -080048 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070049 uint32_t outstep);
Jason Sams709a0972012-11-15 18:18:04 -080050
David Grossb0abb142015-03-12 15:23:03 -070051void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
Jason Sams709a0972012-11-15 18:18:04 -080052 uint32_t xstart, uint32_t xend,
Chris Wailes9ed79102014-07-25 15:53:28 -070053 uint32_t outstep) {
Jason Sams709a0972012-11-15 18:18:04 -080054
55
David Grossb0abb142015-03-12 15:23:03 -070056 const ScriptList *sl = (const ScriptList *)kinfo->usr;
57 RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
Jason Sams709a0972012-11-15 18:18:04 -080058
David Grossb0abb142015-03-12 15:23:03 -070059 const uint32_t oldInStride = mkinfo->inStride[0];
Chris Wailesf3712132014-07-16 15:18:30 -070060
Chris Wailes80ef6932014-07-08 11:22:18 -070061 for (size_t ct = 0; ct < sl->count; ct++) {
Jason Sams709a0972012-11-15 18:18:04 -080062 ScriptGroupRootFunc_t func;
Chris Wailes80ef6932014-07-08 11:22:18 -070063 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
David Grossb0abb142015-03-12 15:23:03 -070064 mkinfo->usr = sl->usrPtrs[ct];
Jason Sams709a0972012-11-15 18:18:04 -080065
Stephen Hines4b2bea32014-08-13 17:32:10 +000066 if (sl->ins[ct]) {
David Grossb0abb142015-03-12 15:23:03 -070067 rsAssert(kinfo->inLen == 1);
Stephen Hines4b2bea32014-08-13 17:32:10 +000068
David Grossb0abb142015-03-12 15:23:03 -070069 mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
70
71 mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
Chris Wailes80ef6932014-07-08 11:22:18 -070072
Jason Sams709a0972012-11-15 18:18:04 -080073 if (sl->inExts[ct]) {
David Grossb0abb142015-03-12 15:23:03 -070074 mkinfo->inPtr[0] =
75 (mkinfo->inPtr[0] +
76 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
Chris Wailes80ef6932014-07-08 11:22:18 -070077
David Grossb0abb142015-03-12 15:23:03 -070078 } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
79 mkinfo->inPtr[0] =
80 (mkinfo->inPtr[0] +
81 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
Jason Sams709a0972012-11-15 18:18:04 -080082 }
Chris Wailesf3712132014-07-16 15:18:30 -070083
84 } else {
David Grossb0abb142015-03-12 15:23:03 -070085 rsAssert(kinfo->inLen == 0);
86
87 mkinfo->inPtr[0] = nullptr;
88 mkinfo->inStride[0] = 0;
Jason Sams709a0972012-11-15 18:18:04 -080089 }
90
Chris Wailesf3712132014-07-16 15:18:30 -070091 uint32_t ostep;
Jason Sams709a0972012-11-15 18:18:04 -080092 if (sl->outs[ct]) {
David Grossb0abb142015-03-12 15:23:03 -070093 rsAssert(kinfo->outLen == 1);
94
95 mkinfo->outPtr[0] =
Chris Wailes80ef6932014-07-08 11:22:18 -070096 (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
97
Jason Sams17e3cdc2013-09-09 17:32:16 -070098 ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
Chris Wailes80ef6932014-07-08 11:22:18 -070099
Jason Sams709a0972012-11-15 18:18:04 -0800100 if (sl->outExts[ct]) {
David Grossb0abb142015-03-12 15:23:03 -0700101 mkinfo->outPtr[0] =
102 mkinfo->outPtr[0] +
103 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
Chris Wailes80ef6932014-07-08 11:22:18 -0700104
David Grossb0abb142015-03-12 15:23:03 -0700105 } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
106 mkinfo->outPtr[0] =
107 mkinfo->outPtr[0] +
108 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
Jason Sams709a0972012-11-15 18:18:04 -0800109 }
Chris Wailesf3712132014-07-16 15:18:30 -0700110 } else {
David Grossb0abb142015-03-12 15:23:03 -0700111 rsAssert(kinfo->outLen == 0);
112
113 mkinfo->outPtr[0] = nullptr;
114 ostep = 0;
Jason Sams709a0972012-11-15 18:18:04 -0800115 }
116
117 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
David Grossb0abb142015-03-12 15:23:03 -0700118 func(kinfo, xstart, xend, ostep);
Jason Sams709a0972012-11-15 18:18:04 -0800119 }
120 //ALOGE("script group root");
121
David Grossb0abb142015-03-12 15:23:03 -0700122 mkinfo->inStride[0] = oldInStride;
123 mkinfo->usr = sl;
Jason Sams709a0972012-11-15 18:18:04 -0800124}
125
126
127
128void CpuScriptGroupImpl::execute() {
Miao Wang82e135c2017-02-27 23:35:35 -0800129 std::vector<Allocation *> ins;
130 std::vector<uint8_t> inExts;
131 std::vector<Allocation *> outs;
132 std::vector<uint8_t> outExts;
133 std::vector<const ScriptKernelID *> kernels;
Jason Sams709a0972012-11-15 18:18:04 -0800134 bool fieldDep = false;
135
136 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
137 ScriptGroup::Node *n = mSG->mNodes[ct];
138 Script *s = n->mKernels[0]->mScript;
Stephen Hinesc78839b2013-09-10 17:40:41 -0700139 if (s->hasObjectSlots()) {
140 // Disable the ScriptGroup optimization if we have global RS
141 // objects that might interfere between kernels.
142 fieldDep = true;
143 }
Jason Sams709a0972012-11-15 18:18:04 -0800144
145 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
146
147 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
148 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
149 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
150 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
151 }
152 }
153
154 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
155 const ScriptKernelID *k = n->mKernels[ct2];
Chris Wailes44bef6f2014-08-12 13:51:10 -0700156 Allocation *ain = nullptr;
157 Allocation *aout = nullptr;
Jason Sams709a0972012-11-15 18:18:04 -0800158 bool inExt = false;
159 bool outExt = false;
160
161 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
162 if (n->mInputs[ct3]->mDstKernel.get() == k) {
163 ain = n->mInputs[ct3]->mAlloc.get();
Yang Ni5f6f16f2014-07-25 13:51:09 -0700164 break;
Jason Sams709a0972012-11-15 18:18:04 -0800165 }
166 }
Chris Wailes44bef6f2014-08-12 13:51:10 -0700167 if (ain == nullptr) {
Yang Ni5f6f16f2014-07-25 13:51:09 -0700168 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
169 if (mSG->mInputs[ct3]->mKernel == k) {
170 ain = mSG->mInputs[ct3]->mAlloc.get();
171 inExt = true;
172 break;
173 }
Jason Sams709a0972012-11-15 18:18:04 -0800174 }
175 }
176
177 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
178 if (n->mOutputs[ct3]->mSource.get() == k) {
179 aout = n->mOutputs[ct3]->mAlloc.get();
Chris Wailes44bef6f2014-08-12 13:51:10 -0700180 if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
Jason Sams709a0972012-11-15 18:18:04 -0800181 fieldDep = true;
182 }
Yang Ni5f6f16f2014-07-25 13:51:09 -0700183 break;
Jason Sams709a0972012-11-15 18:18:04 -0800184 }
185 }
Chris Wailes44bef6f2014-08-12 13:51:10 -0700186 if (aout == nullptr) {
Yang Ni5f6f16f2014-07-25 13:51:09 -0700187 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
188 if (mSG->mOutputs[ct3]->mKernel == k) {
189 aout = mSG->mOutputs[ct3]->mAlloc.get();
190 outExt = true;
191 break;
192 }
Jason Sams709a0972012-11-15 18:18:04 -0800193 }
194 }
195
Chris Wailes44bef6f2014-08-12 13:51:10 -0700196 rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
197 (k->mHasKernelInput == (ain != nullptr)));
Yang Ni5f6f16f2014-07-25 13:51:09 -0700198
Miao Wang82e135c2017-02-27 23:35:35 -0800199 ins.push_back(ain);
200 inExts.push_back(inExt);
201 outs.push_back(aout);
202 outExts.push_back(outExt);
203 kernels.push_back(k);
Jason Sams709a0972012-11-15 18:18:04 -0800204 }
205
206 }
207
Matt Wala14ce0072015-07-30 17:30:25 -0700208 MTLaunchStructForEach mtls;
Jason Sams709a0972012-11-15 18:18:04 -0800209
Chris Wailesf3712132014-07-16 15:18:30 -0700210 if (fieldDep) {
Jason Sams709a0972012-11-15 18:18:04 -0800211 for (size_t ct=0; ct < ins.size(); ct++) {
212 Script *s = kernels[ct]->mScript;
213 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
214 uint32_t slot = kernels[ct]->mSlot;
215
Chris Wailesf3712132014-07-16 15:18:30 -0700216 uint32_t inLen;
217 const Allocation **ains;
218
Chris Wailes44bef6f2014-08-12 13:51:10 -0700219 if (ins[ct] == nullptr) {
Chris Wailesf3712132014-07-16 15:18:30 -0700220 inLen = 0;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700221 ains = nullptr;
Chris Wailesf3712132014-07-16 15:18:30 -0700222
223 } else {
224 inLen = 1;
225 ains = const_cast<const Allocation**>(&ins[ct]);
226 }
227
Jason Samsbf2111d2015-01-26 18:13:41 -0800228 bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
Chris Wailesf3712132014-07-16 15:18:30 -0700229
Jason Sams709a0972012-11-15 18:18:04 -0800230 si->forEachKernelSetup(slot, &mtls);
Chris Wailesf3712132014-07-16 15:18:30 -0700231 si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
Chris Wailes44bef6f2014-08-12 13:51:10 -0700232 mtls.fep.usrLen, nullptr);
Chris Wailesf3712132014-07-16 15:18:30 -0700233
Jason Samsbf2111d2015-01-26 18:13:41 -0800234 if (launchOK) {
Matt Wala14ce0072015-07-30 17:30:25 -0700235 mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls);
Jason Samsbf2111d2015-01-26 18:13:41 -0800236 }
Chris Wailesf3712132014-07-16 15:18:30 -0700237
Chris Wailes44bef6f2014-08-12 13:51:10 -0700238 si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
Jason Sams709a0972012-11-15 18:18:04 -0800239 }
240 } else {
241 ScriptList sl;
Miao Wang82e135c2017-02-27 23:35:35 -0800242 sl.ins = ins.data();
243 sl.outs = outs.data();
244 sl.kernels = kernels.data();
Yang Nib8353c52015-02-14 18:00:59 -0800245 sl.count = kernels.size();
Jason Sams709a0972012-11-15 18:18:04 -0800246
Chris Wailesf3712132014-07-16 15:18:30 -0700247 uint32_t inLen;
248 const Allocation **ains;
249
Chris Wailes44bef6f2014-08-12 13:51:10 -0700250 if (ins[0] == nullptr) {
Chris Wailesf3712132014-07-16 15:18:30 -0700251 inLen = 0;
Chris Wailes44bef6f2014-08-12 13:51:10 -0700252 ains = nullptr;
Chris Wailesf3712132014-07-16 15:18:30 -0700253
254 } else {
255 inLen = 1;
256 ains = const_cast<const Allocation**>(&ins[0]);
257 }
258
Miao Wang82e135c2017-02-27 23:35:35 -0800259 std::vector<const void *> usrPtrs;
260 std::vector<const void *> fnPtrs;
261 std::vector<uint32_t> sigs;
Jason Sams709a0972012-11-15 18:18:04 -0800262 for (size_t ct=0; ct < kernels.size(); ct++) {
263 Script *s = kernels[ct]->mScript;
264 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
265
266 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
Miao Wang82e135c2017-02-27 23:35:35 -0800267 fnPtrs.push_back((void *)mtls.kernel);
268 usrPtrs.push_back(mtls.fep.usr);
269 sigs.push_back(mtls.fep.usrLen);
Chris Wailesf3712132014-07-16 15:18:30 -0700270 si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
Chris Wailes44bef6f2014-08-12 13:51:10 -0700271 mtls.fep.usr, mtls.fep.usrLen, nullptr);
Jason Sams709a0972012-11-15 18:18:04 -0800272 }
Miao Wang82e135c2017-02-27 23:35:35 -0800273 sl.sigs = sigs.data();
274 sl.usrPtrs = usrPtrs.data();
275 sl.fnPtrs = fnPtrs.data();
276 sl.inExts = inExts.data();
277 sl.outExts = outExts.data();
Jason Sams709a0972012-11-15 18:18:04 -0800278
279 Script *s = kernels[0]->mScript;
280 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
Chris Wailesf3712132014-07-16 15:18:30 -0700281
Jason Samsbf2111d2015-01-26 18:13:41 -0800282 if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
Chris Wailesf3712132014-07-16 15:18:30 -0700283
Jason Samsbf2111d2015-01-26 18:13:41 -0800284 mtls.script = nullptr;
Matt Wala14ce0072015-07-30 17:30:25 -0700285 mtls.kernel = &scriptGroupRoot;
Jason Samsbf2111d2015-01-26 18:13:41 -0800286 mtls.fep.usr = &sl;
Chris Wailesf3712132014-07-16 15:18:30 -0700287
Matt Wala14ce0072015-07-30 17:30:25 -0700288 mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls);
Jason Samsbf2111d2015-01-26 18:13:41 -0800289 }
Jason Sams17e3cdc2013-09-09 17:32:16 -0700290
291 for (size_t ct=0; ct < kernels.size(); ct++) {
292 Script *s = kernels[ct]->mScript;
293 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
Chris Wailes44bef6f2014-08-12 13:51:10 -0700294 si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
295 nullptr);
Jason Sams17e3cdc2013-09-09 17:32:16 -0700296 }
Jason Sams709a0972012-11-15 18:18:04 -0800297 }
298}
Chih-Hung Hsieh462de212016-11-16 11:33:57 -0800299
300} // namespace renderscript
301} // namespace android