blob: 20ee09db1f2842a266956ffd999a0c9fa481bec2 [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
Jason Sams709a0972012-11-15 18:18:04 -080019#include "rsScriptGroup.h"
20#include "rsCpuScriptGroup.h"
21//#include "rsdBcc.h"
22//#include "rsdAllocation.h"
23
24using namespace android;
25using namespace android::renderscript;
26
27CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
28 mCtx = ctx;
29 mSG = sg;
30}
31
32CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33
34}
35
36bool CpuScriptGroupImpl::init() {
37 return true;
38}
39
40void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41}
42
43void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44}
45
46
Chris Wailes80ef6932014-07-08 11:22:18 -070047typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
Jason Sams709a0972012-11-15 18:18:04 -080048 uint32_t xstart, uint32_t xend,
49 uint32_t instep, uint32_t outstep);
50
Chris Wailes80ef6932014-07-08 11:22:18 -070051void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
Jason Sams709a0972012-11-15 18:18:04 -080052 uint32_t xstart, uint32_t xend,
53 uint32_t instep, uint32_t outstep) {
54
55
Chris Wailesf3712132014-07-16 15:18:30 -070056 const ScriptList *sl = (const ScriptList *)kparams->usr;
Chris Wailes80ef6932014-07-08 11:22:18 -070057 RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
Jason Sams709a0972012-11-15 18:18:04 -080058
Chris Wailesf3712132014-07-16 15:18:30 -070059 const void **oldIns = mkparams->ins;
60 uint32_t *oldStrides = mkparams->inEStrides;
61
62 void *localIns[1];
63 uint32_t localStride[1];
64
65 mkparams->ins = (const void**)localIns;
66 mkparams->inEStrides = localStride;
67
Chris Wailes80ef6932014-07-08 11:22:18 -070068 for (size_t ct = 0; ct < sl->count; ct++) {
Jason Sams709a0972012-11-15 18:18:04 -080069 ScriptGroupRootFunc_t func;
Chris Wailes80ef6932014-07-08 11:22:18 -070070 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
71 mkparams->usr = sl->usrPtrs[ct];
Jason Sams709a0972012-11-15 18:18:04 -080072
Stephen Hines4b2bea32014-08-13 17:32:10 +000073 if (sl->ins[ct]) {
Chris Wailesf3712132014-07-16 15:18:30 -070074 localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
Stephen Hines4b2bea32014-08-13 17:32:10 +000075
Chris Wailesf3712132014-07-16 15:18:30 -070076 localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
Chris Wailes80ef6932014-07-08 11:22:18 -070077
Jason Sams709a0972012-11-15 18:18:04 -080078 if (sl->inExts[ct]) {
Chris Wailesf3712132014-07-16 15:18:30 -070079 localIns[0] = (void*)
80 ((const uint8_t *)localIns[0] +
81 sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y);
Chris Wailes80ef6932014-07-08 11:22:18 -070082
83 } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
Chris Wailesf3712132014-07-16 15:18:30 -070084 localIns[0] = (void*)
85 ((const uint8_t *)localIns[0] +
86 sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid);
Jason Sams709a0972012-11-15 18:18:04 -080087 }
Chris Wailesf3712132014-07-16 15:18:30 -070088
89 } else {
90 localIns[0] = NULL;
91 localStride[0] = 0;
Jason Sams709a0972012-11-15 18:18:04 -080092 }
93
Chris Wailesf3712132014-07-16 15:18:30 -070094 uint32_t ostep;
Jason Sams709a0972012-11-15 18:18:04 -080095 if (sl->outs[ct]) {
Chris Wailes80ef6932014-07-08 11:22:18 -070096 mkparams->out =
97 (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
98
Jason Sams17e3cdc2013-09-09 17:32:16 -070099 ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
Chris Wailes80ef6932014-07-08 11:22:18 -0700100
Jason Sams709a0972012-11-15 18:18:04 -0800101 if (sl->outExts[ct]) {
Chris Wailes80ef6932014-07-08 11:22:18 -0700102 mkparams->out =
103 (uint8_t *)mkparams->out +
104 sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y;
105
106 } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
107 mkparams->out =
108 (uint8_t *)mkparams->out +
109 sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
Jason Sams709a0972012-11-15 18:18:04 -0800110 }
Chris Wailesf3712132014-07-16 15:18:30 -0700111 } else {
112 mkparams->out = NULL;
113 ostep = 0;
Jason Sams709a0972012-11-15 18:18:04 -0800114 }
115
116 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
Chris Wailesf3712132014-07-16 15:18:30 -0700117 /*
118 * The fourth argument is zero here because kernels get their stride
119 * information from a member of p that points to an array.
120 */
121 func(kparams, xstart, xend, 0, ostep);
Jason Sams709a0972012-11-15 18:18:04 -0800122 }
123 //ALOGE("script group root");
124
Chris Wailesf3712132014-07-16 15:18:30 -0700125 mkparams->ins = oldIns;
126 mkparams->inEStrides = oldStrides;
127 mkparams->usr = sl;
Jason Sams709a0972012-11-15 18:18:04 -0800128}
129
130
131
132void CpuScriptGroupImpl::execute() {
133 Vector<Allocation *> ins;
134 Vector<bool> inExts;
135 Vector<Allocation *> outs;
136 Vector<bool> outExts;
137 Vector<const ScriptKernelID *> kernels;
138 bool fieldDep = false;
139
140 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
141 ScriptGroup::Node *n = mSG->mNodes[ct];
142 Script *s = n->mKernels[0]->mScript;
Stephen Hinesc78839b2013-09-10 17:40:41 -0700143 if (s->hasObjectSlots()) {
144 // Disable the ScriptGroup optimization if we have global RS
145 // objects that might interfere between kernels.
146 fieldDep = true;
147 }
Jason Sams709a0972012-11-15 18:18:04 -0800148
149 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
150
151 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
152 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
153 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
154 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
155 }
156 }
157
158 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
159 const ScriptKernelID *k = n->mKernels[ct2];
160 Allocation *ain = NULL;
161 Allocation *aout = NULL;
162 bool inExt = false;
163 bool outExt = false;
164
165 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
166 if (n->mInputs[ct3]->mDstKernel.get() == k) {
167 ain = n->mInputs[ct3]->mAlloc.get();
Yang Ni5f6f16f2014-07-25 13:51:09 -0700168 break;
Jason Sams709a0972012-11-15 18:18:04 -0800169 }
170 }
Yang Ni5f6f16f2014-07-25 13:51:09 -0700171 if (ain == NULL) {
172 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
173 if (mSG->mInputs[ct3]->mKernel == k) {
174 ain = mSG->mInputs[ct3]->mAlloc.get();
175 inExt = true;
176 break;
177 }
Jason Sams709a0972012-11-15 18:18:04 -0800178 }
179 }
180
181 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
182 if (n->mOutputs[ct3]->mSource.get() == k) {
183 aout = n->mOutputs[ct3]->mAlloc.get();
184 if(n->mOutputs[ct3]->mDstField.get() != NULL) {
185 fieldDep = true;
186 }
Yang Ni5f6f16f2014-07-25 13:51:09 -0700187 break;
Jason Sams709a0972012-11-15 18:18:04 -0800188 }
189 }
Yang Ni5f6f16f2014-07-25 13:51:09 -0700190 if (aout == NULL) {
191 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
192 if (mSG->mOutputs[ct3]->mKernel == k) {
193 aout = mSG->mOutputs[ct3]->mAlloc.get();
194 outExt = true;
195 break;
196 }
Jason Sams709a0972012-11-15 18:18:04 -0800197 }
198 }
199
Yang Ni5f6f16f2014-07-25 13:51:09 -0700200 rsAssert((k->mHasKernelOutput == (aout != NULL)) &&
201 (k->mHasKernelInput == (ain != NULL)));
202
203 ins.add(ain);
204 inExts.add(inExt);
205 outs.add(aout);
206 outExts.add(outExt);
207 kernels.add(k);
Jason Sams709a0972012-11-15 18:18:04 -0800208 }
209
210 }
211
212 MTLaunchStruct mtls;
213
Chris Wailesf3712132014-07-16 15:18:30 -0700214 if (fieldDep) {
Jason Sams709a0972012-11-15 18:18:04 -0800215 for (size_t ct=0; ct < ins.size(); ct++) {
216 Script *s = kernels[ct]->mScript;
217 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
218 uint32_t slot = kernels[ct]->mSlot;
219
Chris Wailesf3712132014-07-16 15:18:30 -0700220 uint32_t inLen;
221 const Allocation **ains;
222
223 if (ins[ct] == NULL) {
224 inLen = 0;
225 ains = NULL;
226
227 } else {
228 inLen = 1;
229 ains = const_cast<const Allocation**>(&ins[ct]);
230 }
231
232 si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls);
233
Jason Sams709a0972012-11-15 18:18:04 -0800234 si->forEachKernelSetup(slot, &mtls);
Chris Wailesf3712132014-07-16 15:18:30 -0700235 si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
236 mtls.fep.usrLen, NULL);
237
238 mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls);
239
240 si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL);
Jason Sams709a0972012-11-15 18:18:04 -0800241 }
242 } else {
243 ScriptList sl;
244 sl.ins = ins.array();
245 sl.outs = outs.array();
246 sl.kernels = kernels.array();
247 sl.count = kernels.size();
248
Chris Wailesf3712132014-07-16 15:18:30 -0700249 uint32_t inLen;
250 const Allocation **ains;
251
252 if (ins[0] == NULL) {
253 inLen = 0;
254 ains = NULL;
255
256 } else {
257 inLen = 1;
258 ains = const_cast<const Allocation**>(&ins[0]);
259 }
260
Jason Sams709a0972012-11-15 18:18:04 -0800261 Vector<const void *> usrPtrs;
262 Vector<const void *> fnPtrs;
263 Vector<uint32_t> sigs;
264 for (size_t ct=0; ct < kernels.size(); ct++) {
265 Script *s = kernels[ct]->mScript;
266 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
267
268 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
269 fnPtrs.add((void *)mtls.kernel);
270 usrPtrs.add(mtls.fep.usr);
271 sigs.add(mtls.fep.usrLen);
Chris Wailesf3712132014-07-16 15:18:30 -0700272 si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
273 mtls.fep.usr, mtls.fep.usrLen, NULL);
Jason Sams709a0972012-11-15 18:18:04 -0800274 }
275 sl.sigs = sigs.array();
276 sl.usrPtrs = usrPtrs.array();
277 sl.fnPtrs = fnPtrs.array();
278 sl.inExts = inExts.array();
279 sl.outExts = outExts.array();
280
281 Script *s = kernels[0]->mScript;
282 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
Chris Wailesf3712132014-07-16 15:18:30 -0700283
284 si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls);
285
Jason Sams709a0972012-11-15 18:18:04 -0800286 mtls.script = NULL;
287 mtls.kernel = (void (*)())&scriptGroupRoot;
288 mtls.fep.usr = &sl;
Chris Wailesf3712132014-07-16 15:18:30 -0700289
290 mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls);
Jason Sams17e3cdc2013-09-09 17:32:16 -0700291
292 for (size_t ct=0; ct < kernels.size(); ct++) {
293 Script *s = kernels[ct]->mScript;
294 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
Chris Wailesf3712132014-07-16 15:18:30 -0700295 si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], NULL, 0,
296 NULL);
Jason Sams17e3cdc2013-09-09 17:32:16 -0700297 }
Jason Sams709a0972012-11-15 18:18:04 -0800298 }
299}