blob: 087855237b733c1c2a4258dca4a2ddcb4fe961fc [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "rsCpuCore.h"
18#include "rsCpuScript.h"
Jason Sams709a0972012-11-15 18:18:04 -080019#include "rsScriptGroup.h"
20#include "rsCpuScriptGroup.h"
21//#include "rsdBcc.h"
22//#include "rsdAllocation.h"
23
24using namespace android;
25using namespace android::renderscript;
26
27CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
28 mCtx = ctx;
29 mSG = sg;
30}
31
32CpuScriptGroupImpl::~CpuScriptGroupImpl() {
33
34}
35
36bool CpuScriptGroupImpl::init() {
37 return true;
38}
39
40void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
41}
42
43void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
44}
45
46
Chris Wailes80ef6932014-07-08 11:22:18 -070047typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
Jason Sams709a0972012-11-15 18:18:04 -080048 uint32_t xstart, uint32_t xend,
49 uint32_t instep, uint32_t outstep);
50
Chris Wailes80ef6932014-07-08 11:22:18 -070051void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
Jason Sams709a0972012-11-15 18:18:04 -080052 uint32_t xstart, uint32_t xend,
53 uint32_t instep, uint32_t outstep) {
54
55
Stephen Hines4b2bea32014-08-13 17:32:10 +000056 const ScriptList *sl = (const ScriptList *)kparams->usr;
Chris Wailes80ef6932014-07-08 11:22:18 -070057 RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
Jason Sams709a0972012-11-15 18:18:04 -080058
Chris Wailes80ef6932014-07-08 11:22:18 -070059 for (size_t ct = 0; ct < sl->count; ct++) {
Jason Sams709a0972012-11-15 18:18:04 -080060 ScriptGroupRootFunc_t func;
Chris Wailes80ef6932014-07-08 11:22:18 -070061 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
62 mkparams->usr = sl->usrPtrs[ct];
Jason Sams709a0972012-11-15 18:18:04 -080063
Stephen Hines4b2bea32014-08-13 17:32:10 +000064 mkparams->in = NULL;
65 mkparams->out = NULL;
Chris Wailes80ef6932014-07-08 11:22:18 -070066
Stephen Hines4b2bea32014-08-13 17:32:10 +000067 uint32_t istep = 0;
68 uint32_t ostep = 0;
69
70 if (sl->ins[ct]) {
71 mkparams->in =
72 (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
73
74 istep = sl->ins[ct]->mHal.state.elementSizeBytes;
Chris Wailes80ef6932014-07-08 11:22:18 -070075
Jason Sams709a0972012-11-15 18:18:04 -080076 if (sl->inExts[ct]) {
Stephen Hines4b2bea32014-08-13 17:32:10 +000077 mkparams->in =
78 (const uint8_t *)mkparams->in +
79 sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y;
Chris Wailes80ef6932014-07-08 11:22:18 -070080
81 } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
Stephen Hines4b2bea32014-08-13 17:32:10 +000082 mkparams->in =
83 (const uint8_t *)mkparams->in +
84 sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid;
Jason Sams709a0972012-11-15 18:18:04 -080085 }
86 }
87
88 if (sl->outs[ct]) {
Chris Wailes80ef6932014-07-08 11:22:18 -070089 mkparams->out =
90 (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
91
Jason Sams17e3cdc2013-09-09 17:32:16 -070092 ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
Chris Wailes80ef6932014-07-08 11:22:18 -070093
Jason Sams709a0972012-11-15 18:18:04 -080094 if (sl->outExts[ct]) {
Chris Wailes80ef6932014-07-08 11:22:18 -070095 mkparams->out =
96 (uint8_t *)mkparams->out +
97 sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y;
98
99 } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
100 mkparams->out =
101 (uint8_t *)mkparams->out +
102 sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
Jason Sams709a0972012-11-15 18:18:04 -0800103 }
104 }
105
106 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
Stephen Hines4b2bea32014-08-13 17:32:10 +0000107 func(kparams, xstart, xend, istep, ostep);
Jason Sams709a0972012-11-15 18:18:04 -0800108 }
109 //ALOGE("script group root");
110
Stephen Hines4b2bea32014-08-13 17:32:10 +0000111 mkparams->usr = sl;
Jason Sams709a0972012-11-15 18:18:04 -0800112}
113
114
115
116void CpuScriptGroupImpl::execute() {
117 Vector<Allocation *> ins;
118 Vector<bool> inExts;
119 Vector<Allocation *> outs;
120 Vector<bool> outExts;
121 Vector<const ScriptKernelID *> kernels;
122 bool fieldDep = false;
123
124 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
125 ScriptGroup::Node *n = mSG->mNodes[ct];
126 Script *s = n->mKernels[0]->mScript;
Stephen Hinesc78839b2013-09-10 17:40:41 -0700127 if (s->hasObjectSlots()) {
128 // Disable the ScriptGroup optimization if we have global RS
129 // objects that might interfere between kernels.
130 fieldDep = true;
131 }
Jason Sams709a0972012-11-15 18:18:04 -0800132
133 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
134
135 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
136 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
137 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
138 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
139 }
140 }
141
142 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
143 const ScriptKernelID *k = n->mKernels[ct2];
144 Allocation *ain = NULL;
145 Allocation *aout = NULL;
146 bool inExt = false;
147 bool outExt = false;
148
149 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
150 if (n->mInputs[ct3]->mDstKernel.get() == k) {
151 ain = n->mInputs[ct3]->mAlloc.get();
Yang Ni5f6f16f2014-07-25 13:51:09 -0700152 break;
Jason Sams709a0972012-11-15 18:18:04 -0800153 }
154 }
Yang Ni5f6f16f2014-07-25 13:51:09 -0700155 if (ain == NULL) {
156 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
157 if (mSG->mInputs[ct3]->mKernel == k) {
158 ain = mSG->mInputs[ct3]->mAlloc.get();
159 inExt = true;
160 break;
161 }
Jason Sams709a0972012-11-15 18:18:04 -0800162 }
163 }
164
165 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
166 if (n->mOutputs[ct3]->mSource.get() == k) {
167 aout = n->mOutputs[ct3]->mAlloc.get();
168 if(n->mOutputs[ct3]->mDstField.get() != NULL) {
169 fieldDep = true;
170 }
Yang Ni5f6f16f2014-07-25 13:51:09 -0700171 break;
Jason Sams709a0972012-11-15 18:18:04 -0800172 }
173 }
Yang Ni5f6f16f2014-07-25 13:51:09 -0700174 if (aout == NULL) {
175 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
176 if (mSG->mOutputs[ct3]->mKernel == k) {
177 aout = mSG->mOutputs[ct3]->mAlloc.get();
178 outExt = true;
179 break;
180 }
Jason Sams709a0972012-11-15 18:18:04 -0800181 }
182 }
183
Yang Ni5f6f16f2014-07-25 13:51:09 -0700184 rsAssert((k->mHasKernelOutput == (aout != NULL)) &&
185 (k->mHasKernelInput == (ain != NULL)));
186
187 ins.add(ain);
188 inExts.add(inExt);
189 outs.add(aout);
190 outExts.add(outExt);
191 kernels.add(k);
Jason Sams709a0972012-11-15 18:18:04 -0800192 }
193
194 }
195
196 MTLaunchStruct mtls;
197
Stephen Hines4b2bea32014-08-13 17:32:10 +0000198 if(fieldDep) {
Jason Sams709a0972012-11-15 18:18:04 -0800199 for (size_t ct=0; ct < ins.size(); ct++) {
200 Script *s = kernels[ct]->mScript;
201 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
202 uint32_t slot = kernels[ct]->mSlot;
203
Stephen Hines4b2bea32014-08-13 17:32:10 +0000204 si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
Jason Sams709a0972012-11-15 18:18:04 -0800205 si->forEachKernelSetup(slot, &mtls);
Stephen Hines4b2bea32014-08-13 17:32:10 +0000206 si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
207 mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
208 si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
Jason Sams709a0972012-11-15 18:18:04 -0800209 }
210 } else {
211 ScriptList sl;
212 sl.ins = ins.array();
213 sl.outs = outs.array();
214 sl.kernels = kernels.array();
215 sl.count = kernels.size();
216
217 Vector<const void *> usrPtrs;
218 Vector<const void *> fnPtrs;
219 Vector<uint32_t> sigs;
220 for (size_t ct=0; ct < kernels.size(); ct++) {
221 Script *s = kernels[ct]->mScript;
222 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
223
224 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
225 fnPtrs.add((void *)mtls.kernel);
226 usrPtrs.add(mtls.fep.usr);
227 sigs.add(mtls.fep.usrLen);
Stephen Hines4b2bea32014-08-13 17:32:10 +0000228 si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
Jason Sams709a0972012-11-15 18:18:04 -0800229 }
230 sl.sigs = sigs.array();
231 sl.usrPtrs = usrPtrs.array();
232 sl.fnPtrs = fnPtrs.array();
233 sl.inExts = inExts.array();
234 sl.outExts = outExts.array();
235
236 Script *s = kernels[0]->mScript;
237 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
Stephen Hines4b2bea32014-08-13 17:32:10 +0000238 si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
Jason Sams709a0972012-11-15 18:18:04 -0800239 mtls.script = NULL;
240 mtls.kernel = (void (*)())&scriptGroupRoot;
241 mtls.fep.usr = &sl;
Stephen Hines4b2bea32014-08-13 17:32:10 +0000242 mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
Jason Sams17e3cdc2013-09-09 17:32:16 -0700243
244 for (size_t ct=0; ct < kernels.size(); ct++) {
245 Script *s = kernels[ct]->mScript;
246 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
Stephen Hines4b2bea32014-08-13 17:32:10 +0000247 si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
Jason Sams17e3cdc2013-09-09 17:32:16 -0700248 }
Jason Sams709a0972012-11-15 18:18:04 -0800249 }
250}