blob: 9dc4d90046380823c7329d9cd505e888d4057335 [file] [log] [blame]
Yang Ni1ffd86b2015-01-07 09:16:40 -08001#include "rsCpuScriptGroup2.h"
2
3#include "cpu_ref/rsCpuCore.h"
4#include "rsClosure.h"
5#include "rsContext.h"
6#include "rsCpuCore.h"
7#include "rsCpuScript.h"
8#include "rsScript.h"
9#include "rsScriptGroup2.h"
10
11namespace android {
12namespace renderscript {
13
14namespace {
15
16static const size_t DefaultKernelArgCount = 2;
17
18void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
19 uint32_t xend, uint32_t outstep) {
20 const list<CPUClosure*>& closures = *(list<CPUClosure*>*)kparams->usr;
21 RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
22 const void **oldIns = kparams->ins;
23 uint32_t *oldStrides = kparams->inEStrides;
24
25 std::vector<const void*> ins(DefaultKernelArgCount);
26 std::vector<uint32_t> strides(DefaultKernelArgCount);
27
28 for (CPUClosure* cpuClosure : closures) {
29 const Closure* closure = cpuClosure->mClosure;
30
31 auto in_iter = ins.begin();
32 auto stride_iter = strides.begin();
33
34 for (const auto& arg : closure->mArgs) {
35 const Allocation* a = (const Allocation*)arg;
36 const uint32_t eStride = a->mHal.state.elementSizeBytes;
37 const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
38 eStride * xstart;
39 if (kparams->dimY > 1) {
40 ptr += a->mHal.drvState.lod[0].stride * kparams->y;
41 }
42 *in_iter++ = ptr;
43 *stride_iter++ = eStride;
44 }
45
46 mutable_kparams->ins = &ins[0];
47 mutable_kparams->inEStrides = &strides[0];
48
49 const Allocation* out = closure->mReturnValue;
50 const uint32_t ostep = out->mHal.state.elementSizeBytes;
51 const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
52 ostep * xstart;
53 if (kparams->dimY > 1) {
54 ptr += out->mHal.drvState.lod[0].stride * kparams->y;
55 }
56
57 mutable_kparams->out = (void*)ptr;
58
59 mutable_kparams->usr = cpuClosure->mUsrPtr;
60
61 cpuClosure->mFunc(kparams, xstart, xend, ostep);
62 }
63
64 mutable_kparams->ins = oldIns;
65 mutable_kparams->inEStrides = oldStrides;
66 mutable_kparams->usr = &closures;
67}
68
69/*
70 Returns true if closure depends on any closure in batch via a glboal variable
71 TODO: this probably should go into class Closure.
72 */
73bool conflict(const list<CPUClosure*> &batch, CPUClosure* closure) {
74 for (const auto &p : closure->mClosure->mGlobalDeps) {
75 const Closure* dep = p.first;
76 for (CPUClosure* c : batch) {
77 if (c->mClosure == dep) {
78 return true;
79 }
80 }
81 }
82 for (const auto &p : closure->mClosure->mArgDeps) {
83 const Closure* dep = p.first;
84 for (CPUClosure* c : batch) {
85 if (c->mClosure == dep) {
86 for (const auto &p1 : *p.second) {
87 if (p1.second != nullptr) {
88 return true;
89 }
90 }
91 }
92 }
93 }
94 return false;
95}
96
97} // namespace
98
99CpuScriptGroup2Impl::CpuScriptGroup2Impl(RsdCpuReferenceImpl *cpuRefImpl,
100 const ScriptGroupBase *sg) :
101 mCpuRefImpl(cpuRefImpl), mGroup((const ScriptGroup2*)(sg)) {
102 list<CPUClosure*>* batch = new list<CPUClosure*>();
103 for (Closure* closure: mGroup->mClosures) {
104 const ScriptKernelID* kernelID = closure->mKernelID.get();
105 RsdCpuScriptImpl* si =
106 (RsdCpuScriptImpl *)mCpuRefImpl->lookupScript(kernelID->mScript);
107
108 MTLaunchStruct mtls;
109 si->forEachKernelSetup(kernelID->mSlot, &mtls);
110 // TODO: Is mtls.fep.usrLen ever used?
111 CPUClosure* cc = new CPUClosure(closure, si, (ExpandFuncTy)mtls.kernel,
112 mtls.fep.usr, mtls.fep.usrLen);
113 if (conflict(*batch, cc)) {
114 mBatches.push_back(batch);
115 batch = new list<CPUClosure*>();
116 }
117 batch->push_back(cc);
118 }
119 mBatches.push_back(batch);
120}
121
122CpuScriptGroup2Impl::~CpuScriptGroup2Impl() {
123 for (list<CPUClosure*>* batch : mBatches) {
124 for (CPUClosure* c : *batch) {
125 delete c;
126 }
127 }
128}
129
130void CpuScriptGroup2Impl::execute() {
131 for (list<CPUClosure*>* batch : mBatches) {
132 setGlobalsForBatch(*batch);
133 runBatch(*batch);
134 }
135}
136
137void CpuScriptGroup2Impl::setGlobalsForBatch(const list<CPUClosure*>& batch) {
138 for (CPUClosure* cpuClosure : batch) {
139 const Closure* closure = cpuClosure->mClosure;
140 const ScriptKernelID* kernelID = closure->mKernelID.get();
141 Script* s = kernelID->mScript;
142 for (const auto& p : closure->mGlobals) {
143 const void* value = p.second.first;
144 int size = p.second.second;
145 // We use -1 size to indicate an ObjectBase rather than a primitive type
146 if (size < 0) {
147 s->setVarObj(p.first->mSlot, (ObjectBase*)value);
148 } else {
149 s->setVar(p.first->mSlot, (const void*)&value, size);
150 }
151 }
152 }
153}
154
155void CpuScriptGroup2Impl::runBatch(const list<CPUClosure*>& batch) {
156 for (CPUClosure* cpuClosure : batch) {
157 const Closure* closure = cpuClosure->mClosure;
158 const ScriptKernelID* kernelID = closure->mKernelID.get();
159 cpuClosure->mSi->preLaunch(kernelID->mSlot,
160 (const Allocation**)&closure->mArgs[0],
161 closure->mArgs.size(), closure->mReturnValue,
162 cpuClosure->mUsrPtr, cpuClosure->mUsrSize,
163 nullptr);
164 }
165
166 const CPUClosure* cpuClosure = batch.front();
167 const Closure* closure = cpuClosure->mClosure;
168 MTLaunchStruct mtls;
169
170 cpuClosure->mSi->forEachMtlsSetup((const Allocation**)&closure->mArgs[0],
171 closure->mArgs.size(),
172 closure->mReturnValue,
173 nullptr, 0, nullptr, &mtls);
174
175 mtls.script = nullptr;
176 mtls.kernel = (void (*)())&groupRoot;
177 mtls.fep.usr = &batch;
178
179 mCpuRefImpl->launchThreads(nullptr, 0, nullptr, nullptr, &mtls);
180
181 for (CPUClosure* cpuClosure : batch) {
182 const Closure* closure = cpuClosure->mClosure;
183 const ScriptKernelID* kernelID = closure->mKernelID.get();
184 cpuClosure->mSi->postLaunch(kernelID->mSlot,
185 (const Allocation**)&closure->mArgs[0],
186 closure->mArgs.size(), closure->mReturnValue,
187 nullptr, 0, nullptr);
188 }
189}
190
191} // namespace renderscript
192} // namespace android