blob: e79a67578088e402323ac25aac2e3da5271499c7 [file] [log] [blame]
Jason Sams709a0972012-11-15 18:18:04 -08001/*
2 * Copyright (C) 2011-2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18
19#include "rsCpuCore.h"
20
21#include "rsCpuScript.h"
22//#include "rsdRuntime.h"
23//#include "rsdAllocation.h"
24//#include "rsCpuIntrinsics.h"
25
26
27#include "utils/Vector.h"
28#include "utils/Timers.h"
29#include "utils/StopWatch.h"
30
31
32#include <bcc/BCCContext.h>
33#include <bcc/Renderscript/RSCompilerDriver.h>
34#include <bcc/Renderscript/RSExecutable.h>
35#include <bcc/Renderscript/RSInfo.h>
36
37namespace android {
38namespace renderscript {
39
40
41
42RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
43 mCtx = ctx;
44 mScript = s;
45
46 mRoot = NULL;
47 mRootExpand = NULL;
48 mInit = NULL;
49 mFreeChildren = NULL;
50
51 mCompilerContext = NULL;
52 mCompilerDriver = NULL;
53 mExecutable = NULL;
54
55 mBoundAllocs = NULL;
56 mIntrinsicData = NULL;
57 mIsThreadable = true;
58}
59
60
61bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
62 uint8_t const *bitcode, size_t bitcodeSize,
63 uint32_t flags) {
64 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
65 //ALOGE("rsdScriptInit %p %p", rsc, script);
66
67 mCtx->lockMutex();
68
69 bcc::RSExecutable *exec;
70 const bcc::RSInfo *info;
71
72 mCompilerContext = NULL;
73 mCompilerDriver = NULL;
74 mExecutable = NULL;
75
76 mCompilerContext = new bcc::BCCContext();
77 if (mCompilerContext == NULL) {
78 ALOGE("bcc: FAILS to create compiler context (out of memory)");
79 mCtx->unlockMutex();
80 return false;
81 }
82
83 mCompilerDriver = new bcc::RSCompilerDriver();
84 if (mCompilerDriver == NULL) {
85 ALOGE("bcc: FAILS to create compiler driver (out of memory)");
86 mCtx->unlockMutex();
87 return false;
88 }
89
90 mCompilerDriver->setRSRuntimeLookupFunction(lookupRuntimeStub);
91 mCompilerDriver->setRSRuntimeLookupContext(this);
92
93 exec = mCompilerDriver->build(*mCompilerContext, cacheDir, resName,
94 (const char *)bitcode, bitcodeSize, NULL);
95
96 if (exec == NULL) {
97 ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
98 mCtx->unlockMutex();
99 return false;
100 }
101
102 mExecutable = exec;
103
104 exec->setThreadable(mIsThreadable);
105 if (!exec->syncInfo()) {
106 ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
107 }
108
109 mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
110 mRootExpand =
111 reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
112 mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
113 mFreeChildren =
114 reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
115
116
117 info = &mExecutable->getInfo();
118 if (info->getExportVarNames().size()) {
119 mBoundAllocs = new Allocation *[info->getExportVarNames().size()];
120 memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size());
121 }
122
123 mCtx->unlockMutex();
124 return true;
125}
126
127void RsdCpuScriptImpl::populateScript(Script *script) {
128 const bcc::RSInfo *info = &mExecutable->getInfo();
129
130 // Copy info over to runtime
131 script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
132 script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
133 script->mHal.info.exportedPragmaCount = info->getPragmas().size();
134 script->mHal.info.exportedPragmaKeyList =
135 const_cast<const char**>(mExecutable->getPragmaKeys().array());
136 script->mHal.info.exportedPragmaValueList =
137 const_cast<const char**>(mExecutable->getPragmaValues().array());
138
139 if (mRootExpand) {
140 script->mHal.info.root = mRootExpand;
141 } else {
142 script->mHal.info.root = mRoot;
143 }
144}
145
146/*
147bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) {
148 pthread_mutex_lock(&rsdgInitMutex);
149
150 DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
151 if (drv == NULL) {
152 goto error;
153 }
154 s->mHal.drv = drv;
155 drv->mIntrinsicID = iid;
156 drv->mIntrinsicData = rsdIntrinsic_Init(rsc, s, iid, &drv->mIntrinsicFuncs);
157 s->mHal.info.isThreadable = true;
158
159 pthread_mutex_unlock(&rsdgInitMutex);
160 return true;
161
162error:
163 pthread_mutex_unlock(&rsdgInitMutex);
164 return false;
165}
166*/
167
// Pointer to a function taking three untyped pointers (const, mutable, const)
// followed by four 32-bit unsigned values, and returning nothing.
// NOTE(review): not referenced in the visible part of this file.
typedef void (*rs_t)(const void *,
                     void *,
                     const void *,
                     uint32_t,
                     uint32_t,
                     uint32_t,
                     uint32_t);
169
170void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
171 const void * usr, uint32_t usrLen,
172 const RsScriptCall *sc,
173 MTLaunchStruct *mtls) {
174
175 memset(mtls, 0, sizeof(MTLaunchStruct));
176
177 if (ain) {
178 mtls->fep.dimX = ain->getType()->getDimX();
179 mtls->fep.dimY = ain->getType()->getDimY();
180 mtls->fep.dimZ = ain->getType()->getDimZ();
181 //mtls->dimArray = ain->getType()->getDimArray();
182 } else if (aout) {
183 mtls->fep.dimX = aout->getType()->getDimX();
184 mtls->fep.dimY = aout->getType()->getDimY();
185 mtls->fep.dimZ = aout->getType()->getDimZ();
186 //mtls->dimArray = aout->getType()->getDimArray();
187 } else {
188 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
189 return;
190 }
191
192 if (!sc || (sc->xEnd == 0)) {
193 mtls->xEnd = mtls->fep.dimX;
194 } else {
195 rsAssert(sc->xStart < mtls->fep.dimX);
196 rsAssert(sc->xEnd <= mtls->fep.dimX);
197 rsAssert(sc->xStart < sc->xEnd);
198 mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
199 mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
200 if (mtls->xStart >= mtls->xEnd) return;
201 }
202
203 if (!sc || (sc->yEnd == 0)) {
204 mtls->yEnd = mtls->fep.dimY;
205 } else {
206 rsAssert(sc->yStart < mtls->fep.dimY);
207 rsAssert(sc->yEnd <= mtls->fep.dimY);
208 rsAssert(sc->yStart < sc->yEnd);
209 mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
210 mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
211 if (mtls->yStart >= mtls->yEnd) return;
212 }
213
Tim Murrayd4ecb172013-02-07 12:17:03 -0800214 if (!sc || (sc->zEnd == 0)) {
215 mtls->zEnd = mtls->fep.dimZ;
216 } else {
217 rsAssert(sc->zStart < mtls->fep.dimZ);
218 rsAssert(sc->zEnd <= mtls->fep.dimZ);
219 rsAssert(sc->zStart < sc->zEnd);
220 mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
221 mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
222 if (mtls->zStart >= mtls->zEnd) return;
223 }
224
Jason Sams709a0972012-11-15 18:18:04 -0800225 mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
226 mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
227 mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
228 mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
229
230 rsAssert(!ain || (ain->getType()->getDimZ() == 0));
231
232 mtls->rsc = mCtx;
233 mtls->ain = ain;
234 mtls->aout = aout;
235 mtls->fep.usr = usr;
236 mtls->fep.usrLen = usrLen;
237 mtls->mSliceSize = 1;
238 mtls->mSliceNum = 0;
239
240 mtls->fep.ptrIn = NULL;
241 mtls->fep.eStrideIn = 0;
242 mtls->isThreadable = mIsThreadable;
243
244 if (ain) {
245 mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
246 mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
247 mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
248 }
249
250 mtls->fep.ptrOut = NULL;
251 mtls->fep.eStrideOut = 0;
252 if (aout) {
253 mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
254 mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
255 mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
256 }
257}
258
259
260void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
261 const Allocation * ain,
262 Allocation * aout,
263 const void * usr,
264 uint32_t usrLen,
265 const RsScriptCall *sc) {
266
267 MTLaunchStruct mtls;
268 forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
269 forEachKernelSetup(slot, &mtls);
270
271 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
272 mCtx->launchThreads(ain, aout, sc, &mtls);
273 mCtx->setTLS(oldTLS);
274}
275
276void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
277
278 mtls->script = this;
279 mtls->fep.slot = slot;
280
281 rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
282 mtls->kernel = reinterpret_cast<ForEachFunc_t>(
283 mExecutable->getExportForeachFuncAddrs()[slot]);
284 rsAssert(mtls->kernel != NULL);
285 mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
286}
287
288int RsdCpuScriptImpl::invokeRoot() {
289 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
290 int ret = mRoot();
291 mCtx->setTLS(oldTLS);
292 return ret;
293}
294
295void RsdCpuScriptImpl::invokeInit() {
296 if (mInit) {
297 mInit();
298 }
299}
300
301void RsdCpuScriptImpl::invokeFreeChildren() {
302 if (mFreeChildren) {
303 mFreeChildren();
304 }
305}
306
307void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
308 size_t paramLength) {
309 //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
310
311 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
312 reinterpret_cast<void (*)(const void *, uint32_t)>(
313 mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
314 mCtx->setTLS(oldTLS);
315}
316
317void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
318 //rsAssert(!script->mFieldIsObject[slot]);
319 //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
320
321 //if (mIntrinsicID) {
322 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
323 //return;
324 //}
325
326 int32_t *destPtr = reinterpret_cast<int32_t *>(
327 mExecutable->getExportVarAddrs()[slot]);
328 if (!destPtr) {
329 //ALOGV("Calling setVar on slot = %i which is null", slot);
330 return;
331 }
332
333 memcpy(destPtr, data, dataLength);
334}
335
336void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
337 const Element *elem,
338 const size_t *dims, size_t dimLength) {
339
340 int32_t *destPtr = reinterpret_cast<int32_t *>(
341 mExecutable->getExportVarAddrs()[slot]);
342 if (!destPtr) {
343 //ALOGV("Calling setVar on slot = %i which is null", slot);
344 return;
345 }
346
347 // We want to look at dimension in terms of integer components,
348 // but dimLength is given in terms of bytes.
349 dimLength /= sizeof(int);
350
351 // Only a single dimension is currently supported.
352 rsAssert(dimLength == 1);
353 if (dimLength == 1) {
354 // First do the increment loop.
355 size_t stride = elem->getSizeBytes();
356 const char *cVal = reinterpret_cast<const char *>(data);
357 for (size_t i = 0; i < dims[0]; i++) {
358 elem->incRefs(cVal);
359 cVal += stride;
360 }
361
362 // Decrement loop comes after (to prevent race conditions).
363 char *oldVal = reinterpret_cast<char *>(destPtr);
364 for (size_t i = 0; i < dims[0]; i++) {
365 elem->decRefs(oldVal);
366 oldVal += stride;
367 }
368 }
369
370 memcpy(destPtr, data, dataLength);
371}
372
373void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
374
375 //rsAssert(!script->mFieldIsObject[slot]);
376 //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
377
378 int32_t *destPtr = reinterpret_cast<int32_t *>(
379 mExecutable->getExportVarAddrs()[slot]);
380 if (!destPtr) {
381 //ALOGV("Calling setVar on slot = %i which is null", slot);
382 return;
383 }
384
385 void *ptr = NULL;
386 mBoundAllocs[slot] = data;
387 if(data) {
388 ptr = data->mHal.drvState.lod[0].mallocPtr;
389 }
390 memcpy(destPtr, &ptr, sizeof(void *));
391}
392
393void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
394
395 //rsAssert(script->mFieldIsObject[slot]);
396 //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
397
398 //if (mIntrinsicID) {
399 //mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, alloc);
400 //return;
401 //}
402
403 int32_t *destPtr = reinterpret_cast<int32_t *>(
404 mExecutable->getExportVarAddrs()[slot]);
405 if (!destPtr) {
406 //ALOGV("Calling setVar on slot = %i which is null", slot);
407 return;
408 }
409
410 rsrSetObject(mCtx->getContext(), (ObjectBase **)destPtr, data);
411}
412
413RsdCpuScriptImpl::~RsdCpuScriptImpl() {
414
415 if (mExecutable) {
416 Vector<void *>::const_iterator var_addr_iter =
417 mExecutable->getExportVarAddrs().begin();
418 Vector<void *>::const_iterator var_addr_end =
419 mExecutable->getExportVarAddrs().end();
420
421 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
422 mExecutable->getInfo().getObjectSlots().begin();
423 bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
424 mExecutable->getInfo().getObjectSlots().end();
425
426 while ((var_addr_iter != var_addr_end) &&
427 (is_object_iter != is_object_end)) {
428 // The field address can be NULL if the script-side has optimized
429 // the corresponding global variable away.
430 ObjectBase **obj_addr =
431 reinterpret_cast<ObjectBase **>(*var_addr_iter);
432 if (*is_object_iter) {
433 if (*var_addr_iter != NULL) {
434 rsrClearObject(mCtx->getContext(), obj_addr);
435 }
436 }
437 var_addr_iter++;
438 is_object_iter++;
439 }
440 }
441
442 if (mCompilerContext) {
443 delete mCompilerContext;
444 }
445 if (mCompilerDriver) {
446 delete mCompilerDriver;
447 }
448 if (mExecutable) {
449 delete mExecutable;
450 }
451 if (mBoundAllocs) {
452 delete[] mBoundAllocs;
453 }
454}
455
456Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
457 if (!ptr) {
458 return NULL;
459 }
460
461 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
462 Allocation *a = mBoundAllocs[ct];
463 if (!a) continue;
464 if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
465 return a;
466 }
467 }
468 ALOGE("rsGetAllocation, failed to find %p", ptr);
469 return NULL;
470}
471
472
473}
474}