Merge "Rename the generated test files to avoid confusion."
diff --git a/cpp/ScriptIntrinsics.cpp b/cpp/ScriptIntrinsics.cpp
index a9a31f8..e40d1a0 100644
--- a/cpp/ScriptIntrinsics.cpp
+++ b/cpp/ScriptIntrinsics.cpp
@@ -419,8 +419,8 @@
Script::setVar(0, (void*)v, sizeof(float) * 25);
}
-sp<ScriptIntrinsicHistogram> ScriptIntrinsicHistogram::create(sp<RS> rs) {
- return new ScriptIntrinsicHistogram(rs, nullptr);
+sp<ScriptIntrinsicHistogram> ScriptIntrinsicHistogram::create(sp<RS> rs, sp<const Element> e) {
+ return new ScriptIntrinsicHistogram(rs, e);
}
ScriptIntrinsicHistogram::ScriptIntrinsicHistogram(sp<RS> rs, sp<const Element> e)
@@ -476,7 +476,7 @@
return;
}
- if (!(ain->getType()->getElement()->isCompatible(Element::U8(mRS))) ||
+ if (!(ain->getType()->getElement()->isCompatible(Element::U8(mRS))) &&
!(ain->getType()->getElement()->isCompatible(Element::U8_4(mRS)))) {
mRS->throwError(RS_ERROR_INVALID_ELEMENT,
"Input allocation to Histogram must be U8 or U8_4");
@@ -494,7 +494,7 @@
"when used with forEach_dot");
return;
}
- if (!(ain->getType()->getElement()->isCompatible(Element::U8(mRS))) ||
+ if (!(ain->getType()->getElement()->isCompatible(Element::U8(mRS))) &&
!(ain->getType()->getElement()->isCompatible(Element::U8_4(mRS)))) {
mRS->throwError(RS_ERROR_INVALID_ELEMENT,
"Input allocation to Histogram must be U8 or U8_4");
diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h
index 26e1954..cd8f39a 100644
--- a/cpp/rsCppStructs.h
+++ b/cpp/rsCppStructs.h
@@ -1351,6 +1351,9 @@
void setVar(uint32_t index, int32_t v) const {
setVar(index, &v, sizeof(v));
}
+ void setVar(uint32_t index, uint32_t v) const {
+ setVar(index, &v, sizeof(v));
+ }
void setVar(uint32_t index, int64_t v) const {
setVar(index, &v, sizeof(v));
}
@@ -1722,7 +1725,7 @@
*
* @return ScriptIntrinsicHistogram
*/
- static sp<ScriptIntrinsicHistogram> create(sp<RS> rs);
+ static sp<ScriptIntrinsicHistogram> create(sp<RS> rs, sp<const Element> e);
/**
* Set the output of the histogram. 32 bit integer types are
* supported.
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 2492c22..696e2b6 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -49,7 +49,7 @@
using namespace android::renderscript;
typedef void (*outer_foreach_t)(
- const android::renderscript::RsExpandKernelParams *,
+ const RsExpandKernelDriverInfo *,
uint32_t x1, uint32_t x2, uint32_t outstep);
@@ -348,28 +348,6 @@
RsExpandKernelDriverInfo,
outer_foreach_t);
-static void kparamSetup(RsExpandKernelParams *kparams, const RsExpandKernelDriverInfo *fep) {
- //ALOGE("kp usr %p", fep->usr);
- //ALOGE("kp slot %i", fep->slot);
- //ALOGE("kp dim %i %i %i", fep->dim.x, fep->dim.y, fep->dim.z);
- //ALOGE("kp lid %i", fep->lid);
- //ALOGE("kp in[0] stide %i ptr %p", fep->inStride[0], fep->inPtr[0]);
- //ALOGE("kp out[0] ptr %p", fep->outPtr[0]);
- //ALOGE("kp loc %i %i %i", fep->current.x, fep->current.y, fep->current.z);
-
- kparams->usr = fep->usr;
- kparams->slot = fep->slot;
- kparams->dimX = fep->dim.x;
- kparams->dimY = fep->dim.y;
- kparams->dimZ = fep->dim.z;
- kparams->lid = fep->lid;
- kparams->inEStrides = (uint32_t *)&fep->inStride[0];
- kparams->ins = (const void **)&fep->inPtr[0];
- kparams->out = fep->outPtr[0];
- kparams->y = fep->current.y;
- kparams->z = fep->current.z;
-}
-
static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep,
uint32_t x, uint32_t y,
uint32_t z = 0, uint32_t lod = 0,
@@ -398,16 +376,16 @@
return n;
}
-static bool SelectOuterSlice(MTLaunchStruct* mtls, uint32_t sliceNum) {
+static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) {
uint32_t r = sliceNum;
- r = sliceInt(&mtls->fep.current.z, r, mtls->start.z, mtls->end.z);
- r = sliceInt(&mtls->fep.current.lod, r, mtls->start.lod, mtls->end.lod);
- r = sliceInt(&mtls->fep.current.face, r, mtls->start.face, mtls->end.face);
- r = sliceInt(&mtls->fep.current.array[0], r, mtls->start.array[0], mtls->end.array[0]);
- r = sliceInt(&mtls->fep.current.array[1], r, mtls->start.array[1], mtls->end.array[1]);
- r = sliceInt(&mtls->fep.current.array[2], r, mtls->start.array[2], mtls->end.array[2]);
- r = sliceInt(&mtls->fep.current.array[3], r, mtls->start.array[3], mtls->end.array[3]);
+ r = sliceInt(&fep->current.z, r, mtls->start.z, mtls->end.z);
+ r = sliceInt(&fep->current.lod, r, mtls->start.lod, mtls->end.lod);
+ r = sliceInt(&fep->current.face, r, mtls->start.face, mtls->end.face);
+ r = sliceInt(&fep->current.array[0], r, mtls->start.array[0], mtls->end.array[0]);
+ r = sliceInt(&fep->current.array[1], r, mtls->start.array[1], mtls->end.array[1]);
+ r = sliceInt(&fep->current.array[2], r, mtls->start.array[2], mtls->end.array[2]);
+ r = sliceInt(&fep->current.array[3], r, mtls->start.array[3], mtls->end.array[3]);
return r == 0;
}
@@ -422,23 +400,20 @@
while(1) {
uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
- if (!SelectOuterSlice(mtls, slice)) {
+ if (!SelectOuterSlice(mtls, &fep, slice)) {
return;
}
- for (mtls->fep.current.y = mtls->start.y;
- mtls->fep.current.y < mtls->end.y;
- mtls->fep.current.y++) {
+ for (fep.current.y = mtls->start.y; fep.current.y < mtls->end.y;
+ fep.current.y++) {
- FepPtrSetup(mtls, &mtls->fep, mtls->start.x,
- mtls->fep.current.y, mtls->fep.current.z, mtls->fep.current.lod,
- (RsAllocationCubemapFace)mtls->fep.current.face,
- mtls->fep.current.array[0], mtls->fep.current.array[1],
- mtls->fep.current.array[2], mtls->fep.current.array[3]);
+ FepPtrSetup(mtls, &fep, mtls->start.x,
+ fep.current.y, fep.current.z, fep.current.lod,
+ (RsAllocationCubemapFace)fep.current.face,
+ fep.current.array[0], fep.current.array[1],
+ fep.current.array[2], fep.current.array[3]);
- RsExpandKernelParams kparams;
- kparamSetup(&kparams, &mtls->fep);
- fn(&kparams, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
+ fn(&fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
}
}
@@ -464,10 +439,7 @@
for (fep.current.y = yStart; fep.current.y < yEnd; fep.current.y++) {
FepPtrSetup(mtls, &fep, mtls->start.x, fep.current.y);
- RsExpandKernelParams kparams;
- kparamSetup(&kparams, &fep);
-
- fn(&kparams, mtls->start.x, mtls->end.x, fep.outStride[0]);
+ fn(&fep, mtls->start.x, mtls->end.x, fep.outStride[0]);
}
}
}
@@ -491,10 +463,7 @@
FepPtrSetup(mtls, &fep, xStart, 0);
- RsExpandKernelParams kparams;
- kparamSetup(&kparams, &fep);
-
- fn(&kparams, xStart, xEnd, fep.outStride[0]);
+ fn(&fep, xStart, xEnd, fep.outStride[0]);
}
}
@@ -568,7 +537,7 @@
uint32_t slice = 0;
- while(SelectOuterSlice(mtls, slice++)) {
+ while(SelectOuterSlice(mtls, &mtls->fep, slice++)) {
for (mtls->fep.current.y = mtls->start.y;
mtls->fep.current.y < mtls->end.y;
mtls->fep.current.y++) {
@@ -579,9 +548,7 @@
mtls->fep.current.array[0], mtls->fep.current.array[1],
mtls->fep.current.array[2], mtls->fep.current.array[3]);
- RsExpandKernelParams kparams;
- kparamSetup(&kparams, &mtls->fep);
- fn(&kparams, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
+ fn(&mtls->fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]);
}
}
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index 5f1913f..afe8ef5 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -25,9 +25,6 @@
#include "rsScriptC.h"
#include "rsCpuCoreRuntime.h"
-
-#define RS_KERNEL_INPUT_LIMIT 8
-
namespace bcc {
class BCCContext;
class RSCompilerDriver;
@@ -37,51 +34,6 @@
namespace android {
namespace renderscript {
-struct StridePair {
- uint32_t eStride;
- uint32_t yStride;
-};
-
-struct RsLaunchDimensions {
- uint32_t x;
- uint32_t y;
- uint32_t z;
- uint32_t lod;
- uint32_t face;
- uint32_t array[4 /*make a define*/];
-};
-
-struct RsExpandKernelDriverInfo {
- // Warning: This structure is shared with the compiler
- // Any change to the fields here requires a matching compiler change
-
- const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
- uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
- uint32_t inLen;
-
- uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
- uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
- uint32_t outLen;
-
- // Dimension of the launch
- RsLaunchDimensions dim;
-
- // The walking itterator of the launch
- RsLaunchDimensions current;
-
- const void *usr;
- uint32_t usrLen;
-
-
-
- // Items below this line are not used by the compiler and can be change in the driver
- uint32_t lid;
- uint32_t slot;
-
-};
-
-typedef ::RsExpandKernelParams RsExpandKernelParams;
-
extern bool gArchUseSIMD;
typedef void (* InvokeFunc_t)(void);
diff --git a/cpu_ref/rsCpuCoreRuntime.h b/cpu_ref/rsCpuCoreRuntime.h
index 19add1b..c7841ec 100644
--- a/cpu_ref/rsCpuCoreRuntime.h
+++ b/cpu_ref/rsCpuCoreRuntime.h
@@ -20,28 +20,45 @@
#ifndef RSD_CPU_CORE_RUNTIME_H
#define RSD_CPU_CORE_RUNTIME_H
-struct RsExpandKernelParams {
+// Warning: This value is shared with the compiler
+// Any change to this value requires a matching compiler change
+#define RS_KERNEL_INPUT_LIMIT 8
- // Used by kernels
- const void **ins;
- uint32_t *inEStrides;
- void *out;
+struct RsLaunchDimensions {
+ // Warning: This structure is shared with the compiler
+ // Any change to the fields here requires a matching compiler change
+
+ uint32_t x;
uint32_t y;
uint32_t z;
- uint32_t lid;
+ uint32_t lod;
+ uint32_t face;
+ uint32_t array[4 /*make a define*/];
+};
- // Used by ScriptGroup and user kernels.
+struct RsExpandKernelDriverInfo {
+ // Warning: This structure is shared with the compiler
+ // Any change to the fields here requires a matching compiler change
+
+ const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
+ uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
+ uint32_t inLen;
+
+ uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
+ uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
+ uint32_t outLen;
+
+ // Dimension of the launch
+ struct RsLaunchDimensions dim;
+
+ // The walking iterator of the launch
+ struct RsLaunchDimensions current;
+
const void *usr;
+ uint32_t usrLen;
- // Used by intrinsics
- uint32_t dimX;
- uint32_t dimY;
- uint32_t dimZ;
-
- /*
- * FIXME: This is only used by the blend intrinsic. If possible, we should
- * modify blur to not need it.
- */
+ // Items below this line are not used by the compiler and can be changed in the driver
+ uint32_t lid;
uint32_t slot;
};
diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
index 86d0478..734e5e5 100644
--- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
@@ -38,7 +38,7 @@
protected:
ObjectBaseRef<Allocation> mLUT;
- static void kernel(const RsExpandKernelParams *p,
+ static void kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -58,13 +58,13 @@
int dimx, int dimy, int dimz);
-void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr;
+ RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)info->usr;
- uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->ins[0];
+ uchar4 *out = (uchar4 *)info->outPtr[0];
+ uchar4 *in = (uchar4 *)info->inPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index 27a02b7..16348c6 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -33,7 +33,7 @@
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
protected:
- static void kernel(const RsExpandKernelParams *p, uint32_t xstart,
+ static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
uint32_t xend, uint32_t outstep);
};
@@ -109,24 +109,24 @@
extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
-void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
+ RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)info->usr;
// instep/outstep can be ignored--sizeof(uchar4) known at compile time
- uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->ins[0];
+ uchar4 *out = (uchar4 *)info->outPtr[0];
+ uchar4 *in = (uchar4 *)info->inPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
if (gArchUseSIMD) {
- if (rsdIntrinsicBlend_K(out, in, p->slot, x1, x2) >= 0)
+ if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
return;
}
#endif
- switch (p->slot) {
+ switch (info->slot) {
case BLEND_CLEAR:
for (;x1 < x2; x1++, out++) {
*out = 0;
@@ -483,7 +483,7 @@
break;
default:
- ALOGE("Called unimplemented value %d", p->slot);
+ ALOGE("Called unimplemented value %d", info->slot);
rsAssert(false);
}
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index ffdb74b..a3ed1d1 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -44,10 +44,10 @@
int mIradius;
ObjectBaseRef<Allocation> mAlloc;
- static void kernelU4(const RsExpandKernelParams *p,
+ static void kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU1(const RsExpandKernelParams *p,
+ static void kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
void ComputeGaussianWeights();
@@ -113,7 +113,7 @@
-static void OneVU4(const RsExpandKernelParams *p, float4 *out, int32_t x, int32_t y,
+static void OneVU4(const RsExpandKernelDriverInfo *info, float4 *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x*4;
@@ -121,7 +121,7 @@
float4 blurredPixel = 0;
for (int r = -iradius; r <= iradius; r ++) {
int validY = rsMax((y + r), 0);
- validY = rsMin(validY, (int)(p->dimY - 1));
+ validY = rsMin(validY, (int)(info->dim.y- 1));
const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride];
float4 pf = convert_float4(pvy[0]);
blurredPixel += pf * gPtr[0];
@@ -131,7 +131,7 @@
out[0] = blurredPixel;
}
-static void OneVU1(const RsExpandKernelParams *p, float *out, int32_t x, int32_t y,
+static void OneVU1(const RsExpandKernelDriverInfo *info, float *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x;
@@ -139,7 +139,7 @@
float blurredPixel = 0;
for (int r = -iradius; r <= iradius; r ++) {
int validY = rsMax((y + r), 0);
- validY = rsMin(validY, (int)(p->dimY - 1));
+ validY = rsMin(validY, (int)(info->dim.y - 1));
float pf = (float)pi[validY * iStride];
blurredPixel += pf * gPtr[0];
gPtr++;
@@ -247,13 +247,13 @@
}
}
-static void OneHU4(const RsExpandKernelParams *p, uchar4 *out, int32_t x,
+static void OneHU4(const RsExpandKernelDriverInfo *info, uchar4 *out, int32_t x,
const float4 *ptrIn, const float* gPtr, int iradius) {
float4 blurredPixel = 0;
for (int r = -iradius; r <= iradius; r ++) {
int validX = rsMax((x + r), 0);
- validX = rsMin(validX, (int)(p->dimX - 1));
+ validX = rsMin(validX, (int)(info->dim.x - 1));
float4 pf = ptrIn[validX];
blurredPixel += pf * gPtr[0];
gPtr++;
@@ -262,13 +262,13 @@
out->xyzw = convert_uchar4(blurredPixel);
}
-static void OneHU1(const RsExpandKernelParams *p, uchar *out, int32_t x,
+static void OneHU1(const RsExpandKernelDriverInfo *info, uchar *out, int32_t x,
const float *ptrIn, const float* gPtr, int iradius) {
float blurredPixel = 0;
for (int r = -iradius; r <= iradius; r ++) {
int validX = rsMax((x + r), 0);
- validX = rsMin(validX, (int)(p->dimX - 1));
+ validX = rsMin(validX, (int)(info->dim.x - 1));
float pf = ptrIn[validX];
blurredPixel += pf * gPtr[0];
gPtr++;
@@ -278,13 +278,13 @@
}
-void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
float4 stackbuf[2048];
float4 *buf = &stackbuf[0];
- RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
+ RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Blur executed without input, skipping");
return;
@@ -292,36 +292,37 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uchar4 *out = (uchar4 *)p->out;
+ uchar4 *out = (uchar4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS)
if (gArchUseSIMD) {
- rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * p->y), p->dimX, p->dimY,
- stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
+ rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * info->current.y),
+ info->dim.x, info->dim.y,
+ stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
return;
}
#endif
- if (p->dimX > 2048) {
- if ((p->dimX > cp->mScratchSize[p->lid]) || !cp->mScratch[p->lid]) {
+ if (info->dim.x > 2048) {
+ if ((info->dim.x > cp->mScratchSize[info->lid]) || !cp->mScratch[info->lid]) {
// Pad the side of the allocation by one unit to allow alignment later
- cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], (p->dimX + 1) * 16);
- cp->mScratchSize[p->lid] = p->dimX;
+ cp->mScratch[info->lid] = realloc(cp->mScratch[info->lid], (info->dim.x + 1) * 16);
+ cp->mScratchSize[info->lid] = info->dim.x;
}
// realloc only aligns to 8 bytes so we manually align to 16.
- buf = (float4 *) ((((intptr_t)cp->mScratch[p->lid]) + 15) & ~0xf);
+ buf = (float4 *) ((((intptr_t)cp->mScratch[info->lid]) + 15) & ~0xf);
}
float4 *fout = (float4 *)buf;
- int y = p->y;
- if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius))) {
+ int y = info->current.y;
+ if ((y > cp->mIradius) && (y < ((int)info->dim.y - cp->mIradius))) {
const uchar *pi = pin + (y - cp->mIradius) * stride;
- OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, p->dimX);
+ OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, info->dim.x);
} else {
x1 = 0;
- while(p->dimX > x1) {
- OneVU4(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
+ while(info->dim.x > x1) {
+ OneVU4(info, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
fout++;
x1++;
}
@@ -329,7 +330,7 @@
x1 = xstart;
while ((x1 < (uint32_t)cp->mIradius) && (x1 < x2)) {
- OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
+ OneHU4(info, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
@@ -344,17 +345,17 @@
}
#endif
while(x2 > x1) {
- OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius);
+ OneHU4(info, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
float buf[4 * 2048];
- RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
+ RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Blur executed without input, skipping");
return;
@@ -362,27 +363,27 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uchar *out = (uchar *)p->out;
+ uchar *out = (uchar *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
#if defined(ARCH_ARM_USE_INTRINSICS)
if (gArchUseSIMD) {
- rsdIntrinsicBlurU1_K(out, pin + stride * p->y, p->dimX, p->dimY,
- stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
+ rsdIntrinsicBlurU1_K(out, pin + stride * info->current.y, info->dim.x, info->dim.y,
+ stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius);
return;
}
#endif
float *fout = (float *)buf;
- int y = p->y;
- if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius -1))) {
+ int y = info->current.y;
+ if ((y > cp->mIradius) && (y < ((int)info->dim.y - cp->mIradius -1))) {
const uchar *pi = pin + (y - cp->mIradius) * stride;
- OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, p->dimX);
+ OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, info->dim.x);
} else {
x1 = 0;
- while(p->dimX > x1) {
- OneVU1(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
+ while(info->dim.x > x1) {
+ OneVU1(info, fout, x1, y, pin, stride, cp->mFp, cp->mIradius);
fout++;
x1++;
}
@@ -391,7 +392,7 @@
x1 = xstart;
while ((x1 < x2) &&
((x1 < (uint32_t)cp->mIradius) || (((uintptr_t)out) & 0x3))) {
- OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius);
+ OneHU1(info, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
@@ -410,7 +411,7 @@
}
#endif
while(x2 > x1) {
- OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius);
+ OneHU1(info, out, x1, buf, cp->mFp, cp->mIradius);
out++;
x1++;
}
diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
index e629dea..a7d576b 100644
--- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
+++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
@@ -185,7 +185,7 @@
FunctionTab_t mFnTab;
#endif
- static void kernel(const RsExpandKernelParams *p,
+ static void kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
void updateCoeffCache(float fpMul, float addMul);
@@ -776,7 +776,7 @@
}
-static void One(const RsExpandKernelParams *p, void *out,
+static void One(const RsExpandKernelDriverInfo *info, void *out,
const void *py, const float* coeff, const float *add,
uint32_t vsin, uint32_t vsout, bool fin, bool fout) {
@@ -877,15 +877,15 @@
//ALOGE("out %p %f %f %f %f", out, ((float *)out)[0], ((float *)out)[1], ((float *)out)[2], ((float *)out)[3]);
}
-void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr;
+ RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)info->usr;
- uint32_t instep = p->inEStrides[0];
+ uint32_t instep = info->inStride[0];
- uchar *out = (uchar *)p->out;
- uchar *in = (uchar *)p->ins[0];
+ uchar *out = (uchar *)info->outPtr[0];
+ uchar *in = (uchar *)info->inPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -894,7 +894,7 @@
bool floatIn = !!cp->mLastKey.u.inType;
bool floatOut = !!cp->mLastKey.u.outType;
- //if (!p->y) ALOGE("steps %i %i %i %i", instep, outstep, vsin, vsout);
+ //if (!info->current.y) ALOGE("steps %i %i %i %i", instep, outstep, vsin, vsout);
if(x2 > x1) {
int32_t len = x2 - x1;
@@ -929,7 +929,7 @@
}
while(x1 != x2) {
- One(p, out, in, cp->tmpFp, cp->tmpFpa, vsin, vsout, floatIn, floatOut);
+ One(info, out, in, cp->tmpFp, cp->tmpFpa, vsin, vsout, floatIn, floatOut);
out += outstep;
in += instep;
x1++;
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index e3fa245..ce7be79 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -42,22 +42,22 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsExpandKernelParams *p,
+ static void kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU2(const RsExpandKernelParams *p,
+ static void kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU4(const RsExpandKernelParams *p,
+ static void kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF1(const RsExpandKernelParams *p,
+ static void kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF2(const RsExpandKernelParams *p,
+ static void kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF4(const RsExpandKernelParams *p,
+ static void kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -88,12 +88,12 @@
const void *y2, const short *coef, uint32_t count);
-static void ConvolveOneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
+static void ConvolveOneU4(const RsExpandKernelDriverInfo *info, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
float4 px = convert_float4(py0[x1]) * coeff[0] +
convert_float4(py0[x]) * coeff[1] +
@@ -110,12 +110,12 @@
*out = o;
}
-static void ConvolveOneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
+static void ConvolveOneU2(const RsExpandKernelDriverInfo *info, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
float2 px = convert_float2(py0[x1]) * coeff[0] +
convert_float2(py0[x]) * coeff[1] +
@@ -131,12 +131,12 @@
*out = convert_uchar2(px);
}
-static void ConvolveOneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
+static void ConvolveOneU1(const RsExpandKernelDriverInfo *info, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
float px = ((float)py0[x1]) * coeff[0] +
((float)py0[x]) * coeff[1] +
@@ -150,43 +150,43 @@
*out = clamp(px + 0.5f, 0.f, 255.f);
}
-static void ConvolveOneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
+static void ConvolveOneF4(const RsExpandKernelDriverInfo *info, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
*out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
(py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
+static void ConvolveOneF2(const RsExpandKernelDriverInfo *info, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
*out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
(py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
+static void ConvolveOneF1(const RsExpandKernelDriverInfo *info, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2,
const float* coeff) {
uint32_t x1 = rsMax((int32_t)x-1, 0);
- uint32_t x2 = rsMin((int32_t)x+1, (int32_t)p->dimX-1);
+ uint32_t x2 = rsMin((int32_t)x+1, (int32_t)info->dim.x-1);
*out = (py0[x1] * coeff[0]) + (py0[x] * coeff[1]) + (py0[x2] * coeff[2]) +
(py1[x1] * coeff[3]) + (py1[x] * coeff[4]) + (py1[x2] * coeff[5]) +
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -195,17 +195,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const uchar4 *py0 = (const uchar4 *)(pin + stride * y2);
- const uchar4 *py1 = (const uchar4 *)(pin + stride * p->y);
+ const uchar4 *py1 = (const uchar4 *)(pin + stride * info->current.y);
const uchar4 *py2 = (const uchar4 *)(pin + stride * y1);
- uchar4 *out = (uchar4 *)p->out;
+ uchar4 *out = (uchar4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneU4(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU4(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -223,17 +223,17 @@
#endif
while(x1 != x2) {
- ConvolveOneU4(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU4(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -242,17 +242,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const uchar2 *py0 = (const uchar2 *)(pin + stride * y2);
- const uchar2 *py1 = (const uchar2 *)(pin + stride * p->y);
+ const uchar2 *py1 = (const uchar2 *)(pin + stride * info->current.y);
const uchar2 *py2 = (const uchar2 *)(pin + stride * y1);
- uchar2 *out = (uchar2 *)p->out;
+ uchar2 *out = (uchar2 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneU2(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU2(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -268,17 +268,17 @@
#endif
while(x1 != x2) {
- ConvolveOneU2(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU2(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -287,17 +287,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const uchar *py0 = (const uchar *)(pin + stride * y2);
- const uchar *py1 = (const uchar *)(pin + stride * p->y);
+ const uchar *py1 = (const uchar *)(pin + stride * info->current.y);
const uchar *py2 = (const uchar *)(pin + stride * y1);
- uchar *out = (uchar *)p->out;
+ uchar *out = (uchar *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneU1(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU1(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -313,17 +313,17 @@
#endif
while(x1 != x2) {
- ConvolveOneU1(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneU1(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -332,17 +332,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const float4 *py0 = (const float4 *)(pin + stride * y2);
- const float4 *py1 = (const float4 *)(pin + stride * p->y);
+ const float4 *py1 = (const float4 *)(pin + stride * info->current.y);
const float4 *py2 = (const float4 *)(pin + stride * y1);
- float4 *out = (float4 *)p->out;
+ float4 *out = (float4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneF4(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF4(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -358,17 +358,17 @@
#endif
while(x1 != x2) {
- ConvolveOneF4(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF4(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -377,17 +377,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const float2 *py0 = (const float2 *)(pin + stride * y2);
- const float2 *py1 = (const float2 *)(pin + stride * p->y);
+ const float2 *py1 = (const float2 *)(pin + stride * info->current.y);
const float2 *py2 = (const float2 *)(pin + stride * y1);
- float2 *out = (float2 *)p->out;
+ float2 *out = (float2 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneF2(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF2(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -403,16 +403,16 @@
#endif
while(x1 != x2) {
- ConvolveOneF2(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF2(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Convolve3x3 executed without input, skipping");
@@ -421,17 +421,17 @@
const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
- uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ uint32_t y1 = rsMin((int32_t)info->current.y + 1, (int32_t)(info->dim.y-1));
+ uint32_t y2 = rsMax((int32_t)info->current.y - 1, 0);
const float *py0 = (const float *)(pin + stride * y2);
- const float *py1 = (const float *)(pin + stride * p->y);
+ const float *py1 = (const float *)(pin + stride * info->current.y);
const float *py2 = (const float *)(pin + stride * y1);
- float *out = (float *)p->out;
+ float *out = (float *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
if(x1 == 0) {
- ConvolveOneF1(p, 0, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF1(info, 0, out, py0, py1, py2, cp->mFp);
x1 ++;
out++;
}
@@ -447,7 +447,7 @@
#endif
while(x1 != x2) {
- ConvolveOneF1(p, x1, out, py0, py1, py2, cp->mFp);
+ ConvolveOneF1(info, x1, out, py0, py1, py2, cp->mFp);
out++;
x1++;
}
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index e591e44..29dd886 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -42,22 +42,22 @@
ObjectBaseRef<Allocation> alloc;
- static void kernelU1(const RsExpandKernelParams *p,
+ static void kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU2(const RsExpandKernelParams *p,
+ static void kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU4(const RsExpandKernelParams *p,
+ static void kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF1(const RsExpandKernelParams *p,
+ static void kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF2(const RsExpandKernelParams *p,
+ static void kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF4(const RsExpandKernelParams *p,
+ static void kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
@@ -86,15 +86,15 @@
}
-static void OneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
+static void OneU4(const RsExpandKernelDriverInfo *info, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2, const uchar4 *py3, const uchar4 *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float4 px = convert_float4(py0[x0]) * coeff[0] +
convert_float4(py0[x1]) * coeff[1] +
@@ -129,15 +129,15 @@
*out = convert_uchar4(px);
}
-static void OneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
+static void OneU2(const RsExpandKernelDriverInfo *info, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2, const uchar2 *py3, const uchar2 *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float2 px = convert_float2(py0[x0]) * coeff[0] +
convert_float2(py0[x1]) * coeff[1] +
@@ -172,15 +172,15 @@
*out = convert_uchar2(px);
}
-static void OneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
+static void OneU1(const RsExpandKernelDriverInfo *info, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2, const uchar *py3, const uchar *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float px = (float)(py0[x0]) * coeff[0] +
(float)(py0[x1]) * coeff[1] +
@@ -215,15 +215,15 @@
*out = px;
}
-static void OneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
+static void OneF4(const RsExpandKernelDriverInfo *info, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2, const float4 *py3, const float4 *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float4 px = py0[x0] * coeff[0] +
py0[x1] * coeff[1] +
@@ -257,15 +257,15 @@
*out = px;
}
-static void OneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
+static void OneF2(const RsExpandKernelDriverInfo *info, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2, const float2 *py3, const float2 *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float2 px = py0[x0] * coeff[0] +
py0[x1] * coeff[1] +
@@ -299,15 +299,15 @@
*out = px;
}
-static void OneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
+static void OneF1(const RsExpandKernelDriverInfo *info, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2, const float *py3, const float *py4,
const float* coeff) {
uint32_t x0 = rsMax((int32_t)x-2, 0);
uint32_t x1 = rsMax((int32_t)x-1, 0);
uint32_t x2 = x;
- uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
- uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1));
+ uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1));
+ uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1));
float px = py0[x0] * coeff[0] +
py0[x1] * coeff[1] +
@@ -346,10 +346,10 @@
const void *y2, const void *y3, const void *y4,
const short *coef, uint32_t count);
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -357,11 +357,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const uchar4 *py0 = (const uchar4 *)(pin + stride * y0);
const uchar4 *py1 = (const uchar4 *)(pin + stride * y1);
@@ -369,12 +369,12 @@
const uchar4 *py3 = (const uchar4 *)(pin + stride * y3);
const uchar4 *py4 = (const uchar4 *)(pin + stride * y4);
- uchar4 *out = (uchar4 *)p->out;
+ uchar4 *out = (uchar4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneU4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -400,16 +400,16 @@
#endif
while(x1 < x2) {
- OneU4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -417,11 +417,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const uchar2 *py0 = (const uchar2 *)(pin + stride * y0);
const uchar2 *py1 = (const uchar2 *)(pin + stride * y1);
@@ -429,12 +429,12 @@
const uchar2 *py3 = (const uchar2 *)(pin + stride * y3);
const uchar2 *py4 = (const uchar2 *)(pin + stride * y4);
- uchar2 *out = (uchar2 *)p->out;
+ uchar2 *out = (uchar2 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneU2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -449,16 +449,16 @@
#endif
while(x1 < x2) {
- OneU2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -466,11 +466,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const uchar *py0 = (const uchar *)(pin + stride * y0);
const uchar *py1 = (const uchar *)(pin + stride * y1);
@@ -478,12 +478,12 @@
const uchar *py3 = (const uchar *)(pin + stride * y3);
const uchar *py4 = (const uchar *)(pin + stride * y4);
- uchar *out = (uchar *)p->out;
+ uchar *out = (uchar *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneU1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -498,16 +498,16 @@
#endif
while(x1 < x2) {
- OneU1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneU1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -515,11 +515,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const float4 *py0 = (const float4 *)(pin + stride * y0);
const float4 *py1 = (const float4 *)(pin + stride * y1);
@@ -527,12 +527,12 @@
const float4 *py3 = (const float4 *)(pin + stride * y3);
const float4 *py4 = (const float4 *)(pin + stride * y4);
- float4 *out = (float4 *)p->out;
+ float4 *out = (float4 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneF4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -547,16 +547,16 @@
#endif
while(x1 < x2) {
- OneF4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -564,11 +564,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const float2 *py0 = (const float2 *)(pin + stride * y0);
const float2 *py1 = (const float2 *)(pin + stride * y1);
@@ -576,12 +576,12 @@
const float2 *py3 = (const float2 *)(pin + stride * y3);
const float2 *py4 = (const float2 *)(pin + stride * y4);
- float2 *out = (float2 *)p->out;
+ float2 *out = (float2 *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneF2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -596,16 +596,16 @@
#endif
while(x1 < x2) {
- OneF2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
+ RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
return;
@@ -613,11 +613,11 @@
const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
const size_t stride = cp->alloc->mHal.drvState.lod[0].stride;
- uint32_t y0 = rsMax((int32_t)p->y-2, 0);
- uint32_t y1 = rsMax((int32_t)p->y-1, 0);
- uint32_t y2 = p->y;
- uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1));
- uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1));
+ uint32_t y0 = rsMax((int32_t)info->current.y-2, 0);
+ uint32_t y1 = rsMax((int32_t)info->current.y-1, 0);
+ uint32_t y2 = info->current.y;
+ uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1));
+ uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1));
const float *py0 = (const float *)(pin + stride * y0);
const float *py1 = (const float *)(pin + stride * y1);
@@ -625,12 +625,12 @@
const float *py3 = (const float *)(pin + stride * y3);
const float *py4 = (const float *)(pin + stride * y4);
- float *out = (float *)p->out;
+ float *out = (float *)info->outPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
while((x1 < x2) && (x1 < 2)) {
- OneF1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
@@ -645,7 +645,7 @@
#endif
while(x1 < x2) {
- OneF1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp);
+ OneF1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp);
out++;
x1++;
}
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index 4779187..fd60794 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -49,29 +49,29 @@
int *mSums;
ObjectBaseRef<Allocation> mAllocOut;
- static void kernelP1U4(const RsExpandKernelParams *p,
+ static void kernelP1U4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1U3(const RsExpandKernelParams *p,
+ static void kernelP1U3(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1U2(const RsExpandKernelParams *p,
+ static void kernelP1U2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1U1(const RsExpandKernelParams *p,
+ static void kernelP1U1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1L4(const RsExpandKernelParams *p,
+ static void kernelP1L4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1L3(const RsExpandKernelParams *p,
+ static void kernelP1L3(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1L2(const RsExpandKernelParams *p,
+ static void kernelP1L2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelP1L1(const RsExpandKernelParams *p,
+ static void kernelP1L1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
@@ -166,61 +166,61 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * 4 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * 4 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 2) ] ++;
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
sums[(in[3] << 2) + 3] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * 4 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * 4 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 2) ] ++;
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * 2 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * 2 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 1) ] ++;
sums[(in[1] << 1) + 1] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]) +
@@ -228,69 +228,69 @@
(cp->mDotI[2] * in[2]) +
(cp->mDotI[3] * in[3]);
sums[(t + 0x7f) >> 8] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]) +
(cp->mDotI[1] * in[1]) +
(cp->mDotI[2] * in[2]);
sums[(t + 0x7f) >> 8] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]) +
(cp->mDotI[1] * in[1]);
sums[(t + 0x7f) >> 8] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]);
sums[(t + 0x7f) >> 8] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->ins[0];
- int * sums = &cp->mSums[256 * p->lid];
+ RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
+ uchar *in = (uchar *)info->inPtr[0];
+ int * sums = &cp->mSums[256 * info->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[in[0]] ++;
- in += p->inEStrides[0];
+ in += info->inStride[0];
}
}
diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp
index b08a0e5..622fe1e 100644
--- a/cpu_ref/rsCpuIntrinsicLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsicLUT.cpp
@@ -38,7 +38,7 @@
protected:
ObjectBaseRef<Allocation> lut;
- static void kernel(const RsExpandKernelParams *p,
+ static void kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -53,13 +53,13 @@
}
-void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr;
+ RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)info->usr;
- uchar *out = (uchar *)p->out;
- const uchar *in = (uchar *)p->ins[0];
+ uchar *out = (uchar *)info->outPtr[0];
+ const uchar *in = (uchar *)info->inPtr[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index 2c51b5a..5668d96 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -46,22 +46,22 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsExpandKernelParams *p,
+ static void kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU2(const RsExpandKernelParams *p,
+ static void kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelU4(const RsExpandKernelParams *p,
+ static void kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF1(const RsExpandKernelParams *p,
+ static void kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF2(const RsExpandKernelParams *p,
+ static void kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
- static void kernelF4(const RsExpandKernelParams *p,
+ static void kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -255,10 +255,10 @@
return p;
}
-void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -269,7 +269,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -283,7 +283,7 @@
const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
- uchar4 *out = ((uchar4 *)p->out) + xstart;
+ uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -295,10 +295,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -309,7 +309,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -323,7 +323,7 @@
const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
- uchar2 *out = ((uchar2 *)p->out) + xstart;
+ uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -335,10 +335,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -349,7 +349,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -363,7 +363,7 @@
const uchar *yp2 = pin + stride * ys2;
const uchar *yp3 = pin + stride * ys3;
- uchar *out = ((uchar *)p->out) + xstart;
+ uchar *out = ((uchar *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -375,10 +375,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -389,7 +389,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -403,7 +403,7 @@
const float4 *yp2 = (const float4 *)(pin + stride * ys2);
const float4 *yp3 = (const float4 *)(pin + stride * ys3);
- float4 *out = ((float4 *)p->out) + xstart;
+ float4 *out = ((float4 *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -415,10 +415,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -429,7 +429,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -443,7 +443,7 @@
const float2 *yp2 = (const float2 *)(pin + stride * ys2);
const float2 *yp3 = (const float2 *)(pin + stride * ys3);
- float2 *out = ((float2 *)p->out) + xstart;
+ float2 *out = ((float2 *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -455,10 +455,10 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
+ RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
if (!cp->mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -469,7 +469,7 @@
const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
- float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
+ float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
int starty = (int) floor(yf - 1);
yf = yf - floor(yf);
int maxy = srcHeight - 1;
@@ -483,7 +483,7 @@
const float *yp2 = (const float *)(pin + stride * ys2);
const float *yp3 = (const float *)(pin + stride * ys3);
- float *out = ((float *)p->out) + xstart;
+ float *out = ((float *)info->outPtr[0]) + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index afd3cd3..395a158 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -46,7 +46,7 @@
protected:
ObjectBaseRef<Allocation> alloc;
- static void kernel(const RsExpandKernelParams *p,
+ static void kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
};
@@ -101,10 +101,10 @@
extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart, size_t xend);
extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, size_t xstart, size_t xend);
-void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsExpandKernelParams *p,
+void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)p->usr;
+ RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)info->usr;
if (!cp->alloc.get()) {
ALOGE("YuvToRGB executed without input, skipping");
return;
@@ -119,11 +119,11 @@
// calculate correct stride in legacy case
if (cp->alloc->mHal.drvState.lod[0].dimY == 0) {
- strideY = p->dimX;
+ strideY = info->dim.x;
}
- const uchar *Y = pinY + (p->y * strideY);
+ const uchar *Y = pinY + (info->current.y * strideY);
- uchar4 *out = (uchar4 *)p->out + xstart;
+ uchar4 *out = (uchar4 *)info->outPtr[0] + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -131,23 +131,23 @@
const uchar *pinU = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
const size_t strideU = cp->alloc->mHal.drvState.lod[1].stride;
- const uchar *u = pinU + ((p->y >> 1) * strideU);
+ const uchar *u = pinU + ((info->current.y >> 1) * strideU);
const uchar *pinV = (const uchar *)cp->alloc->mHal.drvState.lod[2].mallocPtr;
const size_t strideV = cp->alloc->mHal.drvState.lod[2].stride;
- const uchar *v = pinV + ((p->y >> 1) * strideV);
+ const uchar *v = pinV + ((info->current.y >> 1) * strideV);
- //ALOGE("pinY, %p, Y, %p, p->y, %d, strideY, %d", pinY, Y, p->y, strideY);
- //ALOGE("pinU, %p, U, %p, p->y, %d, strideU, %d", pinU, u, p->y, strideU);
- //ALOGE("pinV, %p, V, %p, p->y, %d, strideV, %d", pinV, v, p->y, strideV);
+ //ALOGE("pinY, %p, Y, %p, info->current.y, %d, strideY, %d", pinY, Y, info->current.y, strideY);
+ //ALOGE("pinU, %p, U, %p, info->current.y, %d, strideU, %d", pinU, u, info->current.y, strideU);
+ //ALOGE("pinV, %p, V, %p, info->current.y, %d, strideV, %d", pinV, v, info->current.y, strideV);
//ALOGE("dimX, %d, dimY, %d", cp->alloc->mHal.drvState.lod[0].dimX, cp->alloc->mHal.drvState.lod[0].dimY);
- //ALOGE("p->dimX, %d, p->dimY, %d", p->dimX, p->dimY);
+ //ALOGE("info->dim.x, %d, info->dim.y, %d", info->dim.x, info->dim.y);
if (pinU == nullptr) {
// Legacy yuv support didn't fill in uv
v = ((uint8_t *)cp->alloc->mHal.drvState.lod[0].mallocPtr) +
- (strideY * p->dimY) +
- ((p->y >> 1) * strideY);
+ (strideY * info->dim.y) +
+ ((info->current.y >> 1) * strideY);
u = v + 1;
cstep = 2;
}
@@ -166,7 +166,7 @@
if((x2 > x1) && gArchUseSIMD) {
int32_t len = x2 - x1;
if (cstep == 1) {
- rsdIntrinsicYuv2_K(p->out, Y, u, v, x1, x2);
+ rsdIntrinsicYuv2_K(info->outPtr[0], Y, u, v, x1, x2);
x1 += len;
out += len;
} else if (cstep == 2) {
@@ -175,11 +175,11 @@
intptr_t ipv = (intptr_t)v;
if (ipu == (ipv + 1)) {
- rsdIntrinsicYuv_K(p->out, Y, v, x1, x2);
+ rsdIntrinsicYuv_K(info->outPtr[0], Y, v, x1, x2);
x1 += len;
out += len;
} else if (ipu == (ipv - 1)) {
- rsdIntrinsicYuvR_K(p->out, Y, u, x1, x2);
+ rsdIntrinsicYuvR_K(info->outPtr[0], Y, u, x1, x2);
x1 += len;
out += len;
}
@@ -188,7 +188,7 @@
#endif
if(x2 > x1) {
- // ALOGE("y %i %i %i", p->y, x1, x2);
+ // ALOGE("y %i %i %i", info->current.y, x1, x2);
while(x1 < x2) {
int cx = (x1 >> 1) * cstep;
*out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index aaaa2a2..72da141 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -43,7 +43,7 @@
class RsdCpuScriptImpl : public RsdCpuReferenceImpl::CpuScript {
public:
typedef void (*outer_foreach_t)(
- const RsExpandKernelParams *,
+ const RsExpandKernelDriverInfo *,
uint32_t x1, uint32_t x2,
uint32_t outstep);
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 281a715..82208db 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -42,83 +42,83 @@
}
-typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
+typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
-void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
+void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
uint32_t xstart, uint32_t xend,
uint32_t outstep) {
- const ScriptList *sl = (const ScriptList *)kparams->usr;
- RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
+ const ScriptList *sl = (const ScriptList *)kinfo->usr;
+ RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
- const void **oldIns = mkparams->ins;
- uint32_t *oldStrides = mkparams->inEStrides;
-
- void *localIns[1];
- uint32_t localStride[1];
-
- mkparams->ins = (const void**)localIns;
- mkparams->inEStrides = localStride;
+ const uint32_t oldInStride = mkinfo->inStride[0];
for (size_t ct = 0; ct < sl->count; ct++) {
ScriptGroupRootFunc_t func;
func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
- mkparams->usr = sl->usrPtrs[ct];
+ mkinfo->usr = sl->usrPtrs[ct];
if (sl->ins[ct]) {
- localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+ rsAssert(kinfo->inLen == 1);
- localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
+ mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+
+ mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
if (sl->inExts[ct]) {
- localIns[0] = (void*)
- ((const uint8_t *)localIns[0] +
- sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y);
+ mkinfo->inPtr[0] =
+ (mkinfo->inPtr[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
- } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
- localIns[0] = (void*)
- ((const uint8_t *)localIns[0] +
- sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid);
+ } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
+ mkinfo->inPtr[0] =
+ (mkinfo->inPtr[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
}
} else {
- localIns[0] = nullptr;
- localStride[0] = 0;
+ rsAssert(kinfo->inLen == 0);
+
+ mkinfo->inPtr[0] = nullptr;
+ mkinfo->inStride[0] = 0;
}
uint32_t ostep;
if (sl->outs[ct]) {
- mkparams->out =
+ rsAssert(kinfo->outLen == 1);
+
+ mkinfo->outPtr[0] =
(uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
if (sl->outExts[ct]) {
- mkparams->out =
- (uint8_t *)mkparams->out +
- sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y;
+ mkinfo->outPtr[0] =
+ mkinfo->outPtr[0] +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
- } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
- mkparams->out =
- (uint8_t *)mkparams->out +
- sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
+ } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
+ mkinfo->outPtr[0] =
+ mkinfo->outPtr[0] +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
}
} else {
- mkparams->out = nullptr;
- ostep = 0;
+ rsAssert(kinfo->outLen == 0);
+
+ mkinfo->outPtr[0] = nullptr;
+ ostep = 0;
}
//ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
- func(kparams, xstart, xend, ostep);
+ func(kinfo, xstart, xend, ostep);
}
//ALOGE("script group root");
- mkparams->ins = oldIns;
- mkparams->inEStrides = oldStrides;
- mkparams->usr = sl;
+ mkinfo->inStride[0] = oldInStride;
+ mkinfo->usr = sl;
}
diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h
index 50ba2ac..acfe754 100644
--- a/cpu_ref/rsCpuScriptGroup.h
+++ b/cpu_ref/rsCpuScriptGroup.h
@@ -33,7 +33,7 @@
CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg);
bool init();
- static void scriptGroupRoot(const RsExpandKernelParams *p,
+ static void scriptGroupRoot(const RsExpandKernelDriverInfo *info,
uint32_t xstart, uint32_t xend,
uint32_t outstep);
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 915fa4d..2e50ecb 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -16,7 +16,6 @@
#endif
#include "cpu_ref/rsCpuCore.h"
-#include "cpu_ref/rsCpuCoreRuntime.h"
#include "rsClosure.h"
#include "rsContext.h"
#include "rsCpuCore.h"
@@ -36,21 +35,21 @@
const size_t DefaultKernelArgCount = 2;
-void groupRoot(const RsExpandKernelParams *kparams, uint32_t xstart,
+void groupRoot(const RsExpandKernelDriverInfo *kinfo, uint32_t xstart,
uint32_t xend, uint32_t outstep) {
- const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kparams->usr;
- RsExpandKernelParams *mutable_kparams = (RsExpandKernelParams *)kparams;
- const void **oldIns = kparams->ins;
- uint32_t *oldStrides = kparams->inEStrides;
+ const List<CPUClosure*>& closures = *(List<CPUClosure*>*)kinfo->usr;
+ RsExpandKernelDriverInfo *mutable_kinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
- std::vector<const void*> ins(DefaultKernelArgCount);
- std::vector<uint32_t> strides(DefaultKernelArgCount);
+ const size_t oldInLen = mutable_kinfo->inLen;
+
+ decltype(mutable_kinfo->inStride) oldInStride;
+ memcpy(&oldInStride, &mutable_kinfo->inStride, sizeof(oldInStride));
for (CPUClosure* cpuClosure : closures) {
const Closure* closure = cpuClosure->mClosure;
- auto in_iter = ins.begin();
- auto stride_iter = strides.begin();
+ // There had better be enough space in mutable_kinfo
+ rsAssert(closure->mNumArg <= RS_KERNEL_INPUT_LIMIT);
for (size_t i = 0; i < closure->mNumArg; i++) {
const void* arg = closure->mArgs[i];
@@ -58,31 +57,30 @@
const uint32_t eStride = a->mHal.state.elementSizeBytes;
const uint8_t* ptr = (uint8_t*)(a->mHal.drvState.lod[0].mallocPtr) +
eStride * xstart;
- if (kparams->dimY > 1) {
- ptr += a->mHal.drvState.lod[0].stride * kparams->y;
+ if (kinfo->dim.y > 1) {
+ ptr += a->mHal.drvState.lod[0].stride * kinfo->current.y;
}
- *in_iter++ = ptr;
- *stride_iter++ = eStride;
+ mutable_kinfo->inPtr[i] = ptr;
+ mutable_kinfo->inStride[i] = eStride;
}
-
- mutable_kparams->ins = &ins[0];
- mutable_kparams->inEStrides = &strides[0];
+ mutable_kinfo->inLen = closure->mNumArg;
const Allocation* out = closure->mReturnValue;
const uint32_t ostep = out->mHal.state.elementSizeBytes;
const uint8_t* ptr = (uint8_t *)(out->mHal.drvState.lod[0].mallocPtr) +
ostep * xstart;
- if (kparams->dimY > 1) {
- ptr += out->mHal.drvState.lod[0].stride * kparams->y;
+ if (kinfo->dim.y > 1) {
+ ptr += out->mHal.drvState.lod[0].stride * kinfo->current.y;
}
- mutable_kparams->out = (void*)ptr;
+ rsAssert(kinfo->outLen <= 1);
+ mutable_kinfo->outPtr[0] = const_cast<uint8_t*>(ptr);
- cpuClosure->mFunc(kparams, xstart, xend, ostep);
+ cpuClosure->mFunc(kinfo, xstart, xend, ostep);
}
- mutable_kparams->ins = oldIns;
- mutable_kparams->inEStrides = oldStrides;
+ mutable_kinfo->inLen = oldInLen;
+ memcpy(&mutable_kinfo->inStride, &oldInStride, sizeof(oldInStride));
}
} // namespace
diff --git a/cpu_ref/rsCpuScriptGroup2.h b/cpu_ref/rsCpuScriptGroup2.h
index 3074cc8..1883f90 100644
--- a/cpu_ref/rsCpuScriptGroup2.h
+++ b/cpu_ref/rsCpuScriptGroup2.h
@@ -4,7 +4,7 @@
#include "rsd_cpu.h"
#include "rsList.h"
-struct RsExpandKernelParams;
+struct RsExpandKernelDriverInfo;
namespace android {
namespace renderscript {
@@ -15,9 +15,7 @@
class ScriptExecutable;
class ScriptGroup2;
-typedef ::RsExpandKernelParams RsExpandKernelParams;
-
-typedef void (*ExpandFuncTy)(const RsExpandKernelParams*, uint32_t, uint32_t,
+typedef void (*ExpandFuncTy)(const RsExpandKernelDriverInfo*, uint32_t, uint32_t,
uint32_t);
typedef void (*InvokeFuncTy)(const void*, uint32_t);
diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp
index d14e687..aef6448 100644
--- a/driver/rsdRuntimeStubs.cpp
+++ b/driver/rsdRuntimeStubs.cpp
@@ -93,7 +93,6 @@
OPAQUETYPE(rs_mesh);
OPAQUETYPE(rs_font);
-OPAQUETYPE(rs_path);
#undef OPAQUETYPE
@@ -1407,9 +1406,6 @@
{ "_Z13rsClearObjectP10rs_sampler", (void *)&SC_ClearObject, true },
{ "_Z13rsClearObjectP9rs_script", (void *)&SC_ClearObject, true },
- { "_Z11rsSetObjectP7rs_pathS_", (void *)&SC_SetObject, true },
- { "_Z13rsClearObjectP7rs_path", (void *)&SC_ClearObject, true },
- { "_Z10rsIsObject7rs_path", (void *)&SC_IsObject, true },
{ "_Z11rsSetObjectP7rs_meshS_", (void *)&SC_SetObject, true },
{ "_Z13rsClearObjectP7rs_mesh", (void *)&SC_ClearObject, true },
@@ -1788,7 +1784,6 @@
IS_CLEAR_SET_OBJ(::rs_sampler, _Z10rsIsObject10rs_sampler, _Z11rsSetObjectP10rs_samplerS_)
IS_CLEAR_SET_OBJ(::rs_script, _Z10rsIsObject9rs_script, _Z11rsSetObjectP9rs_scriptS_)
-IS_CLEAR_SET_OBJ(::rs_path, _Z10rsIsObject7rs_path, _Z11rsSetObjectP7rs_pathS_)
IS_CLEAR_SET_OBJ(::rs_mesh, _Z10rsIsObject7rs_mesh, _Z11rsSetObjectP7rs_meshS_)
IS_CLEAR_SET_OBJ(::rs_program_fragment, _Z10rsIsObject19rs_program_fragment, _Z11rsSetObjectP19rs_program_fragmentS_)
IS_CLEAR_SET_OBJ(::rs_program_vertex, _Z10rsIsObject17rs_program_vertex, _Z11rsSetObjectP17rs_program_vertexS_)
diff --git a/driver/runtime/rs_core.c b/driver/runtime/rs_core.c
index 3fd48a1..856c42b 100644
--- a/driver/runtime/rs_core.c
+++ b/driver/runtime/rs_core.c
@@ -177,16 +177,64 @@
return (int)rsRand((float)min, (float)max);
}
+extern uint32_t __attribute__((overloadable)) rsGetArray0(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[0];
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetArray1(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[1];
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetArray2(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[2];
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetArray3(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[3];
+}
+
+extern rs_allocation_cubemap_face __attribute__((overloadable)) rsGetFace(rs_kernel_context ctxt) {
+ return (rs_allocation_cubemap_face)(((struct RsExpandKernelDriverInfo *)ctxt)->current.face);
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetLod(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->current.lod;
+}
+
extern uint32_t __attribute__((overloadable)) rsGetDimX(rs_kernel_context ctxt) {
- return ((struct RsExpandKernelParams *)ctxt)->dimX;
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.x;
}
extern uint32_t __attribute__((overloadable)) rsGetDimY(rs_kernel_context ctxt) {
- return ((struct RsExpandKernelParams *)ctxt)->dimY;
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.y;
}
extern uint32_t __attribute__((overloadable)) rsGetDimZ(rs_kernel_context ctxt) {
- return ((struct RsExpandKernelParams *)ctxt)->dimZ;
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.z;
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetDimArray0(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[0];
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetDimArray1(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[1];
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetDimArray2(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[2];
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetDimArray3(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[3];
+}
+
+extern bool __attribute__((overloadable)) rsGetDimHasFaces(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.face != 0;
+}
+
+extern uint32_t __attribute__((overloadable)) rsGetDimLod(rs_kernel_context ctxt) {
+ return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.lod;
}
#define PRIM_DEBUG(T) \
diff --git a/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java b/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
index 63a722f..e2de83a 100644
--- a/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
+++ b/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
@@ -68,6 +68,7 @@
unitTests.add(new UT_kernel2d(this, mRes, mCtx));
// unitTests.add(new UT_kernel3d(this, mRes, mCtx)); // does not pass reliably yet
unitTests.add(new UT_kernel2d_oldstyle(this, mRes, mCtx));
+ unitTests.add(new UT_ctxt_default(this, mRes, mCtx));
unitTests.add(new UT_bug_char(this, mRes, mCtx));
unitTests.add(new UT_clamp(this, mRes, mCtx));
unitTests.add(new UT_clamp_relaxed(this, mRes, mCtx));
diff --git a/java/tests/RsTest/src/com/android/rs/test/UT_ctxt_default.java b/java/tests/RsTest/src/com/android/rs/test/UT_ctxt_default.java
new file mode 100644
index 0000000..9993ca4
--- /dev/null
+++ b/java/tests/RsTest/src/com/android/rs/test/UT_ctxt_default.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.test;
+
+import android.content.Context;
+import android.content.res.Resources;
+import android.renderscript.*;
+import android.util.Log;
+
+public class UT_ctxt_default extends UnitTest {
+ private Resources mRes;
+ private Allocation A;
+ private Allocation B;
+
+ protected UT_ctxt_default(RSTestCore rstc, Resources res, Context ctx) {
+ super(rstc, "Kernel context default", ctx);
+ mRes = res;
+ }
+
+ private void initializeGlobals(RenderScript RS, ScriptC_ctxt_default s) {
+ Type.Builder typeBuilder = new Type.Builder(RS, Element.I32(RS));
+ int X = 2;
+ s.set_gDimX(X);
+ typeBuilder.setX(X);
+
+ A = Allocation.createTyped(RS, typeBuilder.create());
+ s.set_A(A);
+ B = Allocation.createTyped(RS, typeBuilder.create());
+ s.set_B(B);
+ return;
+ }
+
+ public void run() {
+ RenderScript pRS = RenderScript.create(mCtx);
+ ScriptC_ctxt_default s = new ScriptC_ctxt_default(pRS);
+ pRS.setMessageHandler(mRsMessage);
+ initializeGlobals(pRS, s);
+ s.forEach_init_vars(A);
+ s.forEach_root(A, B);
+ s.invoke_verify_root();
+ s.invoke_kernel_test();
+ pRS.finish();
+ waitForMessage();
+ pRS.destroy();
+ }
+}
diff --git a/java/tests/RsTest/src/com/android/rs/test/ctxt_default.rs b/java/tests/RsTest/src/com/android/rs/test/ctxt_default.rs
new file mode 100644
index 0000000..ba86ada
--- /dev/null
+++ b/java/tests/RsTest/src/com/android/rs/test/ctxt_default.rs
@@ -0,0 +1,68 @@
+#include "shared.rsh"
+
+rs_allocation A;
+rs_allocation B;
+uint32_t gDimX;
+static bool failed = false;
+
+void init_vars(int *out) {
+ *out = 7;
+}
+
+int RS_KERNEL root(int ain, rs_kernel_context ctxt, uint32_t x) {
+ _RS_ASSERT_EQU(rsGetArray0(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetArray1(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetArray2(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetArray3(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetFace(ctxt), RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+ _RS_ASSERT_EQU(rsGetLod(ctxt), 0);
+
+ _RS_ASSERT_EQU(rsGetDimY(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetDimZ(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetDimArray0(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetDimArray1(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetDimArray2(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetDimArray3(ctxt), 0);
+ _RS_ASSERT_EQU(rsGetDimHasFaces(ctxt), false);
+ _RS_ASSERT_EQU(rsGetDimLod(ctxt), 0);
+
+ if (!_RS_ASSERT_EQU(ain, 7))
+ rsDebug("root at x", x);
+ uint32_t dimX = rsGetDimX(ctxt);
+ _RS_ASSERT_EQU(dimX, gDimX);
+ return ain + x;
+}
+
+static bool test_root_output() {
+ bool failed = false;
+ int i;
+
+ for (i = 0; i < gDimX; i++) {
+ int bElt = rsGetElementAt_int(B, i);
+ int aElt = rsGetElementAt_int(A, i);
+ if (!_RS_ASSERT_EQU(bElt, (aElt + i)))
+ rsDebug("test_root_output at i", i);
+ }
+
+ if (failed) {
+ rsDebug("ctxt_default test_root_output FAILED", 0);
+ }
+ else {
+ rsDebug("ctxt_default test_root_output PASSED", 0);
+ }
+
+ return failed;
+}
+
+void verify_root() {
+ failed |= test_root_output();
+}
+
+void kernel_test() {
+ if (failed) {
+ rsSendToClientBlocking(RS_MSG_TEST_FAILED);
+ }
+ else {
+ rsSendToClientBlocking(RS_MSG_TEST_PASSED);
+ }
+}
diff --git a/rsDriverLoader.cpp b/rsDriverLoader.cpp
index b245b4c..37bd12a 100644
--- a/rsDriverLoader.cpp
+++ b/rsDriverLoader.cpp
@@ -144,7 +144,7 @@
ret &= fn(RS_HAL_GRAPHICS_FRAGMENT_DESTROY, (void **)&rsc->mHal.funcs.fragment.destroy);
ret &= fn(RS_HAL_GRAPHICS_MESH_INIT, (void **)&rsc->mHal.funcs.mesh.init);
ret &= fn(RS_HAL_GRAPHICS_MESH_DRAW, (void **)&rsc->mHal.funcs.mesh.draw);
- ret &= fn(RS_HAL_GRAPHICS_MESH_DESTROY, (void **)&rsc->mHal.funcs);
+ ret &= fn(RS_HAL_GRAPHICS_MESH_DESTROY, (void **)&rsc->mHal.funcs.mesh.destroy);
ret &= fn(RS_HAL_GRAPHICS_FB_INIT, (void **)&rsc->mHal.funcs.framebuffer.init);
ret &= fn(RS_HAL_GRAPHICS_FB_SET_ACTIVE, (void **)&rsc->mHal.funcs.framebuffer.setActive);
ret &= fn(RS_HAL_GRAPHICS_FB_DESTROY, (void **)&rsc->mHal.funcs.framebuffer.destroy);
diff --git a/rs_hal.h b/rs_hal.h
index f8e9e4b..b4070ac 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -34,6 +34,10 @@
* rsdHalAbort() will be called to clean up any partially
* allocated state.
*
+ * A driver should return FALSE for any conditions that will
+ * prevent the driver from working normally.
+ *
+ *
* If these are successful, the driver will be loaded and used
* normally. Teardown will use the normal
* context->mHal.funcs.shutdown() path. There will be no call
@@ -97,7 +101,6 @@
#ifndef __LP64__
typedef struct { const int* p; } __attribute__((packed, aligned(4))) rs_mesh;
-typedef struct { const int* p; } __attribute__((packed, aligned(4))) rs_path;
typedef struct { const int* p; } __attribute__((packed, aligned(4))) rs_program_fragment;
typedef struct { const int* p; } __attribute__((packed, aligned(4))) rs_program_vertex;
typedef struct { const int* p; } __attribute__((packed, aligned(4))) rs_program_raster;
@@ -469,8 +472,15 @@
* implementation of the function. If it does not have an entry
* for an enum, its should set the function pointer to NULL
*
+ * Returning NULL is expected in cases during development as new
+ * entry points are added that a driver may not understand. If
+ * the runtime receives a NULL it will decide if the function is
+ * required and will either continue loading or abort as needed.
+ *
+ *
* return: False will abort loading the driver, true indicates
* success
+ *
*/
bool rsdHalQueryHal(android::renderscript::RsHalInitEnums entry, void **fnPtr);
diff --git a/scriptc/rs_core.rsh b/scriptc/rs_core.rsh
index b67db6a..0aed990 100644
--- a/scriptc/rs_core.rsh
+++ b/scriptc/rs_core.rsh
@@ -200,6 +200,48 @@
#if (defined(RS_VERSION) && (RS_VERSION >= 23))
/**
+ * Return Array0 coordinate of kernel launch described by the specified launch context.
+ * Returns 0 if Array0 dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetArray0(rs_kernel_context ctxt);
+
+/**
+ * Return Array1 coordinate of kernel launch described by the specified launch context.
+ * Returns 0 if Array1 dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetArray1(rs_kernel_context ctxt);
+
+/**
+ * Return Array2 coordinate of kernel launch described by the specified launch context.
+ * Returns 0 if Array2 dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetArray2(rs_kernel_context ctxt);
+
+/**
+ * Return Array3 coordinate of kernel launch described by the specified launch context.
+ * Returns 0 if Array3 dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetArray3(rs_kernel_context ctxt);
+
+/**
+ * Return Face coordinate of kernel launch described by the specified launch context.
+ * Returns RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X if Face dimension is not present.
+ */
+extern rs_allocation_cubemap_face __attribute__((overloadable))
+ rsGetFace(rs_kernel_context ctxt);
+
+/**
+ * Return Lod coordinate of kernel launch described by the specified launch context.
+ * Returns 0 if Lod dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetLod(rs_kernel_context ctxt);
+
+/**
* Return X dimension of kernel launch described by the specified launch context.
*/
extern uint32_t __attribute__((overloadable))
@@ -219,6 +261,47 @@
extern uint32_t __attribute__((overloadable))
rsGetDimZ(rs_kernel_context ctxt);
+/**
+ * Return Array0 dimension of kernel launch described by the specified launch context.
+ * Returns 0 if Array0 dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetDimArray0(rs_kernel_context ctxt);
+
+/**
+ * Return Array1 dimension of kernel launch described by the specified launch context.
+ * Returns 0 if Array1 dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetDimArray1(rs_kernel_context ctxt);
+
+/**
+ * Return Array2 dimension of kernel launch described by the specified launch context.
+ * Returns 0 if Array2 dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetDimArray2(rs_kernel_context ctxt);
+
+/**
+ * Return Array3 dimension of kernel launch described by the specified launch context.
+ * Returns 0 if Array3 dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetDimArray3(rs_kernel_context ctxt);
+
+/**
+ * Is the Faces dimension present in the kernel launch described by the specified launch context?
+ */
+extern bool __attribute__((overloadable))
+ rsGetDimHasFaces(rs_kernel_context ctxt);
+
+/**
+ * Return Lod dimension of kernel launch described by the specified launch context.
+ * Returns 0 if Lod dimension is not present.
+ */
+extern uint32_t __attribute__((overloadable))
+ rsGetDimLod(rs_kernel_context ctxt);
+
#endif // (defined(RS_VERSION) && (RS_VERSION >= 23))
#endif