Merge "Split the RsForEachStubParamStruct in two."
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 499f890..a0564fc 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -48,7 +48,7 @@
using namespace android::renderscript;
typedef void (*outer_foreach_t)(
- const android::renderscript::RsForEachStubParamStruct *,
+ const android::renderscript::RsExpandKernelParams *,
uint32_t x1, uint32_t x2,
uint32_t instep, uint32_t outstep);
@@ -353,17 +353,21 @@
static void wc_xy(void *usr, uint32_t idx) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- p.lid = idx;
- uint32_t sig = mtls->sig;
+
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
+
+ // Used by CpuScriptGroup, IntrinsicBlur, and IntrinsicHistogram
+ kparams.lid = idx;
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
while (1) {
- uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
- uint32_t yEnd = yStart + mtls->mSliceSize;
+ uint32_t yEnd = yStart + mtls->mSliceSize;
+
yEnd = rsMin(yEnd, mtls->yEnd);
+
if (yEnd <= yStart) {
return;
}
@@ -371,29 +375,39 @@
//ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
//ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
- for (p.y = yStart; p.y < yEnd; p.y++) {
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) +
- (mtls->fep.eStrideOut * mtls->xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) +
- (mtls->fep.eStrideIn * mtls->xStart);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
+ for (kparams.y = yStart; kparams.y < yEnd; kparams.y++) {
+ kparams.out = mtls->fep.ptrOut +
+ (mtls->fep.yStrideOut * kparams.y) +
+ (mtls->fep.eStrideOut * mtls->xStart);
+
+ kparams.in = mtls->fep.ptrIn +
+ (mtls->fep.yStrideIn * kparams.y) +
+ (mtls->fep.eStrideIn * mtls->xStart);
+
+
+ fn(&kparams, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn,
+ mtls->fep.eStrideOut);
}
}
}
static void wc_x(void *usr, uint32_t idx) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- p.lid = idx;
- uint32_t sig = mtls->sig;
+
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
+
+ // Used by CpuScriptGroup, IntrinsicBlur, and IntrinsicHistogram
+ kparams.lid = idx;
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
while (1) {
- uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
- uint32_t xEnd = xStart + mtls->mSliceSize;
+ uint32_t xEnd = xStart + mtls->mSliceSize;
+
xEnd = rsMin(xEnd, mtls->xEnd);
+
if (xEnd <= xStart) {
return;
}
@@ -401,14 +415,15 @@
//ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
//ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
- p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
- fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
+ kparams.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
+ kparams.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
+
+ fn(&kparams, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
}
}
void RsdCpuReferenceImpl::launchThreads(const Allocation * ain, Allocation * aout,
- const RsScriptCall *sc, MTLaunchStruct *mtls) {
+ const RsScriptCall *sc, MTLaunchStruct *mtls) {
//android::StopWatch kernel_time("kernel time");
@@ -457,22 +472,34 @@
//ALOGE("launch 1");
} else {
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- uint32_t sig = mtls->sig;
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
//ALOGE("launch 3");
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
- for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
- for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
- uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] +
- mtls->fep.dimY * p.z + p.y;
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) +
- (mtls->fep.eStrideIn * mtls->xStart);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
+ for (uint32_t arrayIndex = mtls->arrayStart;
+ arrayIndex < mtls->arrayEnd; arrayIndex++) {
+
+ for (kparams.z = mtls->zStart; kparams.z < mtls->zEnd;
+ kparams.z++) {
+
+ for (kparams.y = mtls->yStart; kparams.y < mtls->yEnd;
+ kparams.y++) {
+
+ uint32_t offset =
+ kparams.dimY * kparams.dimZ * arrayIndex +
+ kparams.dimY * kparams.z + kparams.y;
+
+ kparams.out = mtls->fep.ptrOut +
+ (mtls->fep.yStrideOut * offset) +
+ (mtls->fep.eStrideOut * mtls->xStart);
+
+ kparams.in = mtls->fep.ptrIn +
+ (mtls->fep.yStrideIn * offset) +
+ (mtls->fep.eStrideIn * mtls->xStart);
+
+ fn(&kparams, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn,
+ mtls->fep.eStrideOut);
}
}
}
@@ -529,41 +556,46 @@
//ALOGE("launch 1");
} else {
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- uint32_t sig = mtls->sig;
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
// Allocate space for our input base pointers.
- p.ins = new const void*[inLen];
+ kparams.ins = new const void*[inLen];
// Allocate space for our input stride information.
- p.eStrideIns = new uint32_t[inLen];
+ kparams.eStrideIns = new uint32_t[inLen];
// Fill our stride information.
- for (int index = inLen; --index >= 0;) {
- p.eStrideIns[index] = mtls->fep.inStrides[index].eStride;
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ kparams.eStrideIns[inIndex] = mtls->fep.inStrides[inIndex].eStride;
}
//ALOGE("launch 3");
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- uint32_t offset_invariant = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0];
+ for (uint32_t arrayIndex = mtls->arrayStart;
+ arrayIndex < mtls->arrayEnd; arrayIndex++) {
- for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
- uint32_t offset_part = offset_invariant * p.ar[0];
+ for (kparams.z = mtls->zStart; kparams.z < mtls->zEnd;
+ kparams.z++) {
- for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
- for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
- uint32_t offset = offset_part + mtls->fep.dimY * p.z + p.y;
+ for (kparams.y = mtls->yStart; kparams.y < mtls->yEnd;
+ kparams.y++) {
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
+ uint32_t offset =
+ mtls->fep.dimY * mtls->fep.dimZ * arrayIndex +
+ mtls->fep.dimY * kparams.z + kparams.y;
- for (int index = inLen; --index >= 0;) {
- StridePair &strides = mtls->fep.inStrides[index];
+ kparams.out = mtls->fep.ptrOut +
+ (mtls->fep.yStrideOut * offset) +
+ (mtls->fep.eStrideOut * mtls->xStart);
- p.ins[index] = mtls->fep.ptrIns[index] +
- (strides.yStride * offset) +
- (strides.eStride * mtls->xStart);
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ StridePair &strides = mtls->fep.inStrides[inIndex];
+
+ kparams.ins[inIndex] =
+ mtls->fep.ptrIns[inIndex] +
+ (strides.yStride * offset) +
+ (strides.eStride * mtls->xStart);
}
/*
@@ -571,14 +603,15 @@
* kernels get their stride information from a member of p
* that points to an array.
*/
- fn(&p, mtls->xStart, mtls->xEnd, 0, mtls->fep.eStrideOut);
+ fn(&kparams, mtls->xStart, mtls->xEnd, 0,
+ mtls->fep.eStrideOut);
}
}
}
// Free our arrays.
- delete[] p.ins;
- delete[] p.eStrideIns;
+ delete[] kparams.ins;
+ delete[] kparams.eStrideIns;
}
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index c54dca2..5d4b6cc 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -34,32 +34,18 @@
namespace android {
namespace renderscript {
-typedef struct {
+struct StridePair {
uint32_t eStride;
uint32_t yStride;
-} StridePair;
+};
-typedef struct {
- const void *in;
- void *out;
+struct RsExpandKernelDriverInfo {
const void *usr;
uint32_t usrLen;
- uint32_t x;
- uint32_t y;
- uint32_t z;
- uint32_t lod;
- RsAllocationCubemapFace face;
- uint32_t ar[16];
-
- const void **ins;
- uint32_t *eStrideIns;
-
- uint32_t lid;
uint32_t dimX;
uint32_t dimY;
uint32_t dimZ;
- uint32_t dimArray;
const uint8_t *ptrIn;
uint8_t *ptrOut;
@@ -71,7 +57,54 @@
const uint8_t** ptrIns;
StridePair* inStrides;
-} RsForEachStubParamStruct;
+
+ ~RsExpandKernelDriverInfo() {
+ if (ptrIns != NULL) {
+ delete[] ptrIns;
+ }
+
+ if (inStrides != NULL) {
+ delete[] inStrides;
+ }
+ }
+};
+
+struct RsExpandKernelParams {
+
+ // Used by kernels
+ const void *in;
+ void *out;
+ uint32_t y;
+ uint32_t z;
+ uint32_t lid;
+
+ const void **ins;
+ uint32_t *eStrideIns;
+
+ // Used by ScriptGroup and user kernels.
+ const void *usr;
+
+ // Used by intrinsics
+ uint32_t dimX;
+ uint32_t dimY;
+ uint32_t dimZ;
+
+ /*
+ * FIXME: This is only used by the blend intrinsic. If possible, we should
+ * modify blend to not need it.
+ */
+ uint32_t slot;
+
+ /// Copy fields needed by a kernel from a driver struct.
+ void takeFields(const RsExpandKernelDriverInfo &dstruct) {
+ this->usr = dstruct.usr;
+ this->slot = dstruct.slot;
+
+ this->dimX = dstruct.dimX;
+ this->dimY = dstruct.dimY;
+ this->dimZ = dstruct.dimZ;
+ }
+};
extern bool gArchUseSIMD;
@@ -89,7 +122,7 @@
} ScriptTLSStruct;
typedef struct {
- RsForEachStubParamStruct fep;
+ RsExpandKernelDriverInfo fep;
RsdCpuReferenceImpl *rsc;
RsdCpuScriptImpl *script;
diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
index a7c9487..c839c19 100644
--- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
@@ -38,7 +38,7 @@
protected:
ObjectBaseRef<Allocation> mLUT;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -58,7 +58,7 @@
int dimx, int dimy, int dimz);
-void RsdCpuScriptIntrinsic3DLUT::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index 228b887..b604658 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -33,7 +33,7 @@
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
protected:
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -110,7 +110,7 @@
extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
-void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index c1ca4e2..22e1176 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -44,10 +44,10 @@
int mIradius;
ObjectBaseRef<Allocation> mAlloc;
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
void ComputeGaussianWeights();
@@ -113,7 +113,7 @@
-static void OneVU4(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y,
+static void OneVU4(const RsExpandKernelParams *p, float4 *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x*4;
@@ -131,7 +131,7 @@
out->xyzw = blurredPixel;
}
-static void OneVU1(const RsForEachStubParamStruct *p, float *out, int32_t x, int32_t y,
+static void OneVU1(const RsExpandKernelParams *p, float *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x;
@@ -243,7 +243,7 @@
}
}
-static void OneHU4(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x,
+static void OneHU4(const RsExpandKernelParams *p, uchar4 *out, int32_t x,
const float4 *ptrIn, const float* gPtr, int iradius) {
float4 blurredPixel = 0;
@@ -258,7 +258,7 @@
out->xyzw = convert_uchar4(blurredPixel);
}
-static void OneHU1(const RsForEachStubParamStruct *p, uchar *out, int32_t x,
+static void OneHU1(const RsExpandKernelParams *p, uchar *out, int32_t x,
const float *ptrIn, const float* gPtr, int iradius) {
float blurredPixel = 0;
@@ -274,7 +274,7 @@
}
-void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -345,7 +345,7 @@
}
}
-void RsdCpuScriptIntrinsicBlur::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
float buf[4 * 2048];
diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
index 9b234f4..a194048 100644
--- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
+++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
@@ -187,7 +187,7 @@
FunctionTab_t mFnTab;
#endif
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
void updateCoeffCache(float fpMul, float addMul);
@@ -777,7 +777,7 @@
}
-static void One(const RsForEachStubParamStruct *p, void *out,
+static void One(const RsExpandKernelParams *p, void *out,
const void *py, const float* coeff, const float *add,
uint32_t vsin, uint32_t vsout, bool fin, bool fout) {
@@ -878,7 +878,7 @@
//ALOGE("out %p %f %f %f %f", out, ((float *)out)[0], ((float *)out)[1], ((float *)out)[2], ((float *)out)[3]);
}
-void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index 552a835..d5af88c 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -42,22 +42,22 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF1(const RsForEachStubParamStruct *p,
+ static void kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF2(const RsForEachStubParamStruct *p,
+ static void kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF4(const RsForEachStubParamStruct *p,
+ static void kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -88,7 +88,7 @@
const void *y2, const short *coef, uint32_t count);
-static void ConvolveOneU4(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
+static void ConvolveOneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
const float* coeff) {
@@ -110,7 +110,7 @@
*out = o;
}
-static void ConvolveOneU2(const RsForEachStubParamStruct *p, uint32_t x, uchar2 *out,
+static void ConvolveOneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2,
const float* coeff) {
@@ -131,7 +131,7 @@
*out = convert_uchar2(px);
}
-static void ConvolveOneU1(const RsForEachStubParamStruct *p, uint32_t x, uchar *out,
+static void ConvolveOneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2,
const float* coeff) {
@@ -150,7 +150,7 @@
*out = clamp(px, 0.f, 255.f);
}
-static void ConvolveOneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out,
+static void ConvolveOneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2,
const float* coeff) {
@@ -161,7 +161,7 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF2(const RsForEachStubParamStruct *p, uint32_t x, float2 *out,
+static void ConvolveOneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2,
const float* coeff) {
@@ -172,7 +172,7 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF1(const RsForEachStubParamStruct *p, uint32_t x, float *out,
+static void ConvolveOneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2,
const float* coeff) {
@@ -183,7 +183,7 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -230,7 +230,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -275,7 +275,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -320,7 +320,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -365,7 +365,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
@@ -409,7 +409,7 @@
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index 48b5ca5..8421175 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -42,22 +42,22 @@
ObjectBaseRef<Allocation> alloc;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF1(const RsForEachStubParamStruct *p,
+ static void kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF2(const RsForEachStubParamStruct *p,
+ static void kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelF4(const RsForEachStubParamStruct *p,
+ static void kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
@@ -86,7 +86,7 @@
}
-static void OneU4(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
+static void OneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2, const uchar4 *py3, const uchar4 *py4,
const float* coeff) {
@@ -129,7 +129,7 @@
*out = convert_uchar4(px);
}
-static void OneU2(const RsForEachStubParamStruct *p, uint32_t x, uchar2 *out,
+static void OneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2, const uchar2 *py3, const uchar2 *py4,
const float* coeff) {
@@ -172,7 +172,7 @@
*out = convert_uchar2(px);
}
-static void OneU1(const RsForEachStubParamStruct *p, uint32_t x, uchar *out,
+static void OneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2, const uchar *py3, const uchar *py4,
const float* coeff) {
@@ -215,7 +215,7 @@
*out = px;
}
-static void OneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out,
+static void OneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2, const float4 *py3, const float4 *py4,
const float* coeff) {
@@ -257,7 +257,7 @@
*out = px;
}
-static void OneF2(const RsForEachStubParamStruct *p, uint32_t x, float2 *out,
+static void OneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2, const float2 *py3, const float2 *py4,
const float* coeff) {
@@ -299,7 +299,7 @@
*out = px;
}
-static void OneF1(const RsForEachStubParamStruct *p, uint32_t x, float *out,
+static void OneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2, const float *py3, const float *py4,
const float* coeff) {
@@ -346,7 +346,7 @@
const void *y2, const void *y3, const void *y4,
const short *coef, uint32_t count);
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -406,7 +406,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -455,7 +455,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -504,7 +504,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -553,7 +553,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
@@ -602,7 +602,7 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index cdfe7d1..1c430b7 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -49,29 +49,29 @@
int *mSums;
ObjectBaseRef<Allocation> mAllocOut;
- static void kernelP1U4(const RsForEachStubParamStruct *p,
+ static void kernelP1U4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1U3(const RsForEachStubParamStruct *p,
+ static void kernelP1U3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1U2(const RsForEachStubParamStruct *p,
+ static void kernelP1U2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1U1(const RsForEachStubParamStruct *p,
+ static void kernelP1U1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L4(const RsForEachStubParamStruct *p,
+ static void kernelP1L4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L3(const RsForEachStubParamStruct *p,
+ static void kernelP1L3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L2(const RsForEachStubParamStruct *p,
+ static void kernelP1L2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelP1L1(const RsForEachStubParamStruct *p,
+ static void kernelP1L1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
@@ -160,7 +160,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -177,7 +177,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -193,7 +193,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -208,7 +208,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -226,7 +226,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -243,7 +243,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -259,7 +259,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
@@ -274,7 +274,7 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp
index 5b2adc5..db73a83 100644
--- a/cpu_ref/rsCpuIntrinsicLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsicLUT.cpp
@@ -38,7 +38,7 @@
protected:
ObjectBaseRef<Allocation> lut;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -53,7 +53,7 @@
}
-void RsdCpuScriptIntrinsicLUT::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicLoopFilter.cpp b/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
index c31fcdf..45f85e5 100644
--- a/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
+++ b/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
@@ -174,7 +174,7 @@
ObjectBaseRef<Allocation> mFrameBuffer;
void doLoopFilter();
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -182,7 +182,7 @@
}
}
-void RsdCpuScriptIntrinsicLoopFilter::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicLoopFilter::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicLoopFilter *cp = (RsdCpuScriptIntrinsicLoopFilter*)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index d18eb8f..af1127e 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -46,13 +46,13 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -179,7 +179,7 @@
return (uchar)p;
}
-void RsdCpuScriptIntrinsicResize::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
@@ -219,7 +219,7 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
@@ -259,7 +259,7 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index 563b3e1..d9ab98c 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -46,7 +46,7 @@
protected:
ObjectBaseRef<Allocation> alloc;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
};
@@ -101,7 +101,7 @@
extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart, size_t xend);
extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, size_t xstart, size_t xend);
-void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)p->usr;
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index f4ca1ed..d51e9e3 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -47,7 +47,7 @@
class RsdCpuScriptImpl : public RsdCpuReferenceImpl::CpuScript {
public:
typedef void (*outer_foreach_t)(
- const RsForEachStubParamStruct *,
+ const RsExpandKernelParams *,
uint32_t x1, uint32_t x2,
uint32_t instep, uint32_t outstep);
#ifdef RS_COMPATIBILITY_LIB
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 40eddf2..1e42185 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -44,66 +44,71 @@
}
-typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
+typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);
-void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
+void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep) {
- const ScriptList *sl = (const ScriptList *)p->usr;
- RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
- const void *oldUsr = p->usr;
+ const ScriptList *sl = (const ScriptList *)kparams->usr;
+ RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
- for(size_t ct=0; ct < sl->count; ct++) {
+ for (size_t ct = 0; ct < sl->count; ct++) {
ScriptGroupRootFunc_t func;
- func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
- mp->usr = sl->usrPtrs[ct];
+ func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
+ mkparams->usr = sl->usrPtrs[ct];
- mp->ptrIn = NULL;
- mp->in = NULL;
- mp->ptrOut = NULL;
- mp->out = NULL;
+ mkparams->in = NULL;
+ mkparams->out = NULL;
uint32_t istep = 0;
uint32_t ostep = 0;
if (sl->ins[ct]) {
- mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+ mkparams->in =
+ (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+
istep = sl->ins[ct]->mHal.state.elementSizeBytes;
- mp->in = mp->ptrIn;
+
if (sl->inExts[ct]) {
- mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
- } else {
- if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
- mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
- }
+ mkparams->in =
+ (const uint8_t *)mkparams->in +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y;
+
+ } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
+ mkparams->in =
+ (const uint8_t *)mkparams->in +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid;
}
}
if (sl->outs[ct]) {
- mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
- mp->out = mp->ptrOut;
+ mkparams->out =
+ (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
+
ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
+
if (sl->outExts[ct]) {
- mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
- } else {
- if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
- mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
- }
+ mkparams->out =
+ (uint8_t *)mkparams->out +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y;
+
+ } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
+ mkparams->out =
+ (uint8_t *)mkparams->out +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
}
}
//ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
- func(p, xstart, xend, istep, ostep);
+ func(kparams, xstart, xend, istep, ostep);
}
//ALOGE("script group root");
- //ConvolveParams *cp = (ConvolveParams *)p->usr;
-
- mp->usr = oldUsr;
+ mkparams->usr = sl;
}
@@ -245,5 +250,3 @@
}
}
}
-
-
diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h
index 78e179d..71f2dd8 100644
--- a/cpu_ref/rsCpuScriptGroup.h
+++ b/cpu_ref/rsCpuScriptGroup.h
@@ -33,7 +33,7 @@
CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg);
bool init();
- static void scriptGroupRoot(const RsForEachStubParamStruct *p,
+ static void scriptGroupRoot(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
uint32_t instep, uint32_t outstep);