resolved conflicts for merge of adbc54f3 to lmp-dev-plus-aosp
Change-Id: I04f438a6fc4cd374a821c32102cc758d9a30d731
diff --git a/Android.mk b/Android.mk
index e8ec005..f6aab50 100644
--- a/Android.mk
+++ b/Android.mk
@@ -1,7 +1,7 @@
LOCAL_PATH:=$(call my-dir)
-rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable -fno-exceptions
+rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable -fno-exceptions -std=c++11
ifeq ($(TARGET_BUILD_PDK), true)
rs_base_CFLAGS += -D__RS_PDK__
endif
@@ -176,6 +176,9 @@
LOCAL_C_INCLUDES += external/libcxx/include
LOCAL_CFLAGS += $(rs_base_CFLAGS)
+# TODO: external/freetype still uses the register keyword
+# Bug: 17163086
+LOCAL_CFLAGS += -Wno-deprecated-register
LOCAL_CPPFLAGS += -fno-exceptions
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index aa26c6a..3bd7d1e 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -1,7 +1,8 @@
LOCAL_PATH:=$(call my-dir)
-rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable -fno-exceptions
+rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable \
+ -fno-exceptions -std=c++11
ifeq ($(TARGET_BUILD_PDK), true)
rs_base_CFLAGS += -D__RS_PDK__
endif
@@ -73,7 +74,7 @@
ifeq ($(ARCH_X86_HAVE_SSSE3),true)
LOCAL_CFLAGS += -DARCH_X86_HAVE_SSSE3
LOCAL_SRC_FILES+= \
- rsCpuIntrinsics_x86.c
+ rsCpuIntrinsics_x86.cpp
endif
LOCAL_SHARED_LIBRARIES += libRS libcutils libutils liblog libsync libc++
diff --git a/cpu_ref/linkloader/include/ELFObject.h b/cpu_ref/linkloader/include/ELFObject.h
index 0c195b9..86ac6bf 100644
--- a/cpu_ref/linkloader/include/ELFObject.h
+++ b/cpu_ref/linkloader/include/ELFObject.h
@@ -140,6 +140,11 @@
void *context,
ELFSectionRelTableTy *reltab,
ELFSectionProgBitsTy *text);
+
+ void relocateMIPS64(void *(*find_sym)(void *context, char const *name),
+ void *context,
+ ELFSectionRelTableTy *reltab,
+ ELFSectionProgBitsTy *text);  // selected by the relocation dispatcher for EM_MIPS objects whose header class is ELFCLASS64
};
#include "impl/ELFObject.hxx"
diff --git a/cpu_ref/linkloader/include/ELFReloc.h b/cpu_ref/linkloader/include/ELFReloc.h
index 84754b9..a6d7f5e 100644
--- a/cpu_ref/linkloader/include/ELFReloc.h
+++ b/cpu_ref/linkloader/include/ELFReloc.h
@@ -134,13 +134,27 @@
public:
xword_t getSymTabIndex() const {
+#if defined(__mips__)
+/*
+ * Packed r_info on MIPS is:
+ * r_sym (4) - r_ssym (1) - r_type3 (1) - r_type2 (1) - r_type (1)
+ * Each entry represents up to three actual relocations.
+ * Thus, the macros look different.
+ */
+#define ELF64_R_SYM(i) ((i)&0xffffffffL)
+#else
#define ELF64_R_SYM(i) ((i)>>32)
+#endif
return ELF64_R_SYM(this->r_info);
#undef ELF64_R_SYM
}
xword_t getType() const {
+#if defined(__mips__)
+#define ELF64_R_TYPE(i) ((i)>>32)
+#else
#define ELF64_R_TYPE(i) ((i)&0xffffffffL)
+#endif
return ELF64_R_TYPE(this->r_info);
#undef ELF64_R_TYPE
}
diff --git a/cpu_ref/linkloader/include/GOT.h b/cpu_ref/linkloader/include/GOT.h
index b72bf66..c86ca77 100644
--- a/cpu_ref/linkloader/include/GOT.h
+++ b/cpu_ref/linkloader/include/GOT.h
@@ -20,10 +20,15 @@
#include "utils/rsl_assert.h"
#include "ELF.h"
-#define GP_OFFSET ((int)0x8000)
-#define GOT_SIZE (1 << 16) // bytes
-#define GOT_ENTRY_SIZE 4 // bytes
-#define NUM_OF_GOT_ENTRY (GOT_SIZE/GOT_ENTRY_SIZE)
+#define GP_OFFSET ((int)0x8000)
+#ifdef __LP64__
+#define GOT_SIZE (1 << 17) // bytes
+#define GOT_ENTRY_SIZE 8 // bytes
+#else
+#define GOT_SIZE (1 << 16) // bytes
+#define GOT_ENTRY_SIZE 4 // bytes
+#endif
+#define NUM_OF_GOT_ENTRY (GOT_SIZE/GOT_ENTRY_SIZE)
void *got_address();
int search_got(int symbol_index, void *addr, uint8_t bind_type);
diff --git a/cpu_ref/linkloader/include/impl/ELFObject.hxx b/cpu_ref/linkloader/include/impl/ELFObject.hxx
index 81736b5..d849a93 100644
--- a/cpu_ref/linkloader/include/impl/ELFObject.hxx
+++ b/cpu_ref/linkloader/include/impl/ELFObject.hxx
@@ -1022,6 +1022,167 @@
}
}
+template <unsigned Bitwidth>  // Walks reltab and patches text in place; each entry carries three packed relocation type bytes that are processed in order.
+inline void ELFObject<Bitwidth>::
+relocateMIPS64(void *(*find_sym)(void *context, char const *name),
+ void *context,
+ ELFSectionRelTableTy *reltab,
+ ELFSectionProgBitsTy *text) {
+ ELFSectionSymTabTy *symtab =
+ static_cast<ELFSectionSymTabTy *>(getSectionByName(".symtab"));
+ rsl_assert(symtab && "Symtab is required.");
+
+ int64_t calculatedValue;  // result of the most recent sub-relocation; NOTE(review): no initializer, and the R_MIPS_NONE/default cases leave it unset — confirm it cannot be read first
+ bool applyRelocation = true;  // whether the current sub-relocation writes its result into the patched location
+ bool useCalculatedValue;  // set when the previous sub-relocation was computed but not written
+
+ for (size_t i = 0; i < reltab->size(); ++i) {
+ ELFRelocTy *rel = (*reltab)[i];
+ ELFSymbolTy *sym = (*symtab)[rel->getSymTabIndex()];
+
+ typedef int64_t Inst_t;  // every access through inst below reads/writes 8 bytes
+ Inst_t *inst = (Inst_t *)&(*text)[rel->getOffset()];  // location being patched
+ Inst_t P = (Inst_t)(uintptr_t)inst;  // P: address of the patched location
+ Inst_t A = (Inst_t)rel->getAddend();  // A: addend from the relocation entry
+ Inst_t S = (Inst_t)(uintptr_t)sym->getAddress(EM_MIPS);  // S: symbol address
+
+ if (S == 0) {  // no address known yet: ask find_sym and record the answer on the symbol
+ S = (Inst_t)(uintptr_t)find_sym(context, sym->getName());
+ if (!S) {
+ missingSymbols = true;
+ }
+ sym->setAddress((void *)S);
+ }
+
+ uint8_t rtype[3];  // the three packed type bytes: bits 31..24, 23..16 and 15..8 of getType()
+ rtype[0] = (rel->getType() >> 24) & 0xFF;
+ rtype[1] = (rel->getType() >> 16) & 0xFF;
+ rtype[2] = (rel->getType() >> 8) & 0xFF;
+
+ for (size_t j = 0; j < 3; ++j) {
+ useCalculatedValue = !applyRelocation;  // applyRelocation still holds the previous step's decision here
+ if (j < 2) {
+ applyRelocation = (rtype[j+1] == R_MIPS_NONE);  // write only if the next packed type is R_MIPS_NONE
+ } else if ((i + 1) < reltab->size()) {
+ // Reached for the third packed type when more table entries follow:
+ // the result is written only if the next entry patches a different location.
+ ELFRelocTy *next_rel = (*reltab)[i + 1];
+ Inst_t *next_inst = (Inst_t *)&(*text)[next_rel->getOffset()];
+ applyRelocation = (inst != next_inst);
+ }  // NOTE(review): on the final table entry neither branch runs for j == 2, so applyRelocation keeps its j == 1 value
+
+ if (useCalculatedValue) {  // chain: the previous result becomes the addend, with no symbol contribution
+ S = 0;
+ A = calculatedValue;
+ }
+
+ switch (rtype[j]) {
+ default:
+ rsl_assert(0 && "Not implemented relocation type.");
+ break;
+
+ case R_MIPS_NONE:
+ break;
+
+ case R_MIPS_64:
+ calculatedValue = S + A;
+ if (applyRelocation) {
+ *inst = calculatedValue;  // overwrites the whole 8-byte location
+ }
+ break;
+
+ case R_MIPS_26:
+ if (sym->getBindingAttribute() == STB_LOCAL) {
+ // Local binding: bits 31..28 of P + 4 are ORed into the addend first.
+ A |= ((P + 4) & 0xF0000000);
+ A += S;
+ calculatedValue = (A >> 2);
+ if (applyRelocation) {
+ *inst |= (calculatedValue & 0x3FFFFFF);  // low 26 bits
+ }
+ } else {
+ // External binding: addend plus symbol address only.
+ A += S;
+ calculatedValue = (A >> 2);
+ if (applyRelocation) {
+ *inst |= (calculatedValue & 0x3FFFFFF);  // low 26 bits
+ }
+ }
+ break;
+
+ case R_MIPS_CALL16:
+ case R_MIPS_GOT_PAGE:
+ case R_MIPS_GOT_DISP: {
+ A = A & 0xFFFF;
+ int got_index = search_got((int)rel->getSymTabIndex(),
+ (void *)(S + A),
+ sym->getBindingAttribute());
+ calculatedValue = (got_index << 3) - 0x7FF0;  // got_index * 8 minus 0x7FF0; presumably the gp-relative offset of the GOT slot — TODO confirm
+ if (applyRelocation) {
+ *inst |= (calculatedValue & 0xFFFF);
+ }
+ break;
+ }
+
+ case R_MIPS_GPREL32:
+ calculatedValue = A + S - ((int64_t)got_address() + 0x7FF0);
+ if (applyRelocation) {
+ *inst |= calculatedValue;  // NOTE(review): ORs the full 64-bit value, no 32-bit mask — confirm intended
+ }
+ break;
+
+ case R_MIPS_GOT_OFST:
+ calculatedValue = (S + A) & 0xFFFF;  // bits 15..0 of S + A
+ if (applyRelocation) {
+ *inst |= calculatedValue;
+ }
+ break;
+
+ case R_MIPS_GPREL16:
+ calculatedValue = A + S - ((int64_t)got_address() + 0x7FF0);
+ if (applyRelocation) {
+ *inst |= (calculatedValue & 0xFFFF);
+ }
+ break;
+
+ case R_MIPS_SUB:
+ calculatedValue = S - A;
+ if (applyRelocation) {
+ *inst = calculatedValue;  // overwrites the whole 8-byte location
+ }
+ break;
+
+ case R_MIPS_HI16:
+ calculatedValue = ((S + A + 0x8000) >> 16) & 0xFFFF;  // bits 31..16 of S + A + 0x8000
+ if (applyRelocation) {
+ *inst |= calculatedValue;
+ }
+ break;
+
+ case R_MIPS_LO16:
+ calculatedValue = (S + A) & 0xFFFF;  // bits 15..0 of S + A
+ if (applyRelocation) {
+ *inst |= calculatedValue;
+ }
+ break;
+
+ case R_MIPS_HIGHER:
+ calculatedValue = ((S + A + 0x80008000) >> 32) & 0xFFFF;  // bits 47..32 of S + A + 0x80008000
+ if (applyRelocation) {
+ *inst |= calculatedValue;
+ }
+ break;
+
+ case R_MIPS_HIGHEST:
+ calculatedValue = ((S + A + 0x800080008000) >> 48) & 0xFFFF;  // bits 63..48 of S + A + 0x800080008000
+ if (applyRelocation) {
+ *inst |= calculatedValue;
+ }
+ break;
+ }
+ }
+ }
+}
// TODO: Refactor all relocations.
template <unsigned Bitwidth>
@@ -1121,7 +1282,11 @@
relocateX86_64(find_sym, context, reltab, need_rel);
break;
case EM_MIPS:
- relocateMIPS(find_sym, context, reltab, need_rel);
+ if (getHeader()->getClass() == ELFCLASS64) {
+ relocateMIPS64(find_sym, context, reltab, need_rel);
+ } else {
+ relocateMIPS(find_sym, context, reltab, need_rel);
+ }
break;
default:
diff --git a/cpu_ref/linkloader/lib/GOT.cpp b/cpu_ref/linkloader/lib/GOT.cpp
index 3f523c5..7e85fb1 100644
--- a/cpu_ref/linkloader/lib/GOT.cpp
+++ b/cpu_ref/linkloader/lib/GOT.cpp
@@ -33,7 +33,12 @@
// For local symbols (R_MIPS_GOT16), we only store the high 16-bit value
// after adding 0x8000.
if (bind_type == STB_LOCAL)
+#ifdef __LP64__
+ addr = (void *)(((intptr_t)addr + 0x8000) & 0xFFFFFFFFFFFF0000);
+#else
addr = (void *)(((intptr_t)addr + 0x8000) & 0xFFFF0000);
+#endif
+
for (i = 0; i < got_symbol_count; i++) {
if (got_symbol_indexes[i] == symbol_index) {
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 3124ba1..752f169 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -48,9 +48,8 @@
using namespace android::renderscript;
typedef void (*outer_foreach_t)(
- const android::renderscript::RsForEachStubParamStruct *,
- uint32_t x1, uint32_t x2,
- uint32_t instep, uint32_t outstep);
+ const android::renderscript::RsExpandKernelParams *,
+ uint32_t x1, uint32_t x2, uint32_t outstep);
static pthread_key_t gThreadTLSKey = 0;
@@ -350,153 +349,132 @@
}
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
+typedef void (*walk_loop_t)(MTLaunchStruct*,
+ RsExpandKernelParams&,
+ outer_foreach_t);
-static void wc_xy(void *usr, uint32_t idx) {
+
+static void walk_wrapper(void* usr, uint32_t idx, walk_loop_t walk_loop) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- p.lid = idx;
- uint32_t sig = mtls->sig;
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- while (1) {
- uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
- uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
- uint32_t yEnd = yStart + mtls->mSliceSize;
- yEnd = rsMin(yEnd, mtls->yEnd);
- if (yEnd <= yStart) {
- return;
- }
+ uint32_t inLen = mtls->fep.inLen;
- //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
- //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
- for (p.y = yStart; p.y < yEnd; p.y++) {
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) +
- (mtls->fep.eStrideOut * mtls->xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) +
- (mtls->fep.eStrideIn * mtls->xStart);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
+ // Used by CpuScriptGroup, IntrinsicBlur, and IntrinsicHistogram
+ kparams.lid = idx;
+
+ if (inLen > 0) {
+ // Allocate space for our input base pointers.
+ kparams.ins = (const void**)alloca(inLen * sizeof(void*));
+
+ // Allocate space for our input stride information.
+ kparams.inEStrides = (uint32_t*)alloca(inLen * sizeof(uint32_t));
+
+ // Fill our stride information.
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ kparams.inEStrides[inIndex] = mtls->fep.inStrides[inIndex].eStride;
}
}
-}
-
-static void wc_x(void *usr, uint32_t idx) {
- MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- p.lid = idx;
- uint32_t sig = mtls->sig;
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- while (1) {
- uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
- uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
- uint32_t xEnd = xStart + mtls->mSliceSize;
- xEnd = rsMin(xEnd, mtls->xEnd);
- if (xEnd <= xStart) {
- return;
- }
- //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
- //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
-
- p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
- fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
- }
+ walk_loop(mtls, kparams, fn);
}
-void RsdCpuReferenceImpl::launchThreads(const Allocation * ain, Allocation * aout,
- const RsScriptCall *sc, MTLaunchStruct *mtls) {
+static void walk_2d(void *usr, uint32_t idx) {
+ walk_wrapper(usr, idx, [](MTLaunchStruct *mtls,
+ RsExpandKernelParams &kparams,
+ outer_foreach_t fn) {
- //android::StopWatch kernel_time("kernel time");
+ while (1) {
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
+ uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
+ uint32_t yEnd = yStart + mtls->mSliceSize;
- if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
- const size_t targetByteChunk = 16 * 1024;
- mInForEach = true;
- if (mtls->fep.dimY > 1) {
- uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4);
- uint32_t s2 = 0;
+ yEnd = rsMin(yEnd, mtls->yEnd);
- // This chooses our slice size to rate limit atomic ops to
- // one per 16k bytes of reads/writes.
- if (mtls->fep.yStrideOut) {
- s2 = targetByteChunk / mtls->fep.yStrideOut;
- } else {
- s2 = targetByteChunk / mtls->fep.yStrideIn;
- }
- mtls->mSliceSize = rsMin(s1, s2);
-
- if(mtls->mSliceSize < 1) {
- mtls->mSliceSize = 1;
+ if (yEnd <= yStart) {
+ return;
}
- // mtls->mSliceSize = 2;
- launchThreads(wc_xy, mtls);
- } else {
- uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4);
- uint32_t s2 = 0;
+ for (kparams.y = yStart; kparams.y < yEnd; kparams.y++) {
+ kparams.out = mtls->fep.outPtr +
+ (mtls->fep.outStride.yStride * kparams.y) +
+ (mtls->fep.outStride.eStride * mtls->xStart);
- // This chooses our slice size to rate limit atomic ops to
- // one per 16k bytes of reads/writes.
- if (mtls->fep.eStrideOut) {
- s2 = targetByteChunk / mtls->fep.eStrideOut;
- } else {
- s2 = targetByteChunk / mtls->fep.eStrideIn;
- }
- mtls->mSliceSize = rsMin(s1, s2);
+ for (int inIndex = mtls->fep.inLen; --inIndex >= 0;) {
+ StridePair &strides = mtls->fep.inStrides[inIndex];
- if(mtls->mSliceSize < 1) {
- mtls->mSliceSize = 1;
- }
-
- launchThreads(wc_x, mtls);
- }
- mInForEach = false;
-
- //ALOGE("launch 1");
- } else {
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- uint32_t sig = mtls->sig;
-
- //ALOGE("launch 3");
- outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
- for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
- for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
- uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] +
- mtls->fep.dimY * p.z + p.y;
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
- p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) +
- (mtls->fep.eStrideIn * mtls->xStart);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
+ kparams.ins[inIndex] =
+ mtls->fep.inPtrs[inIndex] +
+ (strides.yStride * kparams.y) +
+ (strides.eStride * mtls->xStart);
}
+
+ fn(&kparams, mtls->xStart, mtls->xEnd,
+ mtls->fep.outStride.eStride);
}
}
- }
+ });
}
-void RsdCpuReferenceImpl::launchThreads(const Allocation** ains, uint32_t inLen, Allocation* aout,
- const RsScriptCall* sc, MTLaunchStruct* mtls) {
+static void walk_1d(void *usr, uint32_t idx) {  // passed to launchThreads when fep.dimY is not > 1; processes the x range slice by slice
+ walk_wrapper(usr, idx, [](MTLaunchStruct *mtls,
+ RsExpandKernelParams &kparams,
+ outer_foreach_t fn) {
+
+ while (1) {
+ uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);  // atomically take the next slice number
+ uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
+ uint32_t xEnd = xStart + mtls->mSliceSize;
+
+ xEnd = rsMin(xEnd, mtls->xEnd);  // clamp the slice to the end of the launch range
+
+ if (xEnd <= xStart) {  // slice starts at or past the end: no work left
+ return;
+ }
+
+ kparams.out = mtls->fep.outPtr +
+ (mtls->fep.outStride.eStride * xStart);  // output pointer advanced to element xStart
+
+ for (int inIndex = mtls->fep.inLen; --inIndex >= 0;) {  // counts down from inLen - 1 to 0
+ StridePair &strides = mtls->fep.inStrides[inIndex];
+
+ kparams.ins[inIndex] =
+ mtls->fep.inPtrs[inIndex] + (strides.eStride * xStart);  // each input advanced by its own element stride
+ }
+
+ fn(&kparams, xStart, xEnd, mtls->fep.outStride.eStride);  // NOTE(review): kparams.y and kparams.z are not assigned on this path — confirm kernels do not read them here
+ }
+ });
+}
+
+
+void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains,
+ uint32_t inLen,
+ Allocation* aout,
+ const RsScriptCall* sc,
+ MTLaunchStruct* mtls) {
//android::StopWatch kernel_time("kernel time");
if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
const size_t targetByteChunk = 16 * 1024;
mInForEach = true;
+
if (mtls->fep.dimY > 1) {
uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4);
uint32_t s2 = 0;
// This chooses our slice size to rate limit atomic ops to
// one per 16k bytes of reads/writes.
- if (mtls->fep.yStrideOut) {
- s2 = targetByteChunk / mtls->fep.yStrideOut;
+ if (mtls->fep.outStride.yStride) {
+ s2 = targetByteChunk / mtls->fep.outStride.yStride;
} else {
- s2 = targetByteChunk / mtls->fep.yStrideIn;
+ // We know that there is either an output or an input.
+ s2 = targetByteChunk / mtls->fep.inStrides[0].yStride;
}
mtls->mSliceSize = rsMin(s1, s2);
@@ -504,18 +482,18 @@
mtls->mSliceSize = 1;
}
- // mtls->mSliceSize = 2;
- launchThreads(wc_xy, mtls);
+ launchThreads(walk_2d, mtls);
} else {
uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4);
uint32_t s2 = 0;
// This chooses our slice size to rate limit atomic ops to
// one per 16k bytes of reads/writes.
- if (mtls->fep.eStrideOut) {
- s2 = targetByteChunk / mtls->fep.eStrideOut;
+ if (mtls->fep.outStride.eStride) {
+ s2 = targetByteChunk / mtls->fep.outStride.eStride;
} else {
- s2 = targetByteChunk / mtls->fep.eStrideIn;
+ // We know that there is either an output or an input.
+ s2 = targetByteChunk / mtls->fep.inStrides[0].eStride;
}
mtls->mSliceSize = rsMin(s1, s2);
@@ -523,62 +501,61 @@
mtls->mSliceSize = 1;
}
- launchThreads(wc_x, mtls);
+ launchThreads(walk_1d, mtls);
}
mInForEach = false;
- //ALOGE("launch 1");
} else {
- RsForEachStubParamStruct p;
- memcpy(&p, &mtls->fep, sizeof(p));
- uint32_t sig = mtls->sig;
+ RsExpandKernelParams kparams;
+ kparams.takeFields(mtls->fep);
- // Allocate space for our input base pointers.
- p.ins = new const void*[inLen];
+ if (inLen > 0) {
+ // Allocate space for our input base pointers.
+ kparams.ins = (const void**)alloca(inLen * sizeof(void*));
- // Allocate space for our input stride information.
- p.eStrideIns = new uint32_t[inLen];
+ // Allocate space for our input stride information.
+ kparams.inEStrides = (uint32_t*)alloca(inLen * sizeof(uint32_t));
- // Fill our stride information.
- for (int index = inLen; --index >= 0;) {
- p.eStrideIns[index] = mtls->fep.inStrides[index].eStride;
+ // Fill our stride information.
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ kparams.inEStrides[inIndex] =
+ mtls->fep.inStrides[inIndex].eStride;
+ }
}
//ALOGE("launch 3");
outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
- uint32_t offset_invariant = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0];
+ for (uint32_t arrayIndex = mtls->arrayStart;
+ arrayIndex < mtls->arrayEnd; arrayIndex++) {
- for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
- uint32_t offset_part = offset_invariant * p.ar[0];
+ for (kparams.z = mtls->zStart; kparams.z < mtls->zEnd;
+ kparams.z++) {
- for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
- for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
- uint32_t offset = offset_part + mtls->fep.dimY * p.z + p.y;
+ for (kparams.y = mtls->yStart; kparams.y < mtls->yEnd;
+ kparams.y++) {
- p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
- (mtls->fep.eStrideOut * mtls->xStart);
+ uint32_t offset =
+ mtls->fep.dimY * mtls->fep.dimZ * arrayIndex +
+ mtls->fep.dimY * kparams.z + kparams.y;
- for (int index = inLen; --index >= 0;) {
- StridePair &strides = mtls->fep.inStrides[index];
+ kparams.out = mtls->fep.outPtr +
+ (mtls->fep.outStride.yStride * offset) +
+ (mtls->fep.outStride.eStride * mtls->xStart);
- p.ins[index] = mtls->fep.ptrIns[index] +
- (strides.yStride * offset) +
- (strides.eStride * mtls->xStart);
+ for (int inIndex = inLen; --inIndex >= 0;) {
+ StridePair &strides = mtls->fep.inStrides[inIndex];
+
+ kparams.ins[inIndex] =
+ mtls->fep.inPtrs[inIndex] +
+ (strides.yStride * offset) +
+ (strides.eStride * mtls->xStart);
}
- /*
- * The fourth argument is zero here because multi-input
- * kernels get their stride information from a member of p
- * that points to an array.
- */
- fn(&p, mtls->xStart, mtls->xEnd, 0, mtls->fep.eStrideOut);
+ fn(&kparams, mtls->xStart, mtls->xEnd,
+ mtls->fep.outStride.eStride);
}
}
}
-
- // Free our arrays.
- delete[] p.ins;
- delete[] p.eStrideIns;
}
}
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index c54dca2..2fea3fc 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -25,6 +25,8 @@
#include <string>
+#define RS_KERNEL_INPUT_THRESHOLD 32
+
namespace bcc {
class BCCContext;
class RSCompilerDriver;
@@ -34,44 +36,80 @@
namespace android {
namespace renderscript {
-typedef struct {
+struct StridePair {
uint32_t eStride;
uint32_t yStride;
-} StridePair;
+};
-typedef struct {
- const void *in;
- void *out;
- const void *usr;
- uint32_t usrLen;
- uint32_t x;
- uint32_t y;
- uint32_t z;
- uint32_t lod;
- RsAllocationCubemapFace face;
- uint32_t ar[16];
+struct RsExpandKernelDriverInfo {  // driver-side launch description; RsExpandKernelParams::takeFields copies usr, slot and dimX/Y/Z out of it
+ const uint8_t **inPtrs;  // indexed by input number, 0 .. inLen - 1
+ uint32_t inLen;  // number of inputs
- const void **ins;
- uint32_t *eStrideIns;
+ uint8_t *outPtr;  // base pointer that output offsets are added to
- uint32_t lid;
+ StridePair *inStrides;  // indexed by input number, parallel to inPtrs
+ StridePair outStride;
uint32_t dimX;
uint32_t dimY;
uint32_t dimZ;
- uint32_t dimArray;
- const uint8_t *ptrIn;
- uint8_t *ptrOut;
- uint32_t eStrideIn;
- uint32_t eStrideOut;
- uint32_t yStrideIn;
- uint32_t yStrideOut;
uint32_t slot;
- const uint8_t** ptrIns;
- StridePair* inStrides;
-} RsForEachStubParamStruct;
+ const void *usr;
+ uint32_t usrLen;
+
+ bool heapAllocatedArrays;  // when true, the destructor delete[]s inPtrs and inStrides
+
+ RsExpandKernelDriverInfo() : heapAllocatedArrays(false) {}  // NOTE(review): only the flag is initialized; inPtrs and inStrides are left unset
+
+ ~RsExpandKernelDriverInfo() {
+ if (heapAllocatedArrays) {  // arrays are released only when the flag says this struct owns them
+ if (inPtrs != NULL) {
+ delete[] inPtrs;
+ }
+
+ if (inStrides != NULL) {
+ delete[] inStrides;
+ }
+ }
+ }
+};
+
+struct RsExpandKernelParams {
+
+ // Per-call state read by kernels; none of these are set by takeFields().
+ const void **ins;  // one pointer per input
+ uint32_t *inEStrides;  // one element stride per input
+ void *out;
+ uint32_t y;
+ uint32_t z;
+ uint32_t lid;
+
+ // Used by ScriptGroup and user kernels.
+ const void *usr;
+
+ // Used by intrinsics
+ uint32_t dimX;
+ uint32_t dimY;
+ uint32_t dimZ;
+
+ /*
+ * FIXME: This is only used by the blend intrinsic. If possible, we should
+ * modify blend to not need it.
+ */
+ uint32_t slot;
+
+ /// Copy usr, slot and dimX/dimY/dimZ from a driver struct; every other member is left untouched.
+ void takeFields(const RsExpandKernelDriverInfo &dstruct) {
+ this->usr = dstruct.usr;
+ this->slot = dstruct.slot;
+
+ this->dimX = dstruct.dimX;
+ this->dimY = dstruct.dimY;
+ this->dimZ = dstruct.dimZ;
+ }
+};
extern bool gArchUseSIMD;
@@ -82,21 +120,21 @@
class RsdCpuScriptImpl;
class RsdCpuReferenceImpl;
-typedef struct ScriptTLSStructRec {
+struct ScriptTLSStruct {
android::renderscript::Context * mContext;
const android::renderscript::Script * mScript;
RsdCpuScriptImpl *mImpl;
-} ScriptTLSStruct;
+};
-typedef struct {
- RsForEachStubParamStruct fep;
+struct MTLaunchStruct {
+ RsExpandKernelDriverInfo fep;
RsdCpuReferenceImpl *rsc;
RsdCpuScriptImpl *script;
ForEachFunc_t kernel;
uint32_t sig;
- const Allocation * ain;
+ const Allocation ** ains;
Allocation * aout;
uint32_t mSliceSize;
@@ -112,12 +150,9 @@
uint32_t arrayStart;
uint32_t arrayEnd;
- // Multi-input data.
- const Allocation ** ains;
-} MTLaunchStruct;
-
-
-
+ const uint8_t *inPtrsBuff[RS_KERNEL_INPUT_THRESHOLD];
+ StridePair inStridesBuff[RS_KERNEL_INPUT_THRESHOLD];
+};
class RsdCpuReferenceImpl : public RsdCpuReference {
public:
@@ -138,9 +173,6 @@
return mWorkers.mCount + 1;
}
- void launchThreads(const Allocation * ain, Allocation * aout,
- const RsScriptCall *sc, MTLaunchStruct *mtls);
-
void launchThreads(const Allocation** ains, uint32_t inLen, Allocation* aout,
const RsScriptCall* sc, MTLaunchStruct* mtls);
diff --git a/cpu_ref/rsCpuIntrinsic.cpp b/cpu_ref/rsCpuIntrinsic.cpp
index 5a7fffd..8437c99 100644
--- a/cpu_ref/rsCpuIntrinsic.cpp
+++ b/cpu_ref/rsCpuIntrinsic.cpp
@@ -73,54 +73,29 @@
}
-void RsdCpuScriptIntrinsic::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc) {
+void RsdCpuScriptIntrinsic::preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {
}
-void RsdCpuScriptIntrinsic::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc) {
+void RsdCpuScriptIntrinsic::postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {
}
void RsdCpuScriptIntrinsic::invokeForEach(uint32_t slot,
- const Allocation * ain,
+ const Allocation ** ains,
+ uint32_t inLen,
Allocation * aout,
const void * usr,
uint32_t usrLen,
const RsScriptCall *sc) {
MTLaunchStruct mtls;
- preLaunch(slot, ain, aout, usr, usrLen, sc);
- forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
- mtls.script = this;
- mtls.fep.slot = slot;
-
- mtls.kernel = (void (*)())mRootPtr;
- mtls.fep.usr = this;
-
- RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ain, aout, sc, &mtls);
- mCtx->setTLS(oldTLS);
-
- postLaunch(slot, ain, aout, usr, usrLen, sc);
-}
-
-void RsdCpuScriptIntrinsic::invokeForEachMulti(uint32_t slot,
- const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) {
-
- MTLaunchStruct mtls;
- /*
- * FIXME: Possibly create new preLaunch and postLaunch functions that take
- * all of the input allocation pointers.
- */
- preLaunch(slot, ains[0], aout, usr, usrLen, sc);
+ preLaunch(slot, ains, inLen, aout, usr, usrLen, sc);
forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
mtls.script = this;
@@ -133,7 +108,7 @@
mCtx->launchThreads(ains, inLen, aout, sc, &mtls);
mCtx->setTLS(oldTLS);
- postLaunch(slot, ains[0], aout, usr, usrLen, sc);
+ postLaunch(slot, ains, inLen, aout, usr, usrLen, sc);
}
void RsdCpuScriptIntrinsic::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
diff --git a/cpu_ref/rsCpuIntrinsic.h b/cpu_ref/rsCpuIntrinsic.h
index bf6a8ac..95aaa14 100644
--- a/cpu_ref/rsCpuIntrinsic.h
+++ b/cpu_ref/rsCpuIntrinsic.h
@@ -28,43 +28,42 @@
public:
virtual void populateScript(Script *) = 0;
- virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength);
+ virtual void invokeFunction(uint32_t slot, const void * params,
+ size_t paramLength);
virtual int invokeRoot();
+
virtual void invokeForEach(uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc);
+ const Allocation ** ain,
+ uint32_t inLen,
+ Allocation * aout,
+ const void * usr,
+ uint32_t usrLen,
+ const RsScriptCall *sc);
- virtual void invokeForEachMulti(uint32_t slot,
- const Allocation ** ain,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc);
-
- virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls);
+ virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct * mtls);
virtual void invokeInit();
virtual void invokeFreeChildren();
- virtual void preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc);
- virtual void postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc);
+ virtual void preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout, const void * usr,
+ uint32_t usrLen, const RsScriptCall * sc);
+ virtual void postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall * sc);
- virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
- virtual void setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
- const Element *e, const uint32_t *dims, size_t dimLength);
+ virtual void setGlobalVar(uint32_t slot, const void * data,
+ size_t dataLength);
+ virtual void setGlobalVarWithElemDims(uint32_t slot, const void * data,
+ size_t dataLength, const Element * e,
+ const uint32_t * dims,
+ size_t dimLength);
virtual void setGlobalBind(uint32_t slot, Allocation *data);
virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
virtual ~RsdCpuScriptIntrinsic();
- RsdCpuScriptIntrinsic(RsdCpuReferenceImpl *ctx, const Script *s, const Element *,
- RsScriptIntrinsicID iid);
+ RsdCpuScriptIntrinsic(RsdCpuReferenceImpl * ctx, const Script * s,
+ const Element * e, RsScriptIntrinsicID iid);
protected:
RsScriptIntrinsicID mID;
diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
index a7c9487..ce7c9c6 100644
--- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
@@ -38,9 +38,9 @@
protected:
ObjectBaseRef<Allocation> mLUT;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
};
}
@@ -58,13 +58,13 @@
int dimx, int dimy, int dimz);
-void RsdCpuScriptIntrinsic3DLUT::kernel(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t outstep) {
RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr;
uchar4 *out = (uchar4 *)p->out + xstart;
- uchar4 *in = (uchar4 *)p->in + xstart;
+ uchar4 *in = (uchar4 *)p->ins[0] + xstart;
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -161,9 +161,9 @@
}
}
-RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx,
- const Script *s, const Element *e)
- : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) {
+RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(
+ RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) :
+ RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) {
mRootPtr = &kernel;
}
@@ -185,5 +185,3 @@
return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index 228b887..2beec3d 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -33,9 +33,8 @@
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
protected:
- static void kernel(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ static void kernel(const RsExpandKernelParams *p, uint32_t xstart,
+ uint32_t xend, uint32_t outstep);
};
}
@@ -110,14 +109,14 @@
extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
-void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
// instep/outstep can be ignored--sizeof(uchar4) known at compile time
uchar4 *out = (uchar4 *)p->out;
- uchar4 *in = (uchar4 *)p->in;
+ uchar4 *in = (uchar4 *)p->ins[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -509,6 +508,3 @@
const Script *s, const Element *e) {
return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
}
-
-
-
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index c1ca4e2..7f888e9 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -44,12 +44,12 @@
int mIradius;
ObjectBaseRef<Allocation> mAlloc;
- static void kernelU4(const RsForEachStubParamStruct *p,
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelU1(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
void ComputeGaussianWeights();
};
@@ -113,7 +113,7 @@
-static void OneVU4(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y,
+static void OneVU4(const RsExpandKernelParams *p, float4 *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x*4;
@@ -131,7 +131,7 @@
out->xyzw = blurredPixel;
}
-static void OneVU1(const RsForEachStubParamStruct *p, float *out, int32_t x, int32_t y,
+static void OneVU1(const RsExpandKernelParams *p, float *out, int32_t x, int32_t y,
const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
const uchar *pi = ptrIn + x;
@@ -243,7 +243,7 @@
}
}
-static void OneHU4(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x,
+static void OneHU4(const RsExpandKernelParams *p, uchar4 *out, int32_t x,
const float4 *ptrIn, const float* gPtr, int iradius) {
float4 blurredPixel = 0;
@@ -258,7 +258,7 @@
out->xyzw = convert_uchar4(blurredPixel);
}
-static void OneHU1(const RsForEachStubParamStruct *p, uchar *out, int32_t x,
+static void OneHU1(const RsExpandKernelParams *p, uchar *out, int32_t x,
const float *ptrIn, const float* gPtr, int iradius) {
float blurredPixel = 0;
@@ -274,9 +274,9 @@
}
-void RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
float4 stackbuf[2048];
float4 *buf = &stackbuf[0];
@@ -345,9 +345,9 @@
}
}
-void RsdCpuScriptIntrinsicBlur::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
float buf[4 * 2048];
RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
if (!cp->mAlloc.get()) {
@@ -464,5 +464,3 @@
return new RsdCpuScriptIntrinsicBlur(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
index 19894bc..e298d94 100644
--- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
+++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
@@ -169,10 +169,9 @@
virtual ~RsdCpuScriptIntrinsicColorMatrix();
RsdCpuScriptIntrinsicColorMatrix(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
- virtual void preLaunch(uint32_t slot, const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen, const RsScriptCall *sc);
- virtual void postLaunch(uint32_t slot, const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen, const RsScriptCall *sc);
+ virtual void preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout, const void * usr,
+ uint32_t usrLen, const RsScriptCall *sc);
protected:
float fp[16];
@@ -188,9 +187,9 @@
FunctionTab_t mFnTab;
#endif
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
void updateCoeffCache(float fpMul, float addMul);
Key_t mLastKey;
@@ -778,7 +777,7 @@
}
-static void One(const RsForEachStubParamStruct *p, void *out,
+static void One(const RsExpandKernelParams *p, void *out,
const void *py, const float* coeff, const float *add,
uint32_t vsin, uint32_t vsout, bool fin, bool fout) {
@@ -879,10 +878,13 @@
//ALOGE("out %p %f %f %f %f", out, ((float *)out)[0], ((float *)out)[1], ((float *)out)[2], ((float *)out)[3]);
}
-void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr;
+
+ uint32_t instep = p->inEStrides[0];
+
uchar *out = (uchar *)p->out;
uchar *in = (uchar *)p->in;
uint32_t x1 = xstart;
@@ -932,11 +934,15 @@
}
}
-void RsdCpuScriptIntrinsicColorMatrix::preLaunch(
- uint32_t slot, const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen, const RsScriptCall *sc) {
+void RsdCpuScriptIntrinsicColorMatrix::preLaunch(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
+ const void * usr,
+ uint32_t usrLen,
+ const RsScriptCall *sc) {
- const Element *ein = ain->mHal.state.type->getElement();
+ const Element *ein = ains[0]->mHal.state.type->getElement();
const Element *eout = aout->mHal.state.type->getElement();
if (ein->getType() == eout->getType()) {
@@ -953,8 +959,8 @@
}
}
- Key_t key = computeKey(ain->mHal.state.type->getElement(),
- aout->mHal.state.type->getElement());
+ Key_t key = computeKey(ein, eout);
+
#if defined(ARCH_X86_HAVE_SSSE3)
if ((mOptKernel == NULL) || (mLastKey.key != key.key)) {
// FIXME: Disable mOptKernel to pass RS color matrix CTS cases
@@ -996,12 +1002,6 @@
#endif //if !defined(ARCH_X86_HAVE_SSSE3)
}
-void RsdCpuScriptIntrinsicColorMatrix::postLaunch(
- uint32_t slot, const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen, const RsScriptCall *sc) {
-
-}
-
RsdCpuScriptIntrinsicColorMatrix::RsdCpuScriptIntrinsicColorMatrix(
RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
: RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX) {
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index 552a835..f9b70cc 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -42,24 +42,24 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelF1(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelF2(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelF4(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
};
}
@@ -88,7 +88,7 @@
const void *y2, const short *coef, uint32_t count);
-static void ConvolveOneU4(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
+static void ConvolveOneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
const float* coeff) {
@@ -110,7 +110,7 @@
*out = o;
}
-static void ConvolveOneU2(const RsForEachStubParamStruct *p, uint32_t x, uchar2 *out,
+static void ConvolveOneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2,
const float* coeff) {
@@ -131,7 +131,7 @@
*out = convert_uchar2(px);
}
-static void ConvolveOneU1(const RsForEachStubParamStruct *p, uint32_t x, uchar *out,
+static void ConvolveOneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2,
const float* coeff) {
@@ -150,7 +150,7 @@
*out = clamp(px, 0.f, 255.f);
}
-static void ConvolveOneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out,
+static void ConvolveOneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2,
const float* coeff) {
@@ -161,7 +161,7 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF2(const RsForEachStubParamStruct *p, uint32_t x, float2 *out,
+static void ConvolveOneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2,
const float* coeff) {
@@ -172,7 +172,7 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-static void ConvolveOneF1(const RsForEachStubParamStruct *p, uint32_t x, float *out,
+static void ConvolveOneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2,
const float* coeff) {
@@ -183,9 +183,9 @@
(py2[x1] * coeff[6]) + (py2[x] * coeff[7]) + (py2[x2] * coeff[8]);
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
if (!cp->mAlloc.get()) {
@@ -230,9 +230,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
if (!cp->mAlloc.get()) {
@@ -275,9 +275,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
if (!cp->mAlloc.get()) {
@@ -320,9 +320,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
if (!cp->mAlloc.get()) {
@@ -365,9 +365,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
if (!cp->mAlloc.get()) {
@@ -409,9 +409,9 @@
}
}
}
-void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
if (!cp->mAlloc.get()) {
@@ -507,5 +507,3 @@
return new RsdCpuScriptIntrinsicConvolve3x3(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index ffa9543..e263e74 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -42,24 +42,24 @@
ObjectBaseRef<Allocation> alloc;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelF1(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelF2(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelF4(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
};
@@ -86,7 +86,7 @@
}
-static void OneU4(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
+static void OneU4(const RsExpandKernelParams *p, uint32_t x, uchar4 *out,
const uchar4 *py0, const uchar4 *py1, const uchar4 *py2, const uchar4 *py3, const uchar4 *py4,
const float* coeff) {
@@ -129,7 +129,7 @@
*out = convert_uchar4(px);
}
-static void OneU2(const RsForEachStubParamStruct *p, uint32_t x, uchar2 *out,
+static void OneU2(const RsExpandKernelParams *p, uint32_t x, uchar2 *out,
const uchar2 *py0, const uchar2 *py1, const uchar2 *py2, const uchar2 *py3, const uchar2 *py4,
const float* coeff) {
@@ -172,7 +172,7 @@
*out = convert_uchar2(px);
}
-static void OneU1(const RsForEachStubParamStruct *p, uint32_t x, uchar *out,
+static void OneU1(const RsExpandKernelParams *p, uint32_t x, uchar *out,
const uchar *py0, const uchar *py1, const uchar *py2, const uchar *py3, const uchar *py4,
const float* coeff) {
@@ -215,7 +215,7 @@
*out = px;
}
-static void OneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out,
+static void OneF4(const RsExpandKernelParams *p, uint32_t x, float4 *out,
const float4 *py0, const float4 *py1, const float4 *py2, const float4 *py3, const float4 *py4,
const float* coeff) {
@@ -257,7 +257,7 @@
*out = px;
}
-static void OneF2(const RsForEachStubParamStruct *p, uint32_t x, float2 *out,
+static void OneF2(const RsExpandKernelParams *p, uint32_t x, float2 *out,
const float2 *py0, const float2 *py1, const float2 *py2, const float2 *py3, const float2 *py4,
const float* coeff) {
@@ -299,7 +299,7 @@
*out = px;
}
-static void OneF1(const RsForEachStubParamStruct *p, uint32_t x, float *out,
+static void OneF1(const RsExpandKernelParams *p, uint32_t x, float *out,
const float *py0, const float *py1, const float *py2, const float *py3, const float *py4,
const float* coeff) {
@@ -346,9 +346,9 @@
const void *y2, const void *y3, const void *y4,
const short *coef, uint32_t count);
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
@@ -406,9 +406,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
@@ -455,9 +455,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
@@ -504,9 +504,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
@@ -553,9 +553,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
@@ -602,9 +602,9 @@
}
}
-void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
if (!cp->alloc.get()) {
ALOGE("Convolve5x5 executed without input, skipping");
@@ -705,6 +705,3 @@
return new RsdCpuScriptIntrinsicConvolve5x5(ctx, s, e);
}
-
-
-
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index cdfe7d1..d3dce6d 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -36,10 +36,10 @@
RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
protected:
- void preLaunch(uint32_t slot, const Allocation * ain,
+ void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
Allocation * aout, const void * usr,
uint32_t usrLen, const RsScriptCall *sc);
- void postLaunch(uint32_t slot, const Allocation * ain,
+ void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
Allocation * aout, const void * usr,
uint32_t usrLen, const RsScriptCall *sc);
@@ -49,31 +49,31 @@
int *mSums;
ObjectBaseRef<Allocation> mAllocOut;
- static void kernelP1U4(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelP1U3(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelP1U2(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelP1U1(const RsForEachStubParamStruct *p,
- uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ static void kernelP1U4(const RsExpandKernelParams *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t outstep);
+ static void kernelP1U3(const RsExpandKernelParams *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t outstep);
+ static void kernelP1U2(const RsExpandKernelParams *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t outstep);
+ static void kernelP1U1(const RsExpandKernelParams *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t outstep);
- static void kernelP1L4(const RsForEachStubParamStruct *p,
+ static void kernelP1L4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelP1L3(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelP1L3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelP1L2(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelP1L2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelP1L1(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelP1L1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
};
@@ -97,9 +97,12 @@
-void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc) {
+void
+RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {
const uint32_t threads = mCtx->getThreadCount();
uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
@@ -123,7 +126,7 @@
}
break;
case 1:
- switch(ain->getType()->getElement()->getVectorSize()) {
+ switch(ains[0]->getType()->getElement()->getVectorSize()) {
case 1:
mRootPtr = &kernelP1L1;
break;
@@ -142,9 +145,12 @@
memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
}
-void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc) {
+void
+RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {
unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
uint32_t threads = mCtx->getThreadCount();
@@ -160,12 +166,12 @@
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * 4 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
@@ -173,47 +179,47 @@
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
sums[(in[3] << 2) + 3] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * 4 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 2) ] ++;
sums[(in[1] << 2) + 1] ++;
sums[(in[2] << 2) + 2] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * 2 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[(in[0] << 1) ] ++;
sums[(in[1] << 1) + 1] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
@@ -222,16 +228,16 @@
(cp->mDotI[2] * in[2]) +
(cp->mDotI[3] * in[3]);
sums[(t + 0x7f) >> 8] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
@@ -239,52 +245,52 @@
(cp->mDotI[1] * in[1]) +
(cp->mDotI[2] * in[2]);
sums[(t + 0x7f) >> 8] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]) +
(cp->mDotI[1] * in[1]);
sums[(t + 0x7f) >> 8] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
int t = (cp->mDotI[0] * in[0]);
sums[(t + 0x7f) >> 8] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
-void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
- uchar *in = (uchar *)p->in;
+ uchar *in = (uchar *)p->ins[0];
int * sums = &cp->mSums[256 * p->lid];
for (uint32_t x = xstart; x < xend; x++) {
sums[in[0]] ++;
- in += instep;
+ in += p->inEStrides[0];
}
}
@@ -323,5 +329,3 @@
return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp
index 5b2adc5..b08a0e5 100644
--- a/cpu_ref/rsCpuIntrinsicLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsicLUT.cpp
@@ -38,9 +38,9 @@
protected:
ObjectBaseRef<Allocation> lut;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
};
}
@@ -53,13 +53,13 @@
}
-void RsdCpuScriptIntrinsicLUT::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr;
uchar *out = (uchar *)p->out;
- const uchar *in = (uchar *)p->in;
+ const uchar *in = (uchar *)p->ins[0];
uint32_t x1 = xstart;
uint32_t x2 = xend;
@@ -103,5 +103,3 @@
return new RsdCpuScriptIntrinsicLUT(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicLoopFilter.cpp b/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
index c31fcdf..05ccfd6 100644
--- a/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
+++ b/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
@@ -174,17 +174,17 @@
ObjectBaseRef<Allocation> mFrameBuffer;
void doLoopFilter();
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
};
}
}
-void RsdCpuScriptIntrinsicLoopFilter::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicLoopFilter::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicLoopFilter *cp = (RsdCpuScriptIntrinsicLoopFilter*)p->usr;
memset((void*)&cp->mPrch.chart, 0, sizeof(cp->mPrch.chart));
cp->mPrch.chart[0] = 0x0fffffff;
@@ -1232,4 +1232,3 @@
rsAssert(rv == 0);
}
}
-
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index 474f82d..fa0e8ee 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -35,8 +35,8 @@
virtual ~RsdCpuScriptIntrinsicResize();
RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
- virtual void preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
+ virtual void preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout, const void * usr,
uint32_t usrLen, const RsScriptCall *sc);
float scaleX;
@@ -46,15 +46,15 @@
ObjectBaseRef<const Allocation> mAlloc;
ObjectBaseRef<const Element> mElement;
- static void kernelU1(const RsForEachStubParamStruct *p,
+ static void kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelU2(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
- static void kernelU4(const RsForEachStubParamStruct *p,
+ uint32_t outstep);
+ static void kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
};
}
@@ -175,9 +175,9 @@
return (uchar)p;
}
-void RsdCpuScriptIntrinsicResize::kernelU4(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
if (!cp->mAlloc.get()) {
@@ -215,9 +215,9 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU2(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
if (!cp->mAlloc.get()) {
@@ -255,9 +255,9 @@
}
}
-void RsdCpuScriptIntrinsicResize::kernelU1(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
if (!cp->mAlloc.get()) {
@@ -304,9 +304,11 @@
RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
}
-void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
+void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc)
{
if (!mAlloc.get()) {
ALOGE("Resize executed without input, skipping");
@@ -347,5 +349,3 @@
return new RsdCpuScriptIntrinsicResize(ctx, s, e);
}
-
-
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index c53ef31..390ca3c 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -46,9 +46,9 @@
protected:
ObjectBaseRef<Allocation> alloc;
- static void kernel(const RsForEachStubParamStruct *p,
+ static void kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
};
}
@@ -101,9 +101,9 @@
extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t xstart, size_t xend);
extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, size_t xstart, size_t xend);
-void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
+void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)p->usr;
if (!cp->alloc.get()) {
ALOGE("YuvToRGB executed without input, skipping");
diff --git a/cpu_ref/rsCpuIntrinsics_x86.c b/cpu_ref/rsCpuIntrinsics_x86.cpp
similarity index 100%
rename from cpu_ref/rsCpuIntrinsics_x86.c
rename to cpu_ref/rsCpuIntrinsics_x86.cpp
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index a11fda1..e0b4004 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -176,7 +176,7 @@
// library fallback path. Those applications don't have a private
// library path, so they need to install to the system directly.
// Note that this is really just a testing path.
- android::String8 scriptSONameSystem("/system/lib/librs.");
+ std::string scriptSONameSystem("/system/lib/librs.");
scriptSONameSystem.append(resName);
scriptSONameSystem.append(".so");
loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
@@ -216,9 +216,11 @@
const static char *BCC_EXE_PATH = "/system/bin/bcc";
-static void setCompileArguments(std::vector<const char*>* args, const android::String8& bcFileName,
- const char* cacheDir, const char* resName, const char* core_lib,
- bool useRSDebugContext, const char* bccPluginName) {
+static void setCompileArguments(std::vector<const char*>* args,
+ const std::string& bcFileName,
+ const char* cacheDir, const char* resName,
+ const char* core_lib, bool useRSDebugContext,
+ const char* bccPluginName) {
rsAssert(cacheDir && resName && core_lib);
args->push_back(BCC_EXE_PATH);
args->push_back("-o");
@@ -242,27 +244,27 @@
}
}
- args->push_back(bcFileName.string());
+ args->push_back(bcFileName.c_str());
args->push_back(NULL);
}
-static bool compileBitcode(const android::String8& bcFileName,
+static bool compileBitcode(const std::string &bcFileName,
const char *bitcode,
size_t bitcodeSize,
- const char** compileArguments,
- const std::string& compileCommandLine) {
+ const char **compileArguments,
+ const std::string &compileCommandLine) {
rsAssert(bitcode && bitcodeSize);
- FILE *bcfile = fopen(bcFileName.string(), "w");
+ FILE *bcfile = fopen(bcFileName.c_str(), "w");
if (!bcfile) {
- ALOGE("Could not write to %s", bcFileName.string());
+ ALOGE("Could not write to %s", bcFileName.c_str());
return false;
}
size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
fclose(bcfile);
if (nwritten != bitcodeSize) {
ALOGE("Could not write %zu bytes to %s", bitcodeSize,
- bcFileName.string());
+ bcFileName.c_str());
return false;
}
@@ -426,7 +428,7 @@
useRSDebugContext = true;
}
- android::String8 bcFileName(cacheDir);
+ std::string bcFileName(cacheDir);
bcFileName.append("/");
bcFileName.append(resName);
bcFileName.append(".bc");
@@ -760,9 +762,9 @@
script->mHal.info.exportedForeachFuncList = &mExportedForEachFuncList[0];
script->mHal.info.exportedPragmaCount = mExecutable->getPragmaKeys().size();
script->mHal.info.exportedPragmaKeyList =
- const_cast<const char**>(mExecutable->getPragmaKeys().array());
+ const_cast<const char**>(&mExecutable->getPragmaKeys().front());
script->mHal.info.exportedPragmaValueList =
- const_cast<const char**>(mExecutable->getPragmaValues().array());
+ const_cast<const char**>(&mExecutable->getPragmaValues().front());
if (mRootExpand) {
script->mHal.info.root = mRootExpand;
@@ -789,119 +791,8 @@
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen,
- const RsScriptCall *sc,
- MTLaunchStruct *mtls) {
-
- memset(mtls, 0, sizeof(MTLaunchStruct));
-
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
- if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
- return;
- }
-
- if (ain != NULL) {
- const Type *inType = ain->getType();
-
- mtls->fep.dimX = inType->getDimX();
- mtls->fep.dimY = inType->getDimY();
- mtls->fep.dimZ = inType->getDimZ();
-
- } else if (aout != NULL) {
- const Type *outType = aout->getType();
-
- mtls->fep.dimX = outType->getDimX();
- mtls->fep.dimY = outType->getDimY();
- mtls->fep.dimZ = outType->getDimZ();
-
- } else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
- return;
- }
-
- if (ain != NULL && aout != NULL) {
- if (!ain->hasSameDims(aout)) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
- "Failed to launch kernel; dimensions of input and output allocations do not match.");
-
- return;
- }
- }
-
- if (!sc || (sc->xEnd == 0)) {
- mtls->xEnd = mtls->fep.dimX;
- } else {
- rsAssert(sc->xStart < mtls->fep.dimX);
- rsAssert(sc->xEnd <= mtls->fep.dimX);
- rsAssert(sc->xStart < sc->xEnd);
- mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
- mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
- if (mtls->xStart >= mtls->xEnd) return;
- }
-
- if (!sc || (sc->yEnd == 0)) {
- mtls->yEnd = mtls->fep.dimY;
- } else {
- rsAssert(sc->yStart < mtls->fep.dimY);
- rsAssert(sc->yEnd <= mtls->fep.dimY);
- rsAssert(sc->yStart < sc->yEnd);
- mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
- mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
- if (mtls->yStart >= mtls->yEnd) return;
- }
-
- if (!sc || (sc->zEnd == 0)) {
- mtls->zEnd = mtls->fep.dimZ;
- } else {
- rsAssert(sc->zStart < mtls->fep.dimZ);
- rsAssert(sc->zEnd <= mtls->fep.dimZ);
- rsAssert(sc->zStart < sc->zEnd);
- mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
- mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
- if (mtls->zStart >= mtls->zEnd) return;
- }
-
- mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
- mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
- mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
- mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
-
- rsAssert(!ain || (ain->getType()->getDimZ() == 0));
-
- mtls->rsc = mCtx;
- mtls->ain = ain;
- mtls->aout = aout;
- mtls->fep.usr = usr;
- mtls->fep.usrLen = usrLen;
- mtls->mSliceSize = 1;
- mtls->mSliceNum = 0;
-
- mtls->fep.ptrIn = NULL;
- mtls->fep.eStrideIn = 0;
- mtls->isThreadable = mIsThreadable;
-
- if (ain) {
- mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
- mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
- }
-
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
- }
-}
-
-void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, uint32_t inLen,
+void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
+ uint32_t inLen,
Allocation * aout,
const void * usr, uint32_t usrLen,
const RsScriptCall *sc,
@@ -909,24 +800,24 @@
memset(mtls, 0, sizeof(MTLaunchStruct));
- // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
- if (ains != NULL) {
- for (int index = inLen; --index >= 0;) {
- const Allocation* ain = ains[index];
+ for (int index = inLen; --index >= 0;) {
+ const Allocation* ain = ains[index];
- if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
- return;
- }
+ // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
+ if (ain != NULL && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null in allocations");
+ return;
}
}
if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null out allocations");
return;
}
- if (ains != NULL) {
+ if (inLen > 0) {
const Allocation *ain0 = ains[0];
const Type *inType = ain0->getType();
@@ -951,11 +842,12 @@
mtls->fep.dimZ = outType->getDimZ();
} else {
- mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
+ mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
+ "rsForEach called with null allocations");
return;
}
- if (ains != NULL && aout != NULL) {
+ if (inLen > 0 && aout != NULL) {
if (!ains[0]->hasSameDims(aout)) {
mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
"Failed to launch kernel; dimensions of input and output allocations do not match.");
@@ -1002,7 +894,7 @@
mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
- rsAssert(!ains || (ains[0]->getType()->getDimZ() == 0));
+ rsAssert(inLen == 0 || (ains[0]->getType()->getDimZ() == 0));
mtls->rsc = mCtx;
mtls->ains = ains;
@@ -1012,18 +904,28 @@
mtls->mSliceSize = 1;
mtls->mSliceNum = 0;
- mtls->fep.ptrIns = NULL;
- mtls->fep.eStrideIn = 0;
+ mtls->fep.inPtrs = NULL;
+ mtls->fep.inStrides = NULL;
mtls->isThreadable = mIsThreadable;
- if (ains) {
- mtls->fep.ptrIns = new const uint8_t*[inLen];
- mtls->fep.inStrides = new StridePair[inLen];
+ if (inLen > 0) {
+
+ if (inLen <= RS_KERNEL_INPUT_THRESHOLD) {
+ mtls->fep.inPtrs = (const uint8_t**)mtls->inPtrsBuff;
+ mtls->fep.inStrides = mtls->inStridesBuff;
+ } else {
+ mtls->fep.heapAllocatedArrays = true;
+
+ mtls->fep.inPtrs = new const uint8_t*[inLen];
+ mtls->fep.inStrides = new StridePair[inLen];
+ }
+
+ mtls->fep.inLen = inLen;
for (int index = inLen; --index >= 0;) {
const Allocation *ain = ains[index];
- mtls->fep.ptrIns[index] =
+ mtls->fep.inPtrs[index] =
(const uint8_t*)ain->mHal.drvState.lod[0].mallocPtr;
mtls->fep.inStrides[index].eStride =
@@ -1033,41 +935,27 @@
}
}
- mtls->fep.ptrOut = NULL;
- mtls->fep.eStrideOut = 0;
- if (aout) {
- mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
- mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
+ mtls->fep.outPtr = NULL;
+ mtls->fep.outStride.eStride = 0;
+ mtls->fep.outStride.yStride = 0;
+ if (aout != NULL) {
+ mtls->fep.outPtr = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
+
+ mtls->fep.outStride.eStride = aout->getType()->getElementSizeBytes();
+ mtls->fep.outStride.yStride = aout->mHal.drvState.lod[0].stride;
}
}
void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
- const Allocation * ain,
+ const Allocation ** ains,
+ uint32_t inLen,
Allocation * aout,
const void * usr,
uint32_t usrLen,
const RsScriptCall *sc) {
MTLaunchStruct mtls;
- forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
- forEachKernelSetup(slot, &mtls);
-
- RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
- mCtx->launchThreads(ain, aout, sc, &mtls);
- mCtx->setTLS(oldTLS);
-}
-
-void RsdCpuScriptImpl::invokeForEachMulti(uint32_t slot,
- const Allocation ** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) {
-
- MTLaunchStruct mtls;
forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls);
forEachKernelSetup(slot, &mtls);
@@ -1255,9 +1143,9 @@
RsdCpuScriptImpl::~RsdCpuScriptImpl() {
#ifndef RS_COMPATIBILITY_LIB
if (mExecutable) {
- Vector<void *>::const_iterator var_addr_iter =
+ std::vector<void *>::const_iterator var_addr_iter =
mExecutable->getExportVarAddrs().begin();
- Vector<void *>::const_iterator var_addr_end =
+ std::vector<void *>::const_iterator var_addr_end =
mExecutable->getExportVarAddrs().end();
bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
@@ -1338,17 +1226,15 @@
return NULL;
}
-void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
-void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc)
-{
-}
+void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc) {}
}
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index f4ca1ed..c5fc183 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -47,9 +47,9 @@
class RsdCpuScriptImpl : public RsdCpuReferenceImpl::CpuScript {
public:
typedef void (*outer_foreach_t)(
- const RsForEachStubParamStruct *,
+ const RsExpandKernelParams *,
uint32_t x1, uint32_t x2,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
#ifdef RS_COMPATIBILITY_LIB
typedef void (* InvokeFunc_t)(void);
typedef void (* ForEachFunc_t)(void);
@@ -64,26 +64,22 @@
virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength);
virtual int invokeRoot();
- virtual void preLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
+ virtual void preLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout, const void * usr,
uint32_t usrLen, const RsScriptCall *sc);
- virtual void postLaunch(uint32_t slot, const Allocation * ain,
- Allocation * aout, const void * usr,
- uint32_t usrLen, const RsScriptCall *sc);
- virtual void invokeForEach(uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc);
+ virtual void postLaunch(uint32_t slot, const Allocation ** ains,
+ uint32_t inLen, Allocation * aout,
+ const void * usr, uint32_t usrLen,
+ const RsScriptCall *sc);
- virtual void invokeForEachMulti(uint32_t slot,
- const Allocation** ains,
- uint32_t inLen,
- Allocation* aout,
- const void* usr,
- uint32_t usrLen,
- const RsScriptCall* sc);
+ virtual void invokeForEach(uint32_t slot,
+ const Allocation ** ains,
+ uint32_t inLen,
+ Allocation* aout,
+ const void* usr,
+ uint32_t usrLen,
+ const RsScriptCall* sc);
+
virtual void invokeInit();
virtual void invokeFreeChildren();
@@ -100,10 +96,6 @@
const Script * getScript() {return mScript;}
- void forEachMtlsSetup(const Allocation * ain, Allocation * aout,
- const void * usr, uint32_t usrLen,
- const RsScriptCall *sc, MTLaunchStruct *mtls);
-
void forEachMtlsSetup(const Allocation ** ains, uint32_t inLen,
Allocation * aout, const void * usr, uint32_t usrLen,
const RsScriptCall *sc, MTLaunchStruct *mtls);
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index a9de00c..1d26f59 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -44,76 +44,93 @@
}
-typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
+typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
-void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
+void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep) {
+ uint32_t outstep) {
- const ScriptList *sl = (const ScriptList *)p->usr;
- RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
- const void *oldUsr = p->usr;
+ const ScriptList *sl = (const ScriptList *)kparams->usr;
+ RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
- for(size_t ct=0; ct < sl->count; ct++) {
+ const void **oldIns = mkparams->ins;
+ uint32_t *oldStrides = mkparams->inEStrides;
+
+ void *localIns[1];
+ uint32_t localStride[1];
+
+ mkparams->ins = (const void**)localIns;
+ mkparams->inEStrides = localStride;
+
+ for (size_t ct = 0; ct < sl->count; ct++) {
ScriptGroupRootFunc_t func;
- func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
- mp->usr = sl->usrPtrs[ct];
-
- mp->ptrIn = NULL;
- mp->in = NULL;
- mp->ptrOut = NULL;
- mp->out = NULL;
-
- uint32_t istep = 0;
- uint32_t ostep = 0;
+ func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
+ mkparams->usr = sl->usrPtrs[ct];
if (sl->ins[ct]) {
- mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
- istep = sl->ins[ct]->mHal.state.elementSizeBytes;
- mp->in = mp->ptrIn;
+ localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+
+ localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
+
if (sl->inExts[ct]) {
- mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
- } else {
- if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
- mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
- }
+ localIns[0] = (void*)
+ ((const uint8_t *)localIns[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y);
+
+ } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
+ localIns[0] = (void*)
+ ((const uint8_t *)localIns[0] +
+ sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid);
}
+
+ } else {
+ localIns[0] = NULL;
+ localStride[0] = 0;
}
+ uint32_t ostep;
if (sl->outs[ct]) {
- mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
- mp->out = mp->ptrOut;
+ mkparams->out =
+ (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
+
ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
+
if (sl->outExts[ct]) {
- mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
- } else {
- if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
- mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
- }
+ mkparams->out =
+ (uint8_t *)mkparams->out +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->y;
+
+ } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
+ mkparams->out =
+ (uint8_t *)mkparams->out +
+ sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
}
+ } else {
+ mkparams->out = NULL;
+ ostep = 0;
}
//ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
- func(p, xstart, xend, istep, ostep);
+ func(kparams, xstart, xend, ostep);
}
//ALOGE("script group root");
- //ConvolveParams *cp = (ConvolveParams *)p->usr;
-
- mp->usr = oldUsr;
+ mkparams->ins = oldIns;
+ mkparams->inEStrides = oldStrides;
+ mkparams->usr = sl;
}
void CpuScriptGroupImpl::execute() {
- Vector<Allocation *> ins;
- Vector<bool> inExts;
- Vector<Allocation *> outs;
- Vector<bool> outExts;
- Vector<const ScriptKernelID *> kernels;
+ std::vector<Allocation *> ins;
+ std::vector<char> inExts;
+ std::vector<Allocation *> outs;
+ std::vector<char> outExts;
+ std::vector<const ScriptKernelID *> kernels;
bool fieldDep = false;
for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
@@ -179,69 +196,108 @@
rsAssert((k->mHasKernelOutput == (aout != NULL)) &&
(k->mHasKernelInput == (ain != NULL)));
- ins.add(ain);
- inExts.add(inExt);
- outs.add(aout);
- outExts.add(outExt);
- kernels.add(k);
+ ins.push_back(ain);
+ inExts.push_back(inExt);
+ outs.push_back(aout);
+ outExts.push_back(outExt);
+ kernels.push_back(k);
}
}
MTLaunchStruct mtls;
- if(fieldDep) {
+ if (fieldDep) {
for (size_t ct=0; ct < ins.size(); ct++) {
Script *s = kernels[ct]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
uint32_t slot = kernels[ct]->mSlot;
- si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
+ uint32_t inLen;
+ const Allocation **ains;
+
+ if (ins[ct] == NULL) {
+ inLen = 0;
+ ains = NULL;
+
+ } else {
+ inLen = 1;
+ ains = const_cast<const Allocation**>(&ins[ct]);
+ }
+
+ si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls);
+
si->forEachKernelSetup(slot, &mtls);
- si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
- mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
- si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
+ si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
+ mtls.fep.usrLen, NULL);
+
+ mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls);
+
+ si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL);
}
} else {
ScriptList sl;
- sl.ins = ins.array();
- sl.outs = outs.array();
- sl.kernels = kernels.array();
- sl.count = kernels.size();
- Vector<const void *> usrPtrs;
- Vector<const void *> fnPtrs;
- Vector<uint32_t> sigs;
+ /*
+ * TODO: This is a hacky way of doing this and should be replaced by a
+ * call to std::vector's data() member once we have a C++11
+ * version of the STL.
+ */
+ sl.ins = &ins.front();
+ sl.outs = &outs.front();
+ sl.kernels = &kernels.front();
+ sl.count = kernels.size();
+
+ uint32_t inLen;
+ const Allocation **ains;
+
+ if (ins[0] == NULL) {
+ inLen = 0;
+ ains = NULL;
+
+ } else {
+ inLen = 1;
+ ains = const_cast<const Allocation**>(&ins[0]);
+ }
+
+ std::vector<const void *> usrPtrs;
+ std::vector<const void *> fnPtrs;
+ std::vector<uint32_t> sigs;
for (size_t ct=0; ct < kernels.size(); ct++) {
Script *s = kernels[ct]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
- fnPtrs.add((void *)mtls.kernel);
- usrPtrs.add(mtls.fep.usr);
- sigs.add(mtls.fep.usrLen);
- si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
+ fnPtrs.push_back((void *)mtls.kernel);
+ usrPtrs.push_back(mtls.fep.usr);
+ sigs.push_back(mtls.fep.usrLen);
+ si->preLaunch(kernels[ct]->mSlot, /* this kernel's own input, not ins[0] */ ins[ct] ? const_cast<const Allocation**>(&ins[ct]) : NULL,
+ ins[ct] ? 1 : 0, outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
}
- sl.sigs = sigs.array();
- sl.usrPtrs = usrPtrs.array();
- sl.fnPtrs = fnPtrs.array();
- sl.inExts = inExts.array();
- sl.outExts = outExts.array();
+
+ sl.sigs = &sigs.front();
+ sl.usrPtrs = &usrPtrs.front();
+ sl.fnPtrs = &fnPtrs.front();
+
+ sl.inExts = (bool*)&inExts.front();
+ sl.outExts = (bool*)&outExts.front();
Script *s = kernels[0]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
- si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
+
+ si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls);
+
mtls.script = NULL;
mtls.kernel = (void (*)())&scriptGroupRoot;
mtls.fep.usr = &sl;
- mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
+
+ mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls);
for (size_t ct=0; ct < kernels.size(); ct++) {
Script *s = kernels[ct]->mScript;
RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
- si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
+ si->postLaunch(kernels[ct]->mSlot, /* this kernel's own input, not ins[0] */ ins[ct] ? const_cast<const Allocation**>(&ins[ct]) : NULL,
+ ins[ct] ? 1 : 0, outs[ct], NULL, 0, NULL);
}
}
}
-
-
diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h
index 78e179d..1a4af05 100644
--- a/cpu_ref/rsCpuScriptGroup.h
+++ b/cpu_ref/rsCpuScriptGroup.h
@@ -33,9 +33,9 @@
CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg);
bool init();
- static void scriptGroupRoot(const RsForEachStubParamStruct *p,
+ static void scriptGroupRoot(const RsExpandKernelParams *p,
uint32_t xstart, uint32_t xend,
- uint32_t instep, uint32_t outstep);
+ uint32_t outstep);
protected:
struct ScriptList {
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h
index 0076cb9..4728b7c 100644
--- a/cpu_ref/rsd_cpu.h
+++ b/cpu_ref/rsd_cpu.h
@@ -69,21 +69,15 @@
virtual void populateScript(Script *) = 0;
virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength) = 0;
virtual int invokeRoot() = 0;
+
virtual void invokeForEach(uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) = 0;
-
- virtual void invokeForEachMulti(uint32_t slot,
- const Allocation** ains,
- uint32_t inLen,
- Allocation * aout,
- const void * usr,
- uint32_t usrLen,
- const RsScriptCall *sc) = 0;
-
+ const Allocation ** ains,
+ uint32_t inLen,
+ Allocation * aout,
+ const void * usr,
+ uint32_t usrLen,
+ const RsScriptCall *sc) = 0;
+
virtual void invokeInit() = 0;
virtual void invokeFreeChildren() = 0;
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 9a40756..0586785 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -1213,5 +1213,3 @@
}
#endif
}
-
-
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index 27029cf..419422a 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include <vector>
+
#include "../cpu_ref/rsd_cpu.h"
#include "rsdCore.h"
@@ -26,7 +28,6 @@
#include "rsScriptC.h"
#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
-#include "utils/Vector.h"
#include "utils/Timers.h"
#include "utils/StopWatch.h"
#endif
@@ -43,8 +44,9 @@
size_t bitcodeSize,
uint32_t flags) {
RsdHal *dc = (RsdHal *)rsc->mHal.drv;
- RsdCpuReference::CpuScript * cs = dc->mCpuRef->createScript(script, resName, cacheDir,
- bitcode, bitcodeSize, flags);
+ RsdCpuReference::CpuScript * cs =
+ dc->mCpuRef->createScript(script, resName, cacheDir, bitcode,
+ bitcodeSize, flags);
if (cs == NULL) {
return false;
}
@@ -53,7 +55,8 @@
return true;
}
-bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) {
+bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid,
+ Element *e) {
RsdHal *dc = (RsdHal *)rsc->mHal.drv;
RsdCpuReference::CpuScript * cs = dc->mCpuRef->createIntrinsic(s, iid, e);
if (cs == NULL) {
@@ -73,8 +76,15 @@
size_t usrLen,
const RsScriptCall *sc) {
- RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
- cs->invokeForEach(slot, ain, aout, usr, usrLen, sc);
+ if (ain == NULL) {
+ rsdScriptInvokeForEachMulti(rsc, s, slot, NULL, 0, aout, usr, usrLen,
+ sc);
+ } else {
+ const Allocation *ains[1] = {ain};
+
+ rsdScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen,
+ sc);
+ }
}
void rsdScriptInvokeForEachMulti(const Context *rsc,
@@ -88,7 +98,7 @@
const RsScriptCall *sc) {
RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
- cs->invokeForEachMulti(slot, ains, inLen, aout, usr, usrLen, sc);
+ cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc);
}
diff --git a/driver/rsdMeshObj.cpp b/driver/rsdMeshObj.cpp
index 66c3b18..8f072a5 100644
--- a/driver/rsdMeshObj.cpp
+++ b/driver/rsdMeshObj.cpp
@@ -112,9 +112,9 @@
mAttribs[userNum].type = rsdTypeToGLType(f->mHal.state.dataType);
mAttribs[userNum].normalized = f->mHal.state.dataType != RS_TYPE_FLOAT_32;
mAttribs[userNum].stride = stride;
- String8 tmp(RS_SHADER_ATTR);
+ std::string tmp(RS_SHADER_ATTR);
tmp.append(elem->mHal.state.fieldNames[fieldI]);
- mAttribs[userNum].name.setTo(tmp.string());
+ mAttribs[userNum].name = tmp.c_str();
// Remember which allocation this attribute came from
mAttribAllocationIndex[userNum] = ct;
diff --git a/driver/rsdShader.cpp b/driver/rsdShader.cpp
index 0b182ff..d1a486b 100644
--- a/driver/rsdShader.cpp
+++ b/driver/rsdShader.cpp
@@ -33,7 +33,7 @@
const char * shaderText, size_t shaderLength,
const char** textureNames, size_t textureNamesCount,
const size_t *textureNamesLength) {
- mUserShader.setTo(shaderText, shaderLength);
+ mUserShader.assign(shaderText, shaderLength); // copy exactly shaderLength bytes, as String8::setTo(ptr, len) did
mRSProgram = p;
mType = type;
initMemberVars();
@@ -41,13 +41,14 @@
init(textureNames, textureNamesCount, textureNamesLength);
for(size_t i=0; i < textureNamesCount; i++) {
- mTextureNames.push(String8(textureNames[i], textureNamesLength[i]));
+ mTextureNames.push_back(std::string(textureNames[i],
+ textureNamesLength[i]));
}
}
RsdShader::~RsdShader() {
for (uint32_t i = 0; i < mStateBasedShaders.size(); i ++) {
- StateBasedKey *state = mStateBasedShaders.itemAt(i);
+ StateBasedKey *state = mStateBasedShaders[i];
if (state->mShaderID) {
glDeleteShader(state->mShaderID);
}
@@ -76,7 +77,7 @@
RsdShader::StateBasedKey *returnKey = NULL;
for (uint32_t i = 0; i < mStateBasedShaders.size(); i ++) {
- returnKey = mStateBasedShaders.itemAt(i);
+ returnKey = mStateBasedShaders[i];
for (uint32_t ct = 0; ct < mRSProgram->mHal.state.texturesCount; ct ++) {
uint32_t texType = 0;
@@ -108,7 +109,7 @@
// We have not created a shader for this particular state yet
state = new StateBasedKey(mTextureCount);
mCurrentState = state;
- mStateBasedShaders.add(state);
+ mStateBasedShaders.push_back(state);
createShader();
loadShader(rsc);
return mCurrentState->mShaderID;
@@ -129,15 +130,15 @@
mTextureUniformIndexStart = uniformCount;
for (uint32_t ct=0; ct < mRSProgram->mHal.state.texturesCount; ct++) {
- mUniformNames[uniformCount].setTo("UNI_");
+ mUniformNames[uniformCount] = "UNI_";
mUniformNames[uniformCount].append(textureNames[ct], textureNamesLength[ct]);
mUniformArraySizes[uniformCount] = 1;
uniformCount++;
}
}
-String8 RsdShader::getGLSLInputString() const {
- String8 s;
+std::string RsdShader::getGLSLInputString() const {
+ std::string s;
for (uint32_t ct=0; ct < mRSProgram->mHal.state.inputElementsCount; ct++) {
const Element *e = mRSProgram->mHal.state.inputElements[ct];
for (uint32_t field=0; field < e->mHal.state.fieldsCount; field++) {
@@ -237,11 +238,11 @@
if (rsc->props.mLogShaders) {
ALOGV("Loading shader type %x, ID %i", mType, mCurrentState->mShaderID);
- ALOGV("%s", mShader.string());
+ ALOGV("%s", mShader.c_str());
}
if (mCurrentState->mShaderID) {
- const char * ss = mShader.string();
+ const char * ss = mShader.c_str();
RSD_CALL_GL(glShaderSource, mCurrentState->mShaderID, 1, &ss, NULL);
RSD_CALL_GL(glCompileShader, mCurrentState->mShaderID);
@@ -299,7 +300,9 @@
mShader.append(fn);
if (e->mHal.state.fieldArraySizes[field] > 1) {
- mShader.appendFormat("[%d]", e->mHal.state.fieldArraySizes[field]);
+ mShader += "[";
+ mShader += std::to_string(e->mHal.state.fieldArraySizes[field]);
+ mShader += "]";
}
mShader.append(";\n");
}
@@ -585,27 +588,28 @@
mUniformCount += mRSProgram->mHal.state.texturesCount;
if (mAttribCount) {
- mAttribNames = new String8[mAttribCount];
+ mAttribNames = new std::string[mAttribCount];
}
if (mUniformCount) {
- mUniformNames = new String8[mUniformCount];
+ mUniformNames = new std::string[mUniformCount];
mUniformArraySizes = new uint32_t[mUniformCount];
}
mTextureCount = mRSProgram->mHal.state.texturesCount;
}
-void RsdShader::initAddUserElement(const Element *e, String8 *names, uint32_t *arrayLengths,
- uint32_t *count, const char *prefix) {
+void RsdShader::initAddUserElement(const Element *e, std::string *names,
+ uint32_t *arrayLengths, uint32_t *count,
+ const char *prefix) {
rsAssert(e->mHal.state.fieldsCount);
for (uint32_t ct=0; ct < e->mHal.state.fieldsCount; ct++) {
const Element *ce = e->mHal.state.fields[ct];
if (ce->mHal.state.fieldsCount) {
initAddUserElement(ce, names, arrayLengths, count, prefix);
} else {
- String8 tmp(prefix);
+ std::string tmp(prefix);
tmp.append(e->mHal.state.fieldNames[ct]);
- names[*count].setTo(tmp.string());
+ names[*count] = tmp;
if (arrayLengths) {
arrayLengths[*count] = e->mHal.state.fieldArraySizes[ct];
}
diff --git a/driver/rsdShader.h b/driver/rsdShader.h
index fba1790..0dc5102 100644
--- a/driver/rsdShader.h
+++ b/driver/rsdShader.h
@@ -17,7 +17,7 @@
#ifndef ANDROID_RSD_SHADER_H
#define ANDROID_RSD_SHADER_H
-#include <utils/String8.h>
+#include <string>
// ---------------------------------------------------------------------------
namespace android {
@@ -49,16 +49,16 @@
// Add ability to get all ID's to clean up the cached program objects
uint32_t getStateBasedIDCount() const { return mStateBasedShaders.size(); }
uint32_t getStateBasedID(uint32_t index) const {
- return mStateBasedShaders.itemAt(index)->mShaderID;
+ return mStateBasedShaders[index]->mShaderID;
}
uint32_t getAttribCount() const {return mAttribCount;}
uint32_t getUniformCount() const {return mUniformCount;}
- const android::String8 & getAttribName(uint32_t i) const {return mAttribNames[i];}
- const android::String8 & getUniformName(uint32_t i) const {return mUniformNames[i];}
+ const std::string & getAttribName(uint32_t i) const {return mAttribNames[i];}
+ const std::string & getUniformName(uint32_t i) const {return mUniformNames[i];}
uint32_t getUniformArraySize(uint32_t i) const {return mUniformArraySizes[i];}
- android::String8 getGLSLInputString() const;
+ std::string getGLSLInputString() const;
bool isValid() const {return mIsValid;}
void forceDirty() const {mDirty = true;}
@@ -91,7 +91,7 @@
void setupUserConstants(const android::renderscript::Context *rsc,
RsdShaderCache *sc, bool isFragment);
void initAddUserElement(const android::renderscript::Element *e,
- android::String8 *names, uint32_t *arrayLengths,
+ std::string *names, uint32_t *arrayLengths,
uint32_t *count, const char *prefix);
void setupTextures(const android::renderscript::Context *rsc, RsdShaderCache *sc);
void setupSampler(const android::renderscript::Context *rsc,
@@ -104,21 +104,21 @@
void initAttribAndUniformArray();
mutable bool mDirty;
- android::String8 mShader;
- android::String8 mUserShader;
+ std::string mShader;
+ std::string mUserShader;
uint32_t mType;
uint32_t mTextureCount;
StateBasedKey *mCurrentState;
uint32_t mAttribCount;
uint32_t mUniformCount;
- android::String8 *mAttribNames;
- android::String8 *mUniformNames;
+ std::string *mAttribNames;
+ std::string *mUniformNames;
uint32_t *mUniformArraySizes;
- android::Vector<android::String8> mTextureNames;
+ std::vector<std::string> mTextureNames;
- android::Vector<StateBasedKey*> mStateBasedShaders;
+ std::vector<StateBasedKey*> mStateBasedShaders;
int32_t mTextureUniformIndexStart;
@@ -133,7 +133,3 @@
};
#endif //ANDROID_RSD_SHADER_H
-
-
-
-
diff --git a/driver/rsdShaderCache.cpp b/driver/rsdShaderCache.cpp
index 69b43fc..0e36b49 100644
--- a/driver/rsdShaderCache.cpp
+++ b/driver/rsdShaderCache.cpp
@@ -29,7 +29,7 @@
RsdShaderCache::RsdShaderCache() {
- mEntries.setCapacity(16);
+ mEntries.reserve(16);
mVertexDirty = true;
mFragmentDirty = true;
}
@@ -38,9 +38,13 @@
cleanupAll();
}
-void RsdShaderCache::updateUniformArrayData(const Context *rsc, RsdShader *prog, uint32_t linkedID,
- UniformData *data, const char* logTag,
- UniformQueryData **uniformList, uint32_t uniListSize) {
+void RsdShaderCache::updateUniformArrayData(const Context *rsc,
+ RsdShader *prog,
+ uint32_t linkedID,
+ UniformData *data,
+ const char* logTag,
+ UniformQueryData **uniformList,
+ uint32_t uniListSize) {
for (uint32_t ct=0; ct < prog->getUniformCount(); ct++) {
if (data[ct].slot >= 0 && data[ct].arraySize > 1) {
@@ -55,14 +59,17 @@
if (rsc->props.mLogShaders) {
ALOGV("%s U, %s = %d, arraySize = %d\n", logTag,
- prog->getUniformName(ct).string(), data[ct].slot, data[ct].arraySize);
+ prog->getUniformName(ct).c_str(), data[ct].slot,
+ data[ct].arraySize);
}
}
}
-void RsdShaderCache::populateUniformData(RsdShader *prog, uint32_t linkedID, UniformData *data) {
+void RsdShaderCache::populateUniformData(RsdShader *prog, uint32_t linkedID,
+ UniformData *data) {
for (uint32_t ct=0; ct < prog->getUniformCount(); ct++) {
- data[ct].slot = glGetUniformLocation(linkedID, prog->getUniformName(ct));
+ data[ct].slot = glGetUniformLocation(linkedID,
+ prog->getUniformName(ct).c_str());
data[ct].arraySize = prog->getUniformArraySize(ct);
}
}
@@ -132,7 +139,7 @@
ProgramEntry *e = new ProgramEntry(vtx->getAttribCount(),
vtx->getUniformCount(),
frag->getUniformCount());
- mEntries.push(e);
+ mEntries.push_back(e);
mCurrent = e;
e->vtx = vID;
e->frag = fID;
@@ -169,10 +176,12 @@
}
for (uint32_t ct=0; ct < e->vtxAttrCount; ct++) {
- e->vtxAttrs[ct].slot = glGetAttribLocation(pgm, vtx->getAttribName(ct));
- e->vtxAttrs[ct].name = vtx->getAttribName(ct).string();
+ e->vtxAttrs[ct].slot =
+ glGetAttribLocation(pgm, vtx->getAttribName(ct).c_str());
+ e->vtxAttrs[ct].name = vtx->getAttribName(ct).c_str();
if (rsc->props.mLogShaders) {
- ALOGV("vtx A %i, %s = %d\n", ct, vtx->getAttribName(ct).string(), e->vtxAttrs[ct].slot);
+ ALOGV("vtx A %i, %s = %d\n", ct,
+ vtx->getAttribName(ct).c_str(), e->vtxAttrs[ct].slot);
}
}
@@ -228,7 +237,7 @@
return true;
}
-int32_t RsdShaderCache::vtxAttribSlot(const String8 &attrName) const {
+int32_t RsdShaderCache::vtxAttribSlot(const std::string &attrName) const {
for (uint32_t ct=0; ct < mCurrent->vtxAttrCount; ct++) {
if (attrName == mCurrent->vtxAttrs[ct].name) {
return mCurrent->vtxAttrs[ct].slot;
@@ -238,46 +247,45 @@
}
void RsdShaderCache::cleanupVertex(RsdShader *s) {
- int32_t numEntries = (int32_t)mEntries.size();
uint32_t numShaderIDs = s->getStateBasedIDCount();
for (uint32_t sId = 0; sId < numShaderIDs; sId ++) {
uint32_t id = s->getStateBasedID(sId);
- for (int32_t ct = 0; ct < numEntries; ct ++) {
- if (mEntries[ct]->vtx == id) {
- glDeleteProgram(mEntries[ct]->program);
- delete mEntries[ct];
- mEntries.removeAt(ct);
- numEntries = (int32_t)mEntries.size();
- ct --;
+ for (auto entry = mEntries.begin(); entry != mEntries.end();) {
+ if ((*entry)->vtx == id) {
+ glDeleteProgram((*entry)->program);
+
+ delete *entry;
+ entry = mEntries.erase(entry);
+ } else {
+ entry++;
}
}
}
}
void RsdShaderCache::cleanupFragment(RsdShader *s) {
- int32_t numEntries = (int32_t)mEntries.size();
uint32_t numShaderIDs = s->getStateBasedIDCount();
for (uint32_t sId = 0; sId < numShaderIDs; sId ++) {
uint32_t id = s->getStateBasedID(sId);
- for (int32_t ct = 0; ct < numEntries; ct ++) {
- if (mEntries[ct]->frag == id) {
- glDeleteProgram(mEntries[ct]->program);
- delete mEntries[ct];
- mEntries.removeAt(ct);
- numEntries = (int32_t)mEntries.size();
- ct --;
+ for (auto entry = mEntries.begin(); entry != mEntries.end();) {
+ if ((*entry)->frag == id) {
+ glDeleteProgram((*entry)->program);
+
+ delete *entry;
+ entry = mEntries.erase(entry);
+ } else {
+ entry++;
}
}
}
}
void RsdShaderCache::cleanupAll() {
- for (uint32_t ct=0; ct < mEntries.size(); ct++) {
- glDeleteProgram(mEntries[ct]->program);
- free(mEntries[ct]);
+ for (auto entry : mEntries) {
+ glDeleteProgram(entry->program);
+ delete entry;
}
mEntries.clear();
}
-
diff --git a/driver/rsdShaderCache.h b/driver/rsdShaderCache.h
index 6de1d63..9b45092 100644
--- a/driver/rsdShaderCache.h
+++ b/driver/rsdShaderCache.h
@@ -17,6 +17,9 @@
#ifndef ANDROID_RSD_SHADER_CACHE_H
#define ANDROID_RSD_SHADER_CACHE_H
+#include <string>
+#include <vector>
+
namespace android {
namespace renderscript {
@@ -25,10 +28,7 @@
}
}
-#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
-#include <utils/String8.h>
-#include <utils/Vector.h>
-#else
+#if defined(RS_SERVER) || defined(RS_COMPATIBILITY_LIB)
#include "rsUtils.h"
#endif
class RsdShader;
@@ -58,7 +58,7 @@
void cleanupAll();
- int32_t vtxAttribSlot(const android::String8 &attrName) const;
+ int32_t vtxAttribSlot(const std::string &attrName) const;
int32_t vtxUniformSlot(uint32_t a) const {return mCurrent->vtxUniforms[a].slot;}
uint32_t vtxUniformSize(uint32_t a) const {return mCurrent->vtxUniforms[a].arraySize;}
int32_t fragUniformSlot(uint32_t a) const {return mCurrent->fragUniforms[a].slot;}
@@ -143,7 +143,7 @@
UniformData *fragUniforms;
bool *fragUniformIsSTO;
};
- android::Vector<ProgramEntry*> mEntries;
+ std::vector<ProgramEntry*> mEntries;
ProgramEntry *mCurrent;
bool hasArrayUniforms(RsdShader *vtx, RsdShader *frag);
@@ -156,7 +156,3 @@
#endif //ANDROID_RSD_SHADER_CACHE_H
-
-
-
-
diff --git a/driver/rsdVertexArray.cpp b/driver/rsdVertexArray.cpp
index 4e293f6..d0a9b3e 100644
--- a/driver/rsdVertexArray.cpp
+++ b/driver/rsdVertexArray.cpp
@@ -48,7 +48,7 @@
stride = 0;
ptr = NULL;
normalized = false;
- name.setTo("");
+ name = "";
}
void RsdVertexArray::Attrib::set(uint32_t type, uint32_t size, uint32_t stride,
@@ -60,7 +60,7 @@
this->offset = offset;
this->normalized = normalized;
this->stride = stride;
- this->name.setTo(name);
+ this->name = name;
}
void RsdVertexArray::logAttrib(uint32_t idx, uint32_t slot) const {
@@ -69,7 +69,7 @@
}
ALOGV("va %i: slot=%i name=%s buf=%i ptr=%p size=%i type=0x%x stride=0x%x norm=%i offset=0x%p",
idx, slot,
- mAttribs[idx].name.string(),
+ mAttribs[idx].name.c_str(),
mAttribs[idx].buffer,
mAttribs[idx].ptr,
mAttribs[idx].size,
@@ -135,4 +135,3 @@
mAttrsEnabled[ct] = false;
}
}
-
diff --git a/driver/rsdVertexArray.h b/driver/rsdVertexArray.h
index 975121b..1bafe3b 100644
--- a/driver/rsdVertexArray.h
+++ b/driver/rsdVertexArray.h
@@ -17,6 +17,8 @@
#ifndef ANDROID_RSD_VERTEX_ARRAY_H
#define ANDROID_RSD_VERTEX_ARRAY_H
+#include <string>
+
#include "rsUtils.h"
namespace android {
@@ -39,7 +41,7 @@
uint32_t size;
uint32_t stride;
bool normalized;
- android::String8 name;
+ std::string name;
Attrib();
void clear();
@@ -74,6 +76,3 @@
#endif //ANDROID_RSD_VERTEX_ARRAY_H
-
-
-
diff --git a/driver/runtime/rs_cl.c b/driver/runtime/rs_cl.c
index fe45420..a79ad2a 100644
--- a/driver/runtime/rs_cl.c
+++ b/driver/runtime/rs_cl.c
@@ -548,7 +548,7 @@
extern float __attribute__((overloadable)) rootn(float v, int r) {
if (r == 0) {
- return posinf(0);
+ return posinf();
}
if (iszero(v)) {
diff --git a/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java b/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
index 3acfe98..3047a56 100644
--- a/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
+++ b/java/tests/RsTest/src/com/android/rs/test/RSTestCore.java
@@ -93,7 +93,7 @@
/*unitTests.add(new UT_program_store(this, mRes, mCtx));
unitTests.add(new UT_program_raster(this, mRes, mCtx));
unitTests.add(new UT_mesh(this, mRes, mCtx));*/
- //unitTests.add(new UT_foreach_multi(this, mRes, mCtx));
+ unitTests.add(new UT_foreach_multi(this, mRes, mCtx));
unitTests.add(new UT_fp_mad(this, mRes, mCtx));
/*
diff --git a/java/tests/RsTest/src/com/android/rs/test/UT_foreach_multi.java b/java/tests/RsTest/src/com/android/rs/test/UT_foreach_multi.java
new file mode 100644
index 0000000..1a05f80
--- /dev/null
+++ b/java/tests/RsTest/src/com/android/rs/test/UT_foreach_multi.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.test;
+
+import android.content.Context;
+import android.content.res.Resources;
+import android.renderscript.*;
+import android.util.Log;
+
+public class UT_foreach_multi extends UnitTest {
+ private Resources mRes;
+ private Allocation Ain0;
+ private Allocation Ain1;
+ private Allocation Ain2;
+ private Allocation Ain3;
+
+ private Allocation Out0;
+ private Allocation Out1;
+ private Allocation Out2;
+ private Allocation Out3;
+
+ protected UT_foreach_multi(RSTestCore rstc, Resources res, Context ctx) {
+ super(rstc, "Foreach Multi-input", ctx);
+ mRes = res;
+ }
+
+ private void initializeGlobals(RenderScript RS, ScriptC_foreach_multi s) {
+ Type.Builder type32Builder = new Type.Builder(RS, Element.U32(RS));
+ Type.Builder type16Builder = new Type.Builder(RS, Element.U16(RS));
+
+ int Xdim = 5;
+ s.set_dimX(Xdim);
+ type32Builder.setX(Xdim);
+ type16Builder.setX(Xdim);
+
+ // 32-bit input allocations
+
+ Ain0 = Allocation.createTyped(RS, type32Builder.create());
+ s.set_ain0(Ain0);
+ s.forEach_init_uint32_alloc(Ain0);
+
+ Ain1 = Allocation.createTyped(RS, type32Builder.create());
+ s.set_ain1(Ain1);
+ s.forEach_init_uint32_alloc(Ain1);
+
+ Ain2 = Allocation.createTyped(RS, type32Builder.create());
+ s.set_ain2(Ain2);
+ s.forEach_init_uint32_alloc(Ain2);
+
+ // 16-bit input allocation
+
+ Ain3 = Allocation.createTyped(RS, type16Builder.create());
+ s.set_ain3(Ain3);
+ s.forEach_init_uint16_alloc(Ain3);
+
+ // 32-bit output allocations
+
+ Out0 = Allocation.createTyped(RS, type32Builder.create());
+ s.set_aout0(Out0);
+
+ Out1 = Allocation.createTyped(RS, type32Builder.create());
+ s.set_aout1(Out1);
+
+ Out2 = Allocation.createTyped(RS, type32Builder.create());
+ s.set_aout2(Out2);
+
+ // RetStruct output allocations
+
+ ScriptField_RetStruct StructType = new ScriptField_RetStruct(RS, Xdim);
+ Out3 = StructType.getAllocation();
+ s.set_aout3(Out3);
+
+ return;
+ }
+
+ public void run() {
+ RenderScript pRS = RenderScript.create(mCtx);
+ ScriptC_foreach_multi s = new ScriptC_foreach_multi(pRS);
+
+ pRS.setMessageHandler(mRsMessage);
+
+ initializeGlobals(pRS, s);
+
+ s.forEach_sum2(Ain0, Ain1, Out0);
+ s.forEach_sum3(Ain0, Ain1, Ain2, Out1);
+ s.forEach_sum_mixed(Ain0, Ain3, Out2);
+ s.forEach_sum2_struct(Ain0, Ain1, Out3);
+
+ s.invoke_test_outputs();
+ s.invoke_check_test_results();
+
+ pRS.finish();
+ waitForMessage();
+ pRS.destroy();
+ }
+}
diff --git a/java/tests/RsTest/src/com/android/rs/test/foreach_multi.rs b/java/tests/RsTest/src/com/android/rs/test/foreach_multi.rs
new file mode 100644
index 0000000..0857e86
--- /dev/null
+++ b/java/tests/RsTest/src/com/android/rs/test/foreach_multi.rs
@@ -0,0 +1,178 @@
+#include "shared.rsh"
+
+struct RetStruct {
+ uint32_t i0;
+ uint32_t i1;
+ uint32_t i2;
+ uint32_t i3;
+ uint32_t i4;
+ uint32_t i5;
+ uint32_t i6;
+ uint32_t i7;
+};
+
+rs_allocation ain0, ain1, ain2;
+rs_allocation ain3;
+
+rs_allocation aout0, aout1, aout2, aout3;
+
+uint32_t dimX;
+
+static bool failed = false;
+
+uint32_t RS_KERNEL init_uint32_alloc(uint32_t x) {
+ return x;
+}
+
+uint16_t RS_KERNEL init_uint16_alloc(uint32_t x) {
+ return x;
+}
+
+uint32_t RS_KERNEL sum2(uint32_t in0, uint32_t in1, uint32_t x) {
+ _RS_ASSERT(in0 == x);
+ _RS_ASSERT(in1 == x);
+
+ return in0 + in1;
+}
+
+struct RetStruct RS_KERNEL
+sum2_struct(uint32_t in0, uint32_t in1, uint32_t x) {
+
+ _RS_ASSERT(in0 == x);
+ _RS_ASSERT(in1 == x);
+
+ struct RetStruct retval;
+
+ retval.i0 = in0 + in1;
+ retval.i1 = in0 + in1;
+ retval.i2 = in0 + in1;
+ retval.i3 = in0 + in1;
+ retval.i4 = in0 + in1;
+ retval.i5 = in0 + in1;
+ retval.i6 = in0 + in1;
+ retval.i7 = in0 + in1;
+
+ return retval;
+}
+
+uint32_t RS_KERNEL sum3(uint32_t in0, uint32_t in1, uint32_t in2, uint32_t x) {
+ _RS_ASSERT(in0 == x);
+ _RS_ASSERT(in1 == x);
+ _RS_ASSERT(in2 == x);
+
+ return in0 + in1 + in2;
+}
+
+
+uint32_t RS_KERNEL sum_mixed(uint32_t in0, uint16_t in1, uint32_t x) {
+ _RS_ASSERT(in0 == x);
+ _RS_ASSERT(in1 == x);
+
+ return in0 + in1;
+}
+
+static bool test_sum2_output() {
+ bool failed = false;
+ uint32_t i;
+
+ for (i = 0; i < dimX; i++) {
+ _RS_ASSERT(rsGetElementAt_uint(aout0, i) ==
+ (rsGetElementAt_uint(ain0, i) +
+ rsGetElementAt_uint(ain1, i)));
+ }
+
+ if (failed) {
+ rsDebug("test_sum2_output FAILED", 0);
+ }
+ else {
+ rsDebug("test_sum2_output PASSED", 0);
+ }
+
+ return failed;
+}
+
+static bool test_sum3_output() {
+ bool failed = false;
+ uint32_t i;
+
+ for (i = 0; i < dimX; i++) {
+ _RS_ASSERT(rsGetElementAt_uint(aout1, i) ==
+ (rsGetElementAt_uint(ain0, i) +
+ rsGetElementAt_uint(ain1, i) +
+ rsGetElementAt_uint(ain2, i)));
+ }
+
+ if (failed) {
+ rsDebug("test_sum3_output FAILED", 0);
+ }
+ else {
+ rsDebug("test_sum3_output PASSED", 0);
+ }
+
+ return failed;
+}
+
+static bool test_sum_mixed_output() {
+ bool failed = false;
+ uint32_t i;
+
+ for (i = 0; i < dimX; i++) {
+ _RS_ASSERT(rsGetElementAt_uint(aout2, i) ==
+ (rsGetElementAt_uint(ain0, i) +
+ rsGetElementAt_ushort(ain3, i)));
+ }
+
+ if (failed) {
+ rsDebug("test_sum_mixed_output FAILED", 0);
+ }
+ else {
+ rsDebug("test_sum_mixed_output PASSED", 0);
+ }
+
+ return failed;
+}
+
+static bool test_sum2_struct_output() {
+ bool failed = false;
+ uint32_t i;
+
+ for (i = 0; i < dimX; i++) {
+ struct RetStruct *result = (struct RetStruct*)rsGetElementAt(aout3, i);
+
+ uint32_t sum = rsGetElementAt_uint(ain0, i) +
+ rsGetElementAt_uint(ain1, i);
+
+ _RS_ASSERT(result->i0 == sum);
+ _RS_ASSERT(result->i1 == sum);
+ _RS_ASSERT(result->i2 == sum);
+ _RS_ASSERT(result->i3 == sum);
+ _RS_ASSERT(result->i4 == sum);
+ _RS_ASSERT(result->i5 == sum);
+ _RS_ASSERT(result->i6 == sum);
+ _RS_ASSERT(result->i7 == sum);
+ }
+
+ if (failed) {
+ rsDebug("test_sum2_struct_output FAILED", 0);
+ }
+ else {
+ rsDebug("test_sum2_struct_output PASSED", 0);
+ }
+
+ return failed;
+}
+
+void test_outputs() {
+ failed |= test_sum2_output();
+ failed |= test_sum3_output();
+ failed |= test_sum_mixed_output();
+ failed |= test_sum2_struct_output();
+}
+
+void check_test_results() {
+ if (failed) {
+ rsSendToClientBlocking(RS_MSG_TEST_FAILED);
+ } else {
+ rsSendToClientBlocking(RS_MSG_TEST_PASSED);
+ }
+}
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index 0443ee8..b13467a 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -223,34 +223,40 @@
}
if (y >= mHal.drvState.lod[0].dimY) {
- rsc->setError(RS_ERROR_BAD_VALUE, "subElementData X offset out of range.");
+ rsc->setError(RS_ERROR_BAD_VALUE,
+ "subElementData Y offset out of range.");
return;
}
if (cIdx >= mHal.state.type->getElement()->getFieldCount()) {
- rsc->setError(RS_ERROR_BAD_VALUE, "subElementData component out of range.");
+ rsc->setError(RS_ERROR_BAD_VALUE,
+ "subElementData component out of range.");
return;
}
const Element * e = mHal.state.type->getElement()->getField(cIdx);
- uint32_t elemArraySize = mHal.state.type->getElement()->getFieldArraySize(cIdx);
+ uint32_t elemArraySize =
+ mHal.state.type->getElement()->getFieldArraySize(cIdx);
if (sizeBytes != e->getSizeBytes() * elemArraySize) {
rsc->setError(RS_ERROR_BAD_VALUE, "subElementData bad size.");
return;
}
- rsc->mHal.funcs.allocation.elementData2D(rsc, this, x, y, data, cIdx, sizeBytes);
+ rsc->mHal.funcs.allocation.elementData2D(rsc, this, x, y, data, cIdx,
+ sizeBytes);
sendDirty(rsc);
}
void Allocation::addProgramToDirty(const Program *p) {
- mToDirtyList.push(p);
+ mToDirtyList.push_back(p);
}
void Allocation::removeProgramToDirty(const Program *p) {
- for (size_t ct=0; ct < mToDirtyList.size(); ct++) {
- if (mToDirtyList[ct] == p) {
- mToDirtyList.removeAt(ct);
+ for (auto entryIter = mToDirtyList.begin(), endIter = mToDirtyList.end();
+ entryIter != endIter; entryIter++) {
+
+ if (p == *entryIter) {
+ mToDirtyList.erase(entryIter);
return;
}
}
@@ -268,7 +274,8 @@
}
}
- ALOGV("%s allocation ptr=%p mUsageFlags=0x04%x, mMipmapControl=0x%04x",
- prefix, mHal.drvState.lod[0].mallocPtr, mHal.state.usageFlags, mHal.state.mipmapControl);
+ ALOGV("%s allocation ptr=%p mUsageFlags=0x%04x, mMipmapControl=0x%04x",
+ prefix, mHal.drvState.lod[0].mallocPtr, mHal.state.usageFlags,
+ mHal.state.mipmapControl);
}
uint32_t Allocation::getPackedSize() const {
diff --git a/rsAllocation.h b/rsAllocation.h
index f197efc..47344d8 100644
--- a/rsAllocation.h
+++ b/rsAllocation.h
@@ -170,7 +170,7 @@
bool hasSameDims(const Allocation *Other) const;
protected:
- Vector<const Program *> mToDirtyList;
+ std::vector<const Program *> mToDirtyList;
ObjectBaseRef<const Type> mType;
void setType(const Type *t) {
mType.set(t);
diff --git a/rsContext.cpp b/rsContext.cpp
index 2112ace..0d9ca05 100644
--- a/rsContext.cpp
+++ b/rsContext.cpp
@@ -747,13 +747,15 @@
void Context::assignName(ObjectBase *obj, const char *name, uint32_t len) {
rsAssert(!obj->getName());
obj->setName(name, len);
- mNames.add(obj);
+ mNames.push_back(obj);
}
void Context::removeName(ObjectBase *obj) {
- for (size_t ct=0; ct < mNames.size(); ct++) {
- if (obj == mNames[ct]) {
- mNames.removeAt(ct);
+ for (auto nameIter = mNames.begin(), endIter = mNames.end();
+ nameIter != endIter; nameIter++) {
+
+ if (obj == *nameIter) {
+ mNames.erase(nameIter);
return;
}
}
@@ -988,4 +990,3 @@
ObjectBase *ob = static_cast<ObjectBase *>(obj);
(*name) = ob->getName();
}
-
diff --git a/rsContext.h b/rsContext.h
index b382358..f750670 100644
--- a/rsContext.h
+++ b/rsContext.h
@@ -297,7 +297,7 @@
bool mHasSurface;
bool mIsContextLite;
- Vector<ObjectBase *> mNames;
+ std::vector<ObjectBase *> mNames;
uint64_t mTimers[_RS_TIMER_TOTAL];
Timers mTimerActive;
diff --git a/rsCppUtils.h b/rsCppUtils.h
index 71cf077..7432109 100644
--- a/rsCppUtils.h
+++ b/rsCppUtils.h
@@ -19,8 +19,6 @@
#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
#include <utils/Log.h>
-#include <utils/String8.h>
-#include <utils/Vector.h>
#include <cutils/atomic.h>
#endif
@@ -54,96 +52,6 @@
#define ALOGV(...) \
__android_log_print(ANDROID_LOG_VERBOSE, LOG_TAG, __VA_ARGS__);
-namespace android {
-
- // server has no Vector or String8 classes; implement on top of STL
- class String8: public std::string {
- public:
- String8(const char *ptr) : std::string(ptr) {
-
- }
- String8(const char *ptr, size_t len) : std::string(ptr, len) {
-
- }
- String8() : std::string() {
-
- }
-
- const char* string() const {
- return this->c_str();
- }
-
- void setTo(const char* str, ssize_t len) {
- this->assign(str, len);
- }
- void setTo(const char* str) {
- this->assign(str);
- }
- String8 getPathDir(void) const {
- const char* cp;
- const char*const str = this->c_str();
-
- cp = strrchr(str, OS_PATH_SEPARATOR);
- if (cp == NULL)
- return String8("");
- else
- return String8(str, cp - str);
- }
- };
-
- template <class T> class Vector: public std::vector<T> {
- public:
- void push(T obj) {
- this->push_back(obj);
- }
- void removeAt(uint32_t index) {
- this->erase(this->begin() + index);
- }
- ssize_t add(const T& obj) {
- this->push_back(obj);
- return this->size() - 1;
- }
- void setCapacity(ssize_t capacity) {
- this->resize(capacity);
- }
-
- T* editArray() {
- return (T*)(this->begin());
- }
-
- const T* array() {
- return (const T*)(this->begin());
- }
-
- };
-
- template<> class Vector<bool>: public std::vector<char> {
- public:
- void push(bool obj) {
- this->push_back(obj);
- }
- void removeAt(uint32_t index) {
- this->erase(this->begin() + index);
- }
- ssize_t add(const bool& obj) {
- this->push_back(obj);
- return this->size() - 1;
- }
- void setCapacity(ssize_t capacity) {
- this->resize(capacity);
- }
-
- bool* editArray() {
- return (bool*)(this->begin());
- }
-
- const bool* array() {
- return (const bool*)(this->begin());
- }
- };
-
-}
-
typedef int64_t nsecs_t; // nano-seconds
enum {
@@ -286,5 +194,3 @@
}
#endif //ANDROID_RS_OBJECT_BASE_H
-
-
diff --git a/rsDefines.h b/rsDefines.h
index 1259610..e7e869e 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -367,7 +367,10 @@
RS_SCRIPT_INTRINSIC_ID_HISTOGRAM = 9,
// unused 10
RS_SCRIPT_INTRINSIC_ID_LOOP_FILTER = 11,
- RS_SCRIPT_INTRINSIC_ID_RESIZE = 12
+ RS_SCRIPT_INTRINSIC_ID_RESIZE = 12,
+
+
+ RS_SCRIPT_INTRINSIC_ID_OEM_START = 0x10000000
};
typedef struct {
diff --git a/rsDevice.cpp b/rsDevice.cpp
index 2688890..1ba005a 100644
--- a/rsDevice.cpp
+++ b/rsDevice.cpp
@@ -28,14 +28,16 @@
}
void Device::addContext(Context *rsc) {
- mContexts.push(rsc);
+ mContexts.push_back(rsc);
}
void Device::removeContext(Context *rsc) {
- for (size_t idx=0; idx < mContexts.size(); idx++) {
- if (mContexts[idx] == rsc) {
- mContexts.removeAt(idx);
- break;
+ for (auto ctxIter = mContexts.begin(), endIter = mContexts.end();
+ ctxIter != endIter; ctxIter++) {
+
+ if (rsc == *ctxIter) {
+ mContexts.erase(ctxIter);
+ return;
}
}
}
@@ -58,4 +60,3 @@
}
rsAssert(0);
}
-
diff --git a/rsDevice.h b/rsDevice.h
index ffb514b..5961336 100644
--- a/rsDevice.h
+++ b/rsDevice.h
@@ -17,6 +17,8 @@
#ifndef ANDROID_RS_DEVICE_H
#define ANDROID_RS_DEVICE_H
+#include <vector>
+
#include "rsUtils.h"
// ---------------------------------------------------------------------------
@@ -36,7 +38,7 @@
bool mForceSW;
protected:
- Vector<Context *> mContexts;
+ std::vector<Context *> mContexts;
};
}
diff --git a/rsElement.cpp b/rsElement.cpp
index f7b064a..0da8096 100644
--- a/rsElement.cpp
+++ b/rsElement.cpp
@@ -42,10 +42,14 @@
}
void Element::preDestroy() const {
- for (uint32_t ct = 0; ct < mRSC->mStateElement.mElements.size(); ct++) {
- if (mRSC->mStateElement.mElements[ct] == this) {
- mRSC->mStateElement.mElements.removeAt(ct);
- break;
+ auto &elements = mRSC->mStateElement.mElements;
+
+ for (auto elIter = elements.begin(), endIter = elements.end();
+ elIter != endIter; elIter++) {
+
+ if (this == *elIter) {
+ elements.erase(elIter);
+ return;
}
}
}
@@ -264,7 +268,7 @@
ObjectBase::asyncLock();
- rsc->mStateElement.mElements.push(e);
+ rsc->mStateElement.mElements.push_back(e);
ObjectBase::asyncUnlock();
return returnRef;
@@ -339,7 +343,7 @@
e->compute();
ObjectBase::asyncLock();
- rsc->mStateElement.mElements.push(e);
+ rsc->mStateElement.mElements.push_back(e);
ObjectBase::asyncUnlock();
return returnRef;
diff --git a/rsElement.h b/rsElement.h
index 5a3bc13..2ae9404 100644
--- a/rsElement.h
+++ b/rsElement.h
@@ -17,6 +17,8 @@
#ifndef ANDROID_STRUCTURED_ELEMENT_H
#define ANDROID_STRUCTURED_ELEMENT_H
+#include <vector>
+
#include "rsComponent.h"
#include "rsUtils.h"
#include "rsDefines.h"
@@ -170,7 +172,7 @@
~ElementState();
// Cache of all existing elements.
- Vector<Element *> mElements;
+ std::vector<Element *> mElements;
};
diff --git a/rsFileA3D.cpp b/rsFileA3D.cpp
index a589033..ef5730f 100644
--- a/rsFileA3D.cpp
+++ b/rsFileA3D.cpp
@@ -87,7 +87,7 @@
entry->mLength = headerStream->loadU32();
}
entry->mRsObj = NULL;
- mIndex.push(entry);
+ mIndex.push_back(entry);
}
}
@@ -379,7 +379,7 @@
indexEntry->mType = obj->getClassId();
indexEntry->mOffset = mWriteStream->getPos();
indexEntry->mRsObj = obj;
- mWriteIndex.push(indexEntry);
+ mWriteIndex.push_back(indexEntry);
obj->serialize(con, mWriteStream);
indexEntry->mLength = mWriteStream->getPos() - indexEntry->mOffset;
mWriteStream->align(4);
diff --git a/rsFileA3D.h b/rsFileA3D.h
index 8bf36b9..0c8b3d6 100644
--- a/rsFileA3D.h
+++ b/rsFileA3D.h
@@ -88,15 +88,13 @@
Asset *mAsset;
OStream *mWriteStream;
- Vector<A3DIndexEntry*> mWriteIndex;
+ std::vector<A3DIndexEntry*> mWriteIndex;
IStream *mReadStream;
- Vector<A3DIndexEntry*> mIndex;
+ std::vector<A3DIndexEntry*> mIndex;
};
}
}
#endif //ANDROID_RS_FILE_A3D_H
-
-
diff --git a/rsFont.cpp b/rsFont.cpp
index 8feef2d..71399af 100644
--- a/rsFont.cpp
+++ b/rsFont.cpp
@@ -33,7 +33,7 @@
using namespace android;
using namespace android::renderscript;
-Font::Font(Context *rsc) : ObjectBase(rsc), mCachedGlyphs(NULL) {
+Font::Font(Context *rsc) : ObjectBase(rsc) {
mInitialized = false;
mHasKerning = false;
mFace = NULL;
@@ -76,17 +76,21 @@
}
void Font::preDestroy() const {
- for (uint32_t ct = 0; ct < mRSC->mStateFont.mActiveFonts.size(); ct++) {
- if (mRSC->mStateFont.mActiveFonts[ct] == this) {
- mRSC->mStateFont.mActiveFonts.removeAt(ct);
- break;
+ auto &activeFonts = mRSC->mStateFont.mActiveFonts;
+
+ for (auto font = activeFonts.begin(), end = activeFonts.end(); font != end;
+ font++) {
+
+ if (this == *font) {
+ activeFonts.erase(font);
+ return;
}
}
}
void Font::invalidateTextureCache() {
- for (uint32_t i = 0; i < mCachedGlyphs.size(); i ++) {
- mCachedGlyphs.valueAt(i)->mIsValid = false;
+ for (auto &entry : mCachedGlyphs) { // std::map: operator[] looks up by key, so walk the entries instead
+ entry.second->mIsValid = false;
}
}
@@ -224,7 +228,7 @@
Font::CachedGlyphInfo* Font::getCachedUTFChar(int32_t utfChar) {
- CachedGlyphInfo *cachedGlyph = mCachedGlyphs.valueFor((uint32_t)utfChar);
+ CachedGlyphInfo *cachedGlyph = mCachedGlyphs[(uint32_t)utfChar];
if (cachedGlyph == NULL) {
cachedGlyph = cacheGlyph((uint32_t)utfChar);
}
@@ -283,7 +287,7 @@
Font::CachedGlyphInfo *Font::cacheGlyph(uint32_t glyph) {
CachedGlyphInfo *newGlyph = new CachedGlyphInfo();
- mCachedGlyphs.add(glyph, newGlyph);
+ mCachedGlyphs[glyph] = newGlyph;
#ifndef ANDROID_RS_SERIALIZE
newGlyph->mGlyphIndex = FT_Get_Char_Index(mFace, glyph);
newGlyph->mIsValid = false;
@@ -296,11 +300,14 @@
Font * Font::create(Context *rsc, const char *name, float fontSize, uint32_t dpi,
const void *data, uint32_t dataLen) {
rsc->mStateFont.checkInit();
- Vector<Font*> &activeFonts = rsc->mStateFont.mActiveFonts;
+ std::vector<Font*> &activeFonts = rsc->mStateFont.mActiveFonts;
for (uint32_t i = 0; i < activeFonts.size(); i ++) {
Font *ithFont = activeFonts[i];
- if (ithFont->mFontName == name && ithFont->mFontSize == fontSize && ithFont->mDpi == dpi) {
+ if (ithFont->mFontName == name &&
+ ithFont->mFontSize == fontSize &&
+ ithFont->mDpi == dpi) {
+
return ithFont;
}
}
@@ -308,7 +315,7 @@
Font *newFont = new Font(rsc);
bool isInitialized = newFont->init(name, fontSize, dpi, data, dataLen);
if (isInitialized) {
- activeFonts.push(newFont);
+ activeFonts.push_back(newFont);
rsc->mStateFont.precacheLatin(newFont);
return newFont;
}
@@ -325,7 +332,7 @@
#endif
- for (uint32_t i = 0; i < mCachedGlyphs.size(); i ++) {
- CachedGlyphInfo *glyph = mCachedGlyphs.valueAt(i);
+ for (auto &entry : mCachedGlyphs) { // std::map: operator[] looks up by key, so walk the entries instead
+ CachedGlyphInfo *glyph = entry.second;
delete glyph;
}
}
@@ -551,29 +558,39 @@
mCacheHeight = 256;
mCacheWidth = 1024;
ObjectBaseRef<Type> texType = Type::getTypeRef(mRSC, alphaElem.get(),
- mCacheWidth, mCacheHeight, 0, false, false, 0);
+ mCacheWidth, mCacheHeight,
+ 0, false, false, 0);
+
mCacheBuffer = new uint8_t[mCacheWidth * mCacheHeight];
- Allocation *cacheAlloc = Allocation::createAllocation(mRSC, texType.get(),
- RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE);
+ Allocation *cacheAlloc =
+ Allocation::createAllocation(mRSC, texType.get(),
+ RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE);
mTextTexture.set(cacheAlloc);
// Split up our cache texture into lines of certain widths
int32_t nextLine = 0;
- mCacheLines.push(new CacheTextureLine(16, texType->getDimX(), nextLine, 0));
- nextLine += mCacheLines.top()->mMaxHeight;
- mCacheLines.push(new CacheTextureLine(24, texType->getDimX(), nextLine, 0));
- nextLine += mCacheLines.top()->mMaxHeight;
- mCacheLines.push(new CacheTextureLine(24, texType->getDimX(), nextLine, 0));
- nextLine += mCacheLines.top()->mMaxHeight;
- mCacheLines.push(new CacheTextureLine(32, texType->getDimX(), nextLine, 0));
- nextLine += mCacheLines.top()->mMaxHeight;
- mCacheLines.push(new CacheTextureLine(32, texType->getDimX(), nextLine, 0));
- nextLine += mCacheLines.top()->mMaxHeight;
- mCacheLines.push(new CacheTextureLine(40, texType->getDimX(), nextLine, 0));
- nextLine += mCacheLines.top()->mMaxHeight;
- mCacheLines.push(new CacheTextureLine(texType->getDimY() - nextLine, texType->getDimX(), nextLine, 0));
+ mCacheLines.push_back(new CacheTextureLine(16, texType->getDimX(),
+ nextLine, 0));
+ nextLine += mCacheLines.back()->mMaxHeight;
+ mCacheLines.push_back(new CacheTextureLine(24, texType->getDimX(),
+ nextLine, 0));
+ nextLine += mCacheLines.back()->mMaxHeight;
+ mCacheLines.push_back(new CacheTextureLine(24, texType->getDimX(),
+ nextLine, 0));
+ nextLine += mCacheLines.back()->mMaxHeight;
+ mCacheLines.push_back(new CacheTextureLine(32, texType->getDimX(),
+ nextLine, 0));
+ nextLine += mCacheLines.back()->mMaxHeight;
+ mCacheLines.push_back(new CacheTextureLine(32, texType->getDimX(),
+ nextLine, 0));
+ nextLine += mCacheLines.back()->mMaxHeight;
+ mCacheLines.push_back(new CacheTextureLine(40, texType->getDimX(),
+ nextLine, 0));
+ nextLine += mCacheLines.back()->mMaxHeight;
+ mCacheLines.push_back(new CacheTextureLine(texType->getDimY() - nextLine,
+ texType->getDimX(), nextLine, 0));
}
// Avoid having to reallocate memory and render quad by quad
diff --git a/rsFont.h b/rsFont.h
index 7bac508..71e6fb4 100644
--- a/rsFont.h
+++ b/rsFont.h
@@ -17,9 +17,10 @@
#ifndef ANDROID_RS_FONT_H
#define ANDROID_RS_FONT_H
+#include <map>
+#include <vector>
+
#include "rsStream.h"
-#include <utils/Vector.h>
-#include <utils/KeyedVector.h>
struct FT_LibraryRec_;
struct FT_FaceRec_;
@@ -124,7 +125,7 @@
bool mInitialized;
bool mHasKerning;
- DefaultKeyedVector<uint32_t, CachedGlyphInfo* > mCachedGlyphs;
+ std::map<uint32_t, CachedGlyphInfo* > mCachedGlyphs;
CachedGlyphInfo* getCachedUTFChar(int32_t utfChar);
CachedGlyphInfo *cacheGlyph(uint32_t glyph);
@@ -178,7 +179,7 @@
bool fitBitmap(FT_Bitmap_ *bitmap, uint32_t *retOriginX, uint32_t *retOriginY);
};
- Vector<CacheTextureLine*> mCacheLines;
+ std::vector<CacheTextureLine*> mCacheLines;
uint32_t getRemainingCacheCapacity();
void precacheLatin(Font *font);
@@ -203,7 +204,7 @@
FT_LibraryRec_ *mLibrary;
FT_LibraryRec_ *getLib();
#endif //ANDROID_RS_SERIALIZE
- Vector<Font*> mActiveFonts;
+ std::vector<Font*> mActiveFonts;
// Render state for the font
ObjectBaseRef<Allocation> mFontShaderFConstant;
diff --git a/rsGrallocConsumer.h b/rsGrallocConsumer.h
index 9e4fc58..6f3f879 100644
--- a/rsGrallocConsumer.h
+++ b/rsGrallocConsumer.h
@@ -17,12 +17,12 @@
#ifndef ANDROID_RS_GRALLOC_CONSUMER_H
#define ANDROID_RS_GRALLOC_CONSUMER_H
+#include <vector>
+
#include <gui/ConsumerBase.h>
#include <ui/GraphicBuffer.h>
-#include <utils/String8.h>
-#include <utils/Vector.h>
#include <utils/threads.h>
@@ -75,4 +75,3 @@
} // namespace android
#endif // ANDROID_RS_GRALLOC_CONSUMER_H
-
diff --git a/rsProgramFragment.h b/rsProgramFragment.h
index e7456b9..1357bfc 100644
--- a/rsProgramFragment.h
+++ b/rsProgramFragment.h
@@ -55,7 +55,7 @@
void deinit(Context *rsc);
ObjectBaseRef<ProgramFragment> mDefault;
- Vector<ProgramFragment *> mPrograms;
+ std::vector<ProgramFragment *> mPrograms;
ObjectBaseRef<ProgramFragment> mLast;
};
@@ -63,7 +63,3 @@
}
}
#endif
-
-
-
-
diff --git a/rsProgramRaster.cpp b/rsProgramRaster.cpp
index 4f27f2e..d2d0602 100644
--- a/rsProgramRaster.cpp
+++ b/rsProgramRaster.cpp
@@ -31,10 +31,14 @@
}
void ProgramRaster::preDestroy() const {
- for (uint32_t ct = 0; ct < mRSC->mStateRaster.mRasterPrograms.size(); ct++) {
- if (mRSC->mStateRaster.mRasterPrograms[ct] == this) {
- mRSC->mStateRaster.mRasterPrograms.removeAt(ct);
- break;
+ auto &rasters = mRSC->mStateRaster.mRasterPrograms;
+
+ for (auto prIter = rasters.begin(), endIter = rasters.end();
+ prIter != endIter; prIter++) {
+
+ if (this == *prIter) {
+ rasters.erase(prIter);
+ return;
}
}
}
@@ -94,7 +98,7 @@
returnRef.set(pr);
ObjectBase::asyncLock();
- rsc->mStateRaster.mRasterPrograms.push(pr);
+ rsc->mStateRaster.mRasterPrograms.push_back(pr);
ObjectBase::asyncUnlock();
return returnRef;
@@ -111,4 +115,3 @@
}
}
-
diff --git a/rsProgramRaster.h b/rsProgramRaster.h
index e9a524b..207d74c 100644
--- a/rsProgramRaster.h
+++ b/rsProgramRaster.h
@@ -75,14 +75,10 @@
ObjectBaseRef<ProgramRaster> mLast;
// Cache of all existing raster programs.
- Vector<ProgramRaster *> mRasterPrograms;
+ std::vector<ProgramRaster *> mRasterPrograms;
};
}
}
#endif
-
-
-
-
diff --git a/rsProgramStore.cpp b/rsProgramStore.cpp
index 83c1f2c..b2d527e 100644
--- a/rsProgramStore.cpp
+++ b/rsProgramStore.cpp
@@ -42,10 +42,14 @@
}
void ProgramStore::preDestroy() const {
- for (uint32_t ct = 0; ct < mRSC->mStateFragmentStore.mStorePrograms.size(); ct++) {
- if (mRSC->mStateFragmentStore.mStorePrograms[ct] == this) {
- mRSC->mStateFragmentStore.mStorePrograms.removeAt(ct);
- break;
+ auto &stores = mRSC->mStateFragmentStore.mStorePrograms;
+
+ for (auto psIter = stores.begin(), endIter = stores.end();
+ psIter != endIter; psIter++) {
+
+ if (this == *psIter) {
+ stores.erase(psIter);
+ return;
}
}
}
@@ -118,7 +122,7 @@
pfs->init();
ObjectBase::asyncLock();
- rsc->mStateFragmentStore.mStorePrograms.push(pfs);
+ rsc->mStateFragmentStore.mStorePrograms.push_back(pfs);
ObjectBase::asyncUnlock();
return returnRef;
diff --git a/rsProgramStore.h b/rsProgramStore.h
index 9a7f7f1..06824fe 100644
--- a/rsProgramStore.h
+++ b/rsProgramStore.h
@@ -92,12 +92,9 @@
ObjectBaseRef<ProgramStore> mLast;
// Cache of all existing store programs.
- Vector<ProgramStore *> mStorePrograms;
+ std::vector<ProgramStore *> mStorePrograms;
};
}
}
#endif
-
-
-
diff --git a/rsRuntime.h b/rsRuntime.h
index eb93e25..5a05883 100644
--- a/rsRuntime.h
+++ b/rsRuntime.h
@@ -158,7 +158,7 @@
Allocation *in,
Allocation *out,
const void *usr,
- uint32_t usrBytes,
+ uint32_t usrBytes,
const RsScriptCall *call);
diff --git a/rsSampler.cpp b/rsSampler.cpp
index 0cf0b55..0ea9729 100644
--- a/rsSampler.cpp
+++ b/rsSampler.cpp
@@ -49,10 +49,14 @@
}
void Sampler::preDestroy() const {
- for (uint32_t ct = 0; ct < mRSC->mStateSampler.mAllSamplers.size(); ct++) {
- if (mRSC->mStateSampler.mAllSamplers[ct] == this) {
- mRSC->mStateSampler.mAllSamplers.removeAt(ct);
- break;
+ auto &samplers = mRSC->mStateSampler.mAllSamplers;
+
+ for (auto sampleIter = samplers.begin(), endIter = samplers.end();
+ sampleIter != endIter; sampleIter++) {
+
+ if (this == *sampleIter) {
+ samplers.erase(sampleIter);
+ return;
}
}
}
@@ -113,7 +117,7 @@
#endif
ObjectBase::asyncLock();
- rsc->mStateSampler.mAllSamplers.push(s);
+ rsc->mStateSampler.mAllSamplers.push_back(s);
ObjectBase::asyncUnlock();
return returnRef;
diff --git a/rsSampler.h b/rsSampler.h
index 2fdf707..3f5855f 100644
--- a/rsSampler.h
+++ b/rsSampler.h
@@ -96,12 +96,9 @@
}
}
// Cache of all existing raster programs.
- Vector<Sampler *> mAllSamplers;
+ std::vector<Sampler *> mAllSamplers;
};
}
}
#endif //ANDROID_RS_SAMPLER_H
-
-
-
diff --git a/rsScript.cpp b/rsScript.cpp
index dd962d1..a4fa196 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -187,38 +187,13 @@
free(tz);
}
-void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot,
- RsAllocation vain, RsAllocation vaout,
- const void *params, size_t paramLen,
- const RsScriptCall *sc, size_t scLen) {
- Script *s = static_cast<Script *>(vs);
- // The rs.spec generated code does not handle the absence of an actual
- // input for sc. Instead, it retains an existing pointer value (the prior
- // field in the packed data object). This can cause confusion because
- // drivers might now inspect bogus sc data.
- if (scLen == 0) {
- sc = NULL;
- }
- s->runForEach(rsc, slot,
- static_cast<const Allocation *>(vain), static_cast<Allocation *>(vaout),
- params, paramLen, sc);
-
-}
-
void rsi_ScriptForEachMulti(Context *rsc, RsScript vs, uint32_t slot,
RsAllocation *vains, size_t inLen,
RsAllocation vaout, const void *params,
size_t paramLen, const RsScriptCall *sc,
size_t scLen) {
- Script *s = static_cast<Script *>(vs);
- // The rs.spec generated code does not handle the absence of an actual
- // input for sc. Instead, it retains an existing pointer value (the prior
- // field in the packed data object). This can cause confusion because
- // drivers might now inspect bogus sc data.
- if (scLen == 0) {
- sc = NULL;
- }
+ Script *s = static_cast<Script *>(vs);
Allocation **ains = (Allocation**)(vains);
s->runForEach(rsc, slot,
@@ -227,6 +202,23 @@
}
+void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot,
+ RsAllocation vain, RsAllocation vaout,
+ const void *params, size_t paramLen,
+ const RsScriptCall *sc, size_t scLen) {
+
+ if (vain == NULL) {
+ rsi_ScriptForEachMulti(rsc, vs, slot, NULL, 0, vaout, params, paramLen,
+ sc, scLen);
+ } else {
+ RsAllocation ains[1] = {vain};
+
+ rsi_ScriptForEachMulti(rsc, vs, slot, ains,
+ sizeof(ains) / sizeof(RsAllocation), vaout,
+ params, paramLen, sc, scLen);
+ }
+}
+
void rsi_ScriptInvoke(Context *rsc, RsScript vs, uint32_t slot) {
Script *s = static_cast<Script *>(vs);
s->Invoke(rsc, slot, NULL, 0);
diff --git a/rsScript.h b/rsScript.h
index 1ad013f..2e232f0 100644
--- a/rsScript.h
+++ b/rsScript.h
@@ -108,17 +108,9 @@
virtual bool freeChildren();
- virtual void runForEach(Context *rsc,
- uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc = NULL) = 0;
-
virtual void runForEach(Context* rsc,
uint32_t slot,
- const Allocation** ains,
+ const Allocation ** ains,
size_t inLen,
Allocation* aout,
const void* usr,
diff --git a/rsScriptC.cpp b/rsScriptC.cpp
index e7ff8c7..96a771f 100644
--- a/rsScriptC.cpp
+++ b/rsScriptC.cpp
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include <string>
+
#include "rsContext.h"
#include "rsScriptC.h"
@@ -29,6 +31,19 @@
#include <sys/stat.h>
+#ifdef USE_MINGW
+/* Define the default path separator for the platform. */
+#define OS_PATH_SEPARATOR '\\'
+#define OS_PATH_SEPARATOR_STR "\\"
+
+#else /* not USE_MINGW */
+
+/* Define the default path separator for the platform. */
+#define OS_PATH_SEPARATOR '/'
+#define OS_PATH_SEPARATOR_STR "/"
+
+#endif
+
using namespace android;
using namespace android::renderscript;
@@ -58,29 +73,45 @@
#ifndef RS_COMPATIBILITY_LIB
bool ScriptC::createCacheDir(const char *cacheDir) {
- String8 cacheDirString, currentDir;
+ std::string currentDir;
+ const std::string cacheDirString(cacheDir);
+
struct stat statBuf;
int statReturn = stat(cacheDir, &statBuf);
if (!statReturn) {
return true;
}
- // String8 path functions strip leading /'s
- // insert if necessary
- if (cacheDir[0] == '/') {
- currentDir += "/";
- }
+ // Start from the beginning of the cacheDirString.
+ std::string::size_type currPos = 0;
- cacheDirString.setPathName(cacheDir);
+ // Reserve space in currentDir for the entire cacheDir path.
+ currentDir.reserve(cacheDirString.length());
- while (cacheDirString.length()) {
- currentDir += (cacheDirString.walkPath(&cacheDirString));
- statReturn = stat(currentDir.string(), &statBuf);
+ while (currPos != std::string::npos) {
+ /*
+ * currPos is either 0 or the index of a path separator. Look for the
+ * next separator after it; find() yields npos when none is left.
+ */
+ const std::string::size_type nextPos = cacheDirString.find(OS_PATH_SEPARATOR_STR, currPos + 1);
+
+ if (nextPos != std::string::npos) {
+ // A new path separator has been found.
+ currentDir += cacheDirString.substr(currPos, nextPos - currPos);
+ } else {
+ // There are no more path separators.
+ currentDir += cacheDirString.substr(currPos);
+ }
+
+ currPos = nextPos;
+
+ statReturn = stat(currentDir.c_str(), &statBuf);
+
if (statReturn) {
if (errno == ENOENT) {
- if (mkdir(currentDir.string(), S_IRUSR | S_IWUSR | S_IXUSR)) {
+ if (mkdir(currentDir.c_str(), S_IRUSR | S_IWUSR | S_IXUSR)) {
ALOGE("Couldn't create cache directory: %s",
- currentDir.string());
+ currentDir.c_str());
ALOGE("Error: %s", strerror(errno));
return false;
}
@@ -89,7 +120,6 @@
return false;
}
}
- currentDir += "/";
}
return true;
}
@@ -156,36 +186,6 @@
void ScriptC::runForEach(Context *rsc,
uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc) {
- // Trace this function call.
- // To avoid overhead, we only build the string, if tracing is actually
- // enabled.
- String8 *AString = NULL;
- const char *String = "";
- if (ATRACE_ENABLED()) {
- AString = new String8("runForEach_");
- AString->append(mHal.info.exportedForeachFuncList[slot].first);
- String = AString->string();
- }
- ATRACE_NAME(String);
- (void)String;
-
- Context::PushState ps(rsc);
-
- setupGLState(rsc);
- setupScript(rsc);
- rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ain, aout, usr, usrBytes, sc);
-
- if (AString)
- delete AString;
-}
-
-void ScriptC::runForEach(Context *rsc,
- uint32_t slot,
const Allocation ** ains,
size_t inLen,
Allocation * aout,
@@ -195,25 +195,36 @@
// Trace this function call.
// To avoid overhead we only build the string if tracing is actually
// enabled.
- String8 *AString = NULL;
- const char *String = "";
+ std::string *traceString = NULL;
+ const char *stringData = "";
if (ATRACE_ENABLED()) {
- AString = new String8("runForEach_");
- AString->append(mHal.info.exportedForeachFuncList[slot].first);
- String = AString->string();
+ traceString = new std::string("runForEach_");
+ traceString->append(mHal.info.exportedForeachFuncList[slot].first);
+ stringData = traceString->c_str();
}
- ATRACE_NAME(String);
- (void)String;
+ ATRACE_NAME(stringData);
Context::PushState ps(rsc);
setupGLState(rsc);
setupScript(rsc);
- rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen, aout, usr, usrBytes, sc);
+ if (rsc->mHal.funcs.script.invokeForEachMulti != NULL) {
+ rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen,
+ aout, usr, usrBytes, sc);
- if (AString)
- delete AString;
+ } else if (inLen <= 1) { // legacy hook takes one input, or NULL for none
+ rsc->mHal.funcs.script.invokeForEach(rsc, this, slot,
+ inLen == 1 ? ains[0] : NULL, aout, usr, usrBytes, sc);
+
+ } else {
+ rsc->setError(RS_ERROR_FATAL_DRIVER,
+ "Driver support for multi-input not present");
+ }
+
+ if (traceString) {
+ delete traceString;
+ }
}
void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) {
diff --git a/rsScriptC.h b/rsScriptC.h
index d3d9d51..5735bea 100644
--- a/rsScriptC.h
+++ b/rsScriptC.h
@@ -44,14 +44,6 @@
virtual void runForEach(Context *rsc,
uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc = NULL);
-
- virtual void runForEach(Context *rsc,
- uint32_t slot,
const Allocation ** ains,
size_t inLen,
Allocation * aout,
diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp
index a41f4a7..cacb37a 100644
--- a/rsScriptC_Lib.cpp
+++ b/rsScriptC_Lib.cpp
@@ -230,7 +230,17 @@
Allocation *in, Allocation *out,
const void *usr, uint32_t usrBytes,
const RsScriptCall *call) {
- target->runForEach(rsc, /* root slot */ 0, in, out, usr, usrBytes, call);
+
+ if (in == NULL) {
+ target->runForEach(rsc, /* root slot */ 0, NULL, 0, out, usr,
+ usrBytes, call);
+
+ } else {
+ const Allocation *ins[1] = {in};
+ target->runForEach(rsc, /* root slot */ 0, ins,
+ sizeof(ins) / sizeof(ins[0]), out, usr,
+ usrBytes, call);
+ }
}
void rsrAllocationSyncAll(Context *rsc, Allocation *a, RsAllocationUsageType usage) {
diff --git a/rsScriptGroup.cpp b/rsScriptGroup.cpp
index d1dd9d8..f41c65d 100644
--- a/rsScriptGroup.cpp
+++ b/rsScriptGroup.cpp
@@ -14,6 +14,8 @@
* limitations under the License.
*/
+#include <algorithm>
+
#include "rsContext.h"
#include <time.h>
@@ -28,8 +30,8 @@
mRSC->mHal.funcs.scriptgroup.destroy(mRSC, this);
}
- for (size_t ct=0; ct < mLinks.size(); ct++) {
- delete mLinks[ct];
+ for (auto link : mLinks) {
+ delete link;
}
}
@@ -44,148 +46,116 @@
}
ScriptGroup::Node * ScriptGroup::findNode(Script *s) const {
- //ALOGE("find %p %i", s, (int)mNodes.size());
- for (size_t ct=0; ct < mNodes.size(); ct++) {
- Node *n = mNodes[ct];
- for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
- if (n->mKernels[ct2]->mScript == s) {
- return n;
+ for (auto node : mNodes) {
+ for (auto kernelRef : node->mKernels) {
+ if (kernelRef->mScript == s) {
+ return node;
}
}
}
+
return NULL;
}
-bool ScriptGroup::calcOrderRecurse(Node *n, int depth) {
- n->mSeen = true;
- if (n->mOrder < depth) {
- n->mOrder = depth;
+bool ScriptGroup::calcOrderRecurse(Node *node0, int depth) {
+ node0->mSeen = true;
+ if (node0->mOrder < depth) {
+ node0->mOrder = depth;
}
bool ret = true;
- for (size_t ct=0; ct < n->mOutputs.size(); ct++) {
- const Link *l = n->mOutputs[ct];
- Node *nt = NULL;
- if (l->mDstField.get()) {
- nt = findNode(l->mDstField->mScript);
+
+ for (auto link : node0->mOutputs) {
+ Node *node1 = NULL;
+ if (link->mDstField.get()) {
+ node1 = findNode(link->mDstField->mScript);
} else {
- nt = findNode(l->mDstKernel->mScript);
+ node1 = findNode(link->mDstKernel->mScript);
}
- if (nt->mSeen) {
+ if (node1->mSeen) {
return false;
}
- ret &= calcOrderRecurse(nt, n->mOrder + 1);
+ ret &= calcOrderRecurse(node1, node0->mOrder + 1);
}
+
return ret;
}
-#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
-static int CompareNodeForSort(ScriptGroup::Node *const* lhs,
- ScriptGroup::Node *const* rhs) {
- if (lhs[0]->mOrder > rhs[0]->mOrder) {
- return 1;
- }
- return 0;
-}
-#else
-class NodeCompare {
-public:
- bool operator() (const ScriptGroup::Node* lhs,
- const ScriptGroup::Node* rhs) {
- if (lhs->mOrder > rhs->mOrder) {
- return true;
- }
- return false;
- }
-};
-#endif
-
bool ScriptGroup::calcOrder() {
// Make nodes
- for (size_t ct=0; ct < mKernels.size(); ct++) {
- const ScriptKernelID *k = mKernels[ct].get();
- //ALOGE(" kernel %i, %p s=%p", (int)ct, k, mKernels[ct]->mScript);
- Node *n = findNode(k->mScript);
- //ALOGE(" n = %p", n);
- if (n == NULL) {
- n = new Node(k->mScript);
- mNodes.add(n);
+
+ for (auto kernelRef : mKernels) {
+ const ScriptKernelID *kernel = kernelRef.get();
+ Node *node = findNode(kernel->mScript);
+ if (node == NULL) {
+ node = new Node(kernel->mScript);
+ mNodes.push_back(node);
}
- n->mKernels.add(k);
+ node->mKernels.push_back(kernel);
}
// add links
- //ALOGE("link count %i", (int)mLinks.size());
- for (size_t ct=0; ct < mLinks.size(); ct++) {
- Link *l = mLinks[ct];
- //ALOGE("link %i %p", (int)ct, l);
- Node *n = findNode(l->mSource->mScript);
- //ALOGE("link n %p", n);
- n->mOutputs.add(l);
+ for (auto link : mLinks) {
+ Node *node = findNode(link->mSource->mScript);
+ node->mOutputs.push_back(link);
- if (l->mDstKernel.get()) {
- //ALOGE("l->mDstKernel.get() %p", l->mDstKernel.get());
- n = findNode(l->mDstKernel->mScript);
- //ALOGE(" n1 %p", n);
- n->mInputs.add(l);
+ if (link->mDstKernel.get()) {
+ node = findNode(link->mDstKernel->mScript);
+ node->mInputs.push_back(link);
} else {
- n = findNode(l->mDstField->mScript);
- //ALOGE(" n2 %p", n);
- n->mInputs.add(l);
+ node = findNode(link->mDstField->mScript);
+ node->mInputs.push_back(link);
}
}
- //ALOGE("node count %i", (int)mNodes.size());
// Order nodes
bool ret = true;
- for (size_t ct=0; ct < mNodes.size(); ct++) {
- Node *n = mNodes[ct];
- if (n->mInputs.size() == 0) {
- for (size_t ct2=0; ct2 < mNodes.size(); ct2++) {
- mNodes[ct2]->mSeen = false;
+ for (auto n0 : mNodes) {
+ if (n0->mInputs.size() == 0) {
+ for (auto n1 : mNodes) {
+ n1->mSeen = false;
}
- ret &= calcOrderRecurse(n, 0);
+ ret &= calcOrderRecurse(n0, 1);
}
}
- for (size_t ct=0; ct < mKernels.size(); ct++) {
- const ScriptKernelID *k = mKernels[ct].get();
- const Node *n = findNode(k->mScript);
+ for (auto kernelRef : mKernels) {
+ const ScriptKernelID *kernel = kernelRef.get();
+ const Node *node = findNode(kernel->mScript);
- if (k->mHasKernelOutput) {
+ if (kernel->mHasKernelOutput) {
bool found = false;
- for (size_t ct2=0; ct2 < n->mOutputs.size(); ct2++) {
- if (n->mOutputs[ct2]->mSource.get() == k) {
+ for (auto output : node->mOutputs) {
+ if (output->mSource.get() == kernel) {
found = true;
break;
}
}
+
if (!found) {
- //ALOGE("add io out %p", k);
- mOutputs.add(new IO(k));
+ mOutputs.push_back(new IO(kernel));
}
}
- if (k->mHasKernelInput) {
+ if (kernel->mHasKernelInput) {
bool found = false;
- for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
- if (n->mInputs[ct2]->mDstKernel.get() == k) {
+ for (auto input : node->mInputs) {
+ if (input->mDstKernel.get() == kernel) {
found = true;
break;
}
}
if (!found) {
- //ALOGE("add io in %p", k);
- mInputs.add(new IO(k));
+ mInputs.push_back(new IO(kernel));
}
}
}
// sort
-#if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB)
- mNodes.sort(&CompareNodeForSort);
-#else
- std::sort(mNodes.begin(), mNodes.end(), NodeCompare());
-#endif
+ std::stable_sort(mNodes.begin(), mNodes.end(),
+ [](const ScriptGroup::Node* lhs,
+ const ScriptGroup::Node* rhs) {
+ return lhs->mOrder < rhs->mOrder;
+ });
return ret;
}
@@ -209,7 +179,7 @@
sg->mKernels.reserve(kernelCount);
for (size_t ct=0; ct < kernelCount; ct++) {
- sg->mKernels.add(kernels[ct]);
+ sg->mKernels.push_back(kernels[ct]);
}
sg->mLinks.reserve(linkCount);
@@ -219,7 +189,7 @@
l->mSource = src[ct];
l->mDstField = dstF[ct];
l->mDstKernel = dstK[ct];
- sg->mLinks.add(l);
+ sg->mLinks.push_back(l);
}
sg->calcOrder();
@@ -254,9 +224,9 @@
}
void ScriptGroup::setInput(Context *rsc, ScriptKernelID *kid, Allocation *a) {
- for (size_t ct=0; ct < mInputs.size(); ct++) {
- if (mInputs[ct]->mKernel == kid) {
- mInputs[ct]->mAlloc = a;
+ for (auto input : mInputs) {
+ if (input->mKernel == kid) {
+ input->mAlloc = a;
if (rsc->mHal.funcs.scriptgroup.setInput) {
rsc->mHal.funcs.scriptgroup.setInput(rsc, this, kid, a);
@@ -268,9 +238,9 @@
}
void ScriptGroup::setOutput(Context *rsc, ScriptKernelID *kid, Allocation *a) {
- for (size_t ct=0; ct < mOutputs.size(); ct++) {
- if (mOutputs[ct]->mKernel == kid) {
- mOutputs[ct]->mAlloc = a;
+ for (auto output : mOutputs) {
+ if (output->mKernel == kid) {
+ output->mAlloc = a;
if (rsc->mHal.funcs.scriptgroup.setOutput) {
rsc->mHal.funcs.scriptgroup.setOutput(rsc, this, kid, a);
@@ -311,44 +281,45 @@
return;
}
- for (size_t ct=0; ct < mNodes.size(); ct++) {
- Node *n = mNodes[ct];
- //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
-
- for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
- const ScriptKernelID *k = n->mKernels[ct2];
- Allocation *ain = NULL;
+ for (auto node : mNodes) {
+ for (auto kernel : node->mKernels) {
+ Allocation *ain = NULL;
Allocation *aout = NULL;
- for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
- if (n->mInputs[ct3]->mDstKernel.get() == k) {
- ain = n->mInputs[ct3]->mAlloc.get();
- //ALOGE(" link in %p", ain);
- }
- }
- for (size_t ct3=0; ct3 < mInputs.size(); ct3++) {
- if (mInputs[ct3]->mKernel == k) {
- ain = mInputs[ct3]->mAlloc.get();
- //ALOGE(" io in %p", ain);
+ for (auto nodeInput : node->mInputs) {
+ if (nodeInput->mDstKernel.get() == kernel) {
+ ain = nodeInput->mAlloc.get();
}
}
- for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
- if (n->mOutputs[ct3]->mSource.get() == k) {
- aout = n->mOutputs[ct3]->mAlloc.get();
- //ALOGE(" link out %p", aout);
- }
- }
- for (size_t ct3=0; ct3 < mOutputs.size(); ct3++) {
- if (mOutputs[ct3]->mKernel == k) {
- aout = mOutputs[ct3]->mAlloc.get();
- //ALOGE(" io out %p", aout);
+ for (auto sgInput : mInputs) {
+ if (sgInput->mKernel == kernel) {
+ ain = sgInput->mAlloc.get();
}
}
- n->mScript->runForEach(rsc, k->mSlot, ain, aout, NULL, 0);
+ for (auto nodeOutput : node->mOutputs) {
+ if (nodeOutput->mSource.get() == kernel) { // output links originate at this kernel
+ aout = nodeOutput->mAlloc.get();
+ }
+ }
+
+ for (auto sgOutput : mOutputs) {
+ if (sgOutput->mKernel == kernel) {
+ aout = sgOutput->mAlloc.get();
+ }
+ }
+
+ if (ain == NULL) {
+ node->mScript->runForEach(rsc, kernel->mSlot, NULL, 0, aout,
+ NULL, 0);
+ } else {
+ const Allocation *ains[1] = {ain};
+ node->mScript->runForEach(rsc, kernel->mSlot, ains,
+ sizeof(ains) / sizeof(ains[0]),
+ aout, NULL, 0);
+ }
}
-
}
}
@@ -389,24 +360,20 @@
void rsi_ScriptGroupSetInput(Context *rsc, RsScriptGroup sg, RsScriptKernelID kid,
RsAllocation alloc) {
- //ALOGE("rsi_ScriptGroupSetInput");
ScriptGroup *s = (ScriptGroup *)sg;
s->setInput(rsc, (ScriptKernelID *)kid, (Allocation *)alloc);
}
void rsi_ScriptGroupSetOutput(Context *rsc, RsScriptGroup sg, RsScriptKernelID kid,
RsAllocation alloc) {
- //ALOGE("rsi_ScriptGroupSetOutput");
ScriptGroup *s = (ScriptGroup *)sg;
s->setOutput(rsc, (ScriptKernelID *)kid, (Allocation *)alloc);
}
void rsi_ScriptGroupExecute(Context *rsc, RsScriptGroup sg) {
- //ALOGE("rsi_ScriptGroupExecute");
ScriptGroup *s = (ScriptGroup *)sg;
s->execute(rsc);
}
}
}
-
diff --git a/rsScriptGroup.h b/rsScriptGroup.h
index af98b50..974e3ba 100644
--- a/rsScriptGroup.h
+++ b/rsScriptGroup.h
@@ -32,7 +32,7 @@
class ScriptGroup : public ObjectBase {
public:
- Vector<ObjectBaseRef<ScriptKernelID> > mKernels;
+ std::vector<ObjectBaseRef<ScriptKernelID> > mKernels;
class Link {
public:
@@ -49,9 +49,9 @@
public:
Node(Script *);
- Vector<const ScriptKernelID *> mKernels;
- Vector<Link *> mOutputs;
- Vector<Link *> mInputs;
+ std::vector<const ScriptKernelID *> mKernels;
+ std::vector<Link *> mOutputs;
+ std::vector<Link *> mInputs;
bool mSeen;
int mOrder;
Script *mScript;
@@ -65,10 +65,10 @@
ObjectBaseRef<Allocation> mAlloc;
};
- Vector<Link *> mLinks;
- Vector<Node *> mNodes;
- Vector<IO *> mInputs;
- Vector<IO *> mOutputs;
+ std::vector<Link *> mLinks;
+ std::vector<Node *> mNodes;
+ std::vector<IO *> mInputs;
+ std::vector<IO *> mOutputs;
struct Hal {
void * drv;
@@ -115,4 +115,3 @@
}
}
#endif
-
diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp
index 86f1c50..7461d34 100644
--- a/rsScriptIntrinsic.cpp
+++ b/rsScriptIntrinsic.cpp
@@ -55,18 +55,6 @@
return 0;
}
-
-void ScriptIntrinsic::runForEach(Context *rsc,
- uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc) {
-
- rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ain, aout, usr, usrBytes, sc);
-}
-
void ScriptIntrinsic::runForEach(Context* rsc,
uint32_t slot,
const Allocation** ains,
@@ -76,7 +64,18 @@
size_t usrBytes,
const RsScriptCall* sc) {
- rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen, aout, usr, usrBytes, sc);
+ if (rsc->mHal.funcs.script.invokeForEachMulti != NULL) {
+ rsc->mHal.funcs.script.invokeForEachMulti(rsc, this, slot, ains, inLen,
+ aout, usr, usrBytes, sc);
+
+ } else if (inLen <= 1) { // legacy hook takes one input, or NULL for none
+ rsc->mHal.funcs.script.invokeForEach(rsc, this, slot,
+ inLen == 1 ? ains[0] : NULL, aout, usr, usrBytes, sc);
+
+ } else {
+ rsc->setError(RS_ERROR_FATAL_DRIVER,
+ "Driver support for multi-input not present");
+ }
}
void ScriptIntrinsic::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) {
@@ -107,5 +106,3 @@
}
}
-
-
diff --git a/rsScriptIntrinsic.h b/rsScriptIntrinsic.h
index 66b6031..87b7353 100644
--- a/rsScriptIntrinsic.h
+++ b/rsScriptIntrinsic.h
@@ -40,17 +40,9 @@
virtual RsA3DClassID getClassId() const;
virtual bool freeChildren();
- virtual void runForEach(Context *rsc,
- uint32_t slot,
- const Allocation * ain,
- Allocation * aout,
- const void * usr,
- size_t usrBytes,
- const RsScriptCall *sc = NULL);
-
virtual void runForEach(Context* rsc,
uint32_t slot,
- const Allocation** ains,
+ const Allocation ** ains,
size_t inLen,
Allocation* aout,
const void* usr,
@@ -69,5 +61,3 @@
}
}
#endif
-
-
diff --git a/rsType.cpp b/rsType.cpp
index 31d6ce8..d009816 100644
--- a/rsType.cpp
+++ b/rsType.cpp
@@ -33,10 +33,14 @@
}
void Type::preDestroy() const {
- for (uint32_t ct = 0; ct < mRSC->mStateType.mTypes.size(); ct++) {
- if (mRSC->mStateType.mTypes[ct] == this) {
- mRSC->mStateType.mTypes.removeAt(ct);
- break;
+ auto &types = mRSC->mStateType.mTypes;
+
+ for (auto typeIter = types.begin(), endIter = types.end();
+ typeIter != endIter; typeIter++) {
+
+ if (this == *typeIter) {
+ types.erase(typeIter);
+ return;
}
}
}
@@ -265,7 +269,7 @@
nt->compute();
ObjectBase::asyncLock();
- stc->mTypes.push(nt);
+ stc->mTypes.push_back(nt);
ObjectBase::asyncUnlock();
return returnRef;
diff --git a/rsType.h b/rsType.h
index e44e270..86d6ece 100644
--- a/rsType.h
+++ b/rsType.h
@@ -146,7 +146,7 @@
~TypeState();
// Cache of all existing types.
- Vector<Type *> mTypes;
+ std::vector<Type *> mTypes;
};
diff --git a/rsg_generator.c b/rsg_generator.c
index d0f0b7c..2558f67 100644
--- a/rsg_generator.c
+++ b/rsg_generator.c
@@ -294,7 +294,9 @@
const VarType *vt = &api->params[ct2];
needFlush += vt->ptrLevel;
if (vt->ptrLevel && hasInlineDataPointers(api)) {
- fprintf(f, " if (dataSize < io->getMaxInlineSize()) {\n");
+ fprintf(f, " if (%s_length == 0) {\n", vt->name);
+ fprintf(f, " cmd->%s = NULL;\n", vt->name);
+ fprintf(f, " } else if (dataSize < io->getMaxInlineSize()) {\n");
fprintf(f, " memcpy(payload, %s, %s_length);\n", vt->name, vt->name);
fprintf(f, " cmd->%s = (", vt->name);
printVarType(f, vt);
@@ -489,7 +491,8 @@
needFlush += vt->ptrLevel;
if (hasInlineDataPointers(api) && vt->ptrLevel) {
- fprintf(f, ",\n (const %s *)&baseData[(intptr_t)cmd->%s]", vt->typeName, vt->name);
+ fprintf(f, ",\n cmd->%s_length == 0 ? NULL : (const %s *)&baseData[(intptr_t)cmd->%s]",
+ vt->name, vt->typeName, vt->name);
} else {
fprintf(f, ",\n cmd->%s", vt->name);
}