am 0687cec6: am cb341688: Merge "Correctly align mSendBuffer to contain doubles."
* commit '0687cec66d6fe9e7524685476aa6e096cde28c0c':
Correctly align mSendBuffer to contain doubles.
diff --git a/Android.mk b/Android.mk
index 0dde1a4..5676462 100644
--- a/Android.mk
+++ b/Android.mk
@@ -2,14 +2,12 @@
LOCAL_PATH:=$(call my-dir)
rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable
-ifeq ($(ARCH_ARM_HAVE_NEON), true)
- rs_base_CFLAGS += -DARCH_ARM_HAVE_NEON
-endif
ifeq ($(TARGET_BUILD_PDK), true)
rs_base_CFLAGS += -D__RS_PDK__
endif
include $(CLEAR_VARS)
+LOCAL_CLANG := true
LOCAL_MODULE := libRSDriver
LOCAL_SRC_FILES:= \
@@ -19,6 +17,9 @@
driver/rsdFrameBuffer.cpp \
driver/rsdFrameBufferObj.cpp \
driver/rsdGL.cpp \
+ driver/rsdIntrinsics.cpp \
+ driver/rsdIntrinsicConvolve3x3.cpp \
+ driver/rsdIntrinsicColorMatrix.cpp \
driver/rsdMesh.cpp \
driver/rsdMeshObj.cpp \
driver/rsdPath.cpp \
@@ -32,8 +33,14 @@
driver/rsdShaderCache.cpp \
driver/rsdVertexArray.cpp
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+ LOCAL_CFLAGS += -DARCH_ARM_HAVE_NEON
+ LOCAL_SRC_FILES+= \
+ driver/rsdIntrinsics_Convolve.S
+endif
+
LOCAL_SHARED_LIBRARIES += libcutils libutils libEGL libGLESv1_CM libGLESv2
-LOCAL_SHARED_LIBRARIES += libbcc libbcinfo libgui
+LOCAL_SHARED_LIBRARIES += libbcc libbcinfo libgui libsync
LOCAL_C_INCLUDES += frameworks/compile/libbcc/include
@@ -66,6 +73,7 @@
RSG_GENERATOR:=$(LOCAL_BUILT_MODULE)
include $(CLEAR_VARS)
+LOCAL_CLANG := true
LOCAL_MODULE := libRS
LOCAL_MODULE_CLASS := SHARED_LIBRARIES
@@ -135,13 +143,14 @@
rsScriptC.cpp \
rsScriptC_Lib.cpp \
rsScriptC_LibGL.cpp \
+ rsScriptIntrinsic.cpp \
rsSignal.cpp \
rsStream.cpp \
rsThreadIO.cpp \
rsType.cpp
LOCAL_SHARED_LIBRARIES += libcutils libutils libEGL libGLESv1_CM libGLESv2 libbcc
-LOCAL_SHARED_LIBRARIES += libui libbcinfo libgui
+LOCAL_SHARED_LIBRARIES += libui libbcinfo libgui libsync
LOCAL_STATIC_LIBRARIES := libft2 libRSDriver
@@ -226,6 +235,7 @@
rsScriptC.cpp \
rsScriptC_Lib.cpp \
rsScriptC_LibGL.cpp \
+ rsScriptIntrinsic.cpp \
rsSignal.cpp \
rsStream.cpp \
rsThreadIO.cpp \
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 4eb5a46..8240b10 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -78,6 +78,17 @@
return 0;
}
+// Returns the address of element (xoff, yoff) inside mip level `lod` of
+// cubemap face `face`, using the driver's per-LOD pointer/stride table.
+// No bounds checking is done on any of the coordinates.
+uint8_t *GetOffsetPtr(const android::renderscript::Allocation *alloc,
+                      uint32_t xoff, uint32_t yoff, uint32_t lod,
+                      RsAllocationCubemapFace face) {
+    DrvAllocation *state = (DrvAllocation *)alloc->mHal.drv;
+    uint8_t *levelBase = (uint8_t *)state->lod[lod].mallocPtr;
+    // Face first, then row, then column -- same accumulation order as the
+    // individual byte offsets are defined in the table.
+    return levelBase
+            + face * state->faceOffset
+            + yoff * state->lod[lod].stride
+            + xoff * alloc->mHal.state.elementSizeBytes;
+}
+
static void Update2DTexture(const Context *rsc, const Allocation *alloc, const void *ptr,
uint32_t xoff, uint32_t yoff, uint32_t lod,
@@ -109,8 +120,7 @@
rsdGLCheckError(rsc, "Upload2DTexture 1 ");
for (uint32_t face = 0; face < faceCount; face ++) {
for (uint32_t lod = 0; lod < alloc->mHal.state.type->getLODCount(); lod++) {
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.mallocPtr;
- p += alloc->mHal.state.type->getLODFaceOffset(lod, (RsAllocationCubemapFace)face, 0, 0);
+ const uint8_t *p = GetOffsetPtr(alloc, 0, 0, lod, (RsAllocationCubemapFace)face);
GLenum t = GL_TEXTURE_2D;
if (alloc->mHal.state.hasFaces) {
@@ -151,7 +161,7 @@
return;
}
- if (!alloc->getPtr()) {
+ if (!drv->lod[0].mallocPtr) {
return;
}
@@ -165,9 +175,10 @@
Upload2DTexture(rsc, alloc, isFirstUpload);
if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_SCRIPT)) {
- if (alloc->mHal.drvState.mallocPtr) {
- free(alloc->mHal.drvState.mallocPtr);
- alloc->mHal.drvState.mallocPtr = NULL;
+ if (alloc->mHal.drvState.mallocPtrLOD0) {
+ free(alloc->mHal.drvState.mallocPtrLOD0);
+ alloc->mHal.drvState.mallocPtrLOD0 = NULL;
+ drv->lod[0].mallocPtr = NULL;
}
}
rsdGLCheckError(rsc, "UploadToTexture");
@@ -214,26 +225,85 @@
}
RSD_CALL_GL(glBindBuffer, drv->glTarget, drv->bufferID);
RSD_CALL_GL(glBufferData, drv->glTarget, alloc->mHal.state.type->getSizeBytes(),
- alloc->mHal.drvState.mallocPtr, GL_DYNAMIC_DRAW);
+ alloc->mHal.drvState.mallocPtrLOD0, GL_DYNAMIC_DRAW);
RSD_CALL_GL(glBindBuffer, drv->glTarget, 0);
rsdGLCheckError(rsc, "UploadToBufferObject");
}
+// Fills in the driver's per-LOD table (dimensions, row stride, base pointer)
+// for `alloc` as if it had type `type` and its storage started at `ptr`, and
+// publishes the level-0 pointer/stride into alloc->mHal.drvState.
+//
+// Returns the number of bytes the storage must hold: the size of one face's
+// full mip chain, multiplied by six when the type has faces.
+//
+// Callers run this twice: once with ptr == NULL to learn the size, and again
+// with the real buffer to build the pointers.
+static size_t AllocationBuildPointerTable(const Context *rsc, const Allocation *alloc,
+        const Type *type, uint8_t *ptr) {
+
+    DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
+
+    drv->lod[0].dimX = type->getDimX();
+    drv->lod[0].dimY = type->getDimY();
+    // dimZ feeds the size computation below, so take it from the type
+    // instead of reading whatever the field happened to hold.
+    drv->lod[0].dimZ = type->getDimZ();
+    drv->lod[0].mallocPtr = 0;
+    drv->lod[0].stride = drv->lod[0].dimX * type->getElementSizeBytes();
+    drv->lodCount = type->getLODCount();
+    drv->faceCount = type->getDimFaces();
+
+    size_t offsets[Allocation::MAX_LOD];
+    memset(offsets, 0, sizeof(offsets));
+
+    // Running byte offset of the next level within one face.
+    size_t o = drv->lod[0].stride * rsMax(drv->lod[0].dimY, 1u) * rsMax(drv->lod[0].dimZ, 1u);
+    if (drv->lodCount > 1) {
+        uint32_t tx = drv->lod[0].dimX;
+        uint32_t ty = drv->lod[0].dimY;
+        uint32_t tz = drv->lod[0].dimZ;
+        for (uint32_t lod = 1; lod < drv->lodCount; lod++) {
+            // Halve BEFORE recording: level N is half of level N-1 in each
+            // dimension (never below 1). Recording first would give level 1
+            // the same size as level 0, and the mipmap generators, which
+            // read two source texels per destination texel, would run past
+            // the end of each source row.
+            if (tx > 1) tx >>= 1;
+            if (ty > 1) ty >>= 1;
+            if (tz > 1) tz >>= 1;
+            drv->lod[lod].dimX = tx;
+            drv->lod[lod].dimY = ty;
+            drv->lod[lod].dimZ = tz;
+            drv->lod[lod].stride = tx * type->getElementSizeBytes();
+            offsets[lod] = o;
+            o += drv->lod[lod].stride * rsMax(ty, 1u) * rsMax(tz, 1u);
+        }
+    }
+    drv->faceOffset = o;
+
+    drv->lod[0].mallocPtr = ptr;
+    for (uint32_t lod = 1; lod < drv->lodCount; lod++) {
+        // On the sizing pass ptr is NULL; keep the table NULL rather than
+        // doing arithmetic on a null pointer and storing a bogus address.
+        drv->lod[lod].mallocPtr = ptr ? (ptr + offsets[lod]) : NULL;
+    }
+    alloc->mHal.drvState.strideLOD0 = drv->lod[0].stride;
+    alloc->mHal.drvState.mallocPtrLOD0 = ptr;
+
+    size_t allocSize = drv->faceOffset;
+    if (drv->faceCount) {
+        allocSize *= 6;
+    }
+
+    return allocSize;
+}
+
bool rsdAllocationInit(const Context *rsc, Allocation *alloc, bool forceZero) {
DrvAllocation *drv = (DrvAllocation *)calloc(1, sizeof(DrvAllocation));
if (!drv) {
return false;
}
+ alloc->mHal.drv = drv;
- void * ptr = NULL;
+ // Calculate the object size.
+ size_t allocSize = AllocationBuildPointerTable(rsc, alloc, alloc->getType(), NULL);
+
+ uint8_t * ptr = NULL;
if (alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_IO_OUTPUT) {
} else {
- ptr = malloc(alloc->mHal.state.type->getSizeBytes());
+
+ ptr = (uint8_t *)malloc(allocSize);
if (!ptr) {
free(drv);
return false;
}
}
+ // Build the pointer tables
+ size_t verifySize = AllocationBuildPointerTable(rsc, alloc, alloc->getType(), ptr);
+ if(allocSize != verifySize) {
+ rsAssert(!"Size mismatch");
+ }
drv->glTarget = GL_NONE;
if (alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE) {
@@ -251,10 +321,6 @@
drv->glType = rsdTypeToGLType(alloc->mHal.state.type->getElement()->getComponent().getType());
drv->glFormat = rsdKindToGLFormat(alloc->mHal.state.type->getElement()->getComponent().getKind());
-
- alloc->mHal.drvState.mallocPtr = ptr;
- alloc->mHal.drvState.stride = alloc->mHal.state.dimensionX * alloc->mHal.state.elementSizeBytes;
- alloc->mHal.drv = drv;
if (forceZero && ptr) {
memset(ptr, 0, alloc->mHal.state.type->getSizeBytes());
}
@@ -263,8 +329,6 @@
drv->uploadDeferred = true;
}
- drv->width = alloc->getType()->getDimX();
- drv->height = alloc->getType()->getDimY();
drv->readBackFBO = NULL;
@@ -289,9 +353,9 @@
drv->renderTargetID = 0;
}
- if (alloc->mHal.drvState.mallocPtr) {
- free(alloc->mHal.drvState.mallocPtr);
- alloc->mHal.drvState.mallocPtr = NULL;
+ if (alloc->mHal.drvState.mallocPtrLOD0) {
+ free(alloc->mHal.drvState.mallocPtrLOD0);
+ alloc->mHal.drvState.mallocPtrLOD0 = NULL;
}
if (drv->readBackFBO != NULL) {
delete drv->readBackFBO;
@@ -305,16 +369,22 @@
const Type *newType, bool zeroNew) {
DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
- alloc->mHal.drvState.mallocPtr = (uint8_t *)realloc(
- alloc->mHal.drvState.mallocPtr, newType->getSizeBytes());
+ void * oldPtr = drv->lod[0].mallocPtr;
+ // Calculate the object size
+ size_t s = AllocationBuildPointerTable(rsc, alloc, newType, NULL);
+ uint8_t *ptr = (uint8_t *)realloc(oldPtr, s);
+ // Build the relative pointer tables.
+ size_t verifySize = AllocationBuildPointerTable(rsc, alloc, newType, ptr);
+ if(s != verifySize) {
+ rsAssert(!"Size mismatch");
+ }
const uint32_t oldDimX = alloc->mHal.state.dimensionX;
const uint32_t dimX = newType->getDimX();
if (dimX > oldDimX) {
- const Element *e = alloc->mHal.state.type->getElement();
- uint32_t stride = e->getSizeBytes();
- memset(((uint8_t *)alloc->mHal.drvState.mallocPtr) + stride * oldDimX,
+ uint32_t stride = alloc->mHal.state.elementSizeBytes;
+ memset(((uint8_t *)alloc->mHal.drvState.mallocPtrLOD0) + stride * oldDimX,
0, stride * (dimX - oldDimX));
}
}
@@ -342,8 +412,8 @@
drv->readBackFBO->setActive(rsc);
// Do the readback
- RSD_CALL_GL(glReadPixels, 0, 0, alloc->getType()->getDimX(), alloc->getType()->getDimY(),
- drv->glFormat, drv->glType, alloc->getPtr());
+ RSD_CALL_GL(glReadPixels, 0, 0, drv->lod[0].dimX, drv->lod[0].dimY,
+ drv->glFormat, drv->glType, drv->lod[0].mallocPtr);
// Revert framebuffer to its original
lastFbo->setActive(rsc);
@@ -399,19 +469,12 @@
static bool IoGetBuffer(const Context *rsc, Allocation *alloc, ANativeWindow *nw) {
DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
- int32_t r = nw->dequeueBuffer(nw, &drv->wndBuffer);
+ int32_t r = native_window_dequeue_buffer_and_wait(nw, &drv->wndBuffer);
if (r) {
rsc->setError(RS_ERROR_DRIVER, "Error getting next IO output buffer.");
return false;
}
- // This lock is implicitly released by the queue buffer in IoSend
- r = nw->lockBuffer(nw, drv->wndBuffer);
- if (r) {
- rsc->setError(RS_ERROR_DRIVER, "Error locking next IO output buffer.");
- return false;
- }
-
// Must lock the whole surface
GraphicBufferMapper &mapper = GraphicBufferMapper::get();
Rect bounds(drv->wndBuffer->width, drv->wndBuffer->height);
@@ -420,8 +483,9 @@
mapper.lock(drv->wndBuffer->handle,
GRALLOC_USAGE_SW_READ_NEVER | GRALLOC_USAGE_SW_WRITE_OFTEN,
bounds, &dst);
- alloc->mHal.drvState.mallocPtr = dst;
- alloc->mHal.drvState.stride = drv->wndBuffer->stride * alloc->mHal.state.elementSizeBytes;
+ drv->lod[0].mallocPtr = dst;
+ alloc->mHal.drvState.mallocPtrLOD0 = dst;
+ drv->lod[0].stride = drv->wndBuffer->stride * alloc->mHal.state.elementSizeBytes;
return true;
}
@@ -443,7 +507,7 @@
ANativeWindow *old = alloc->mHal.state.wndSurface;
GraphicBufferMapper &mapper = GraphicBufferMapper::get();
mapper.unlock(drv->wndBuffer->handle);
- old->queueBuffer(old, drv->wndBuffer);
+ old->queueBuffer(old, drv->wndBuffer, -1);
}
if (nw != NULL) {
@@ -492,7 +556,7 @@
if (alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_SCRIPT) {
GraphicBufferMapper &mapper = GraphicBufferMapper::get();
mapper.unlock(drv->wndBuffer->handle);
- int32_t r = nw->queueBuffer(nw, drv->wndBuffer);
+ int32_t r = nw->queueBuffer(nw, drv->wndBuffer, -1);
if (r) {
rsc->setError(RS_ERROR_DRIVER, "Error sending IO output buffer.");
return;
@@ -514,8 +578,7 @@
DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
const uint32_t eSize = alloc->mHal.state.type->getElementSizeBytes();
- uint8_t * ptr = (uint8_t *)alloc->mHal.drvState.mallocPtr;
- ptr += eSize * xoff;
+ uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
uint32_t size = count * eSize;
if (alloc->mHal.state.hasReferences) {
@@ -535,10 +598,9 @@
uint32_t eSize = alloc->mHal.state.elementSizeBytes;
uint32_t lineSize = eSize * w;
- if (alloc->mHal.drvState.mallocPtr) {
+ if (drv->lod[0].mallocPtr) {
const uint8_t *src = static_cast<const uint8_t *>(data);
- uint8_t *dst = (uint8_t *)alloc->mHal.drvState.mallocPtr;
- dst += alloc->mHal.state.type->getLODFaceOffset(lod, face, xoff, yoff);
+ uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, lod, face);
for (uint32_t line=yoff; line < (yoff+h); line++) {
if (alloc->mHal.state.hasReferences) {
@@ -547,7 +609,7 @@
}
memcpy(dst, src, lineSize);
src += lineSize;
- dst += alloc->mHal.drvState.stride;
+ dst += drv->lod[lod].stride;
}
drv->uploadDeferred = true;
} else {
@@ -562,6 +624,56 @@
}
+// Copies `count` elements, starting at element `xoff` of the allocation's
+// level-0 data, into `data`.
+// NOTE(review): `lod` and `sizeBytes` are not consulted; the copy length is
+// count * element size. The prototype in rsdAllocation.h declares sizeBytes
+// as uint32_t while this definition uses size_t -- confirm which is intended.
+void rsdAllocationRead1D(const Context *rsc, const Allocation *alloc,
+        uint32_t xoff, uint32_t lod, uint32_t count,
+        void *data, size_t sizeBytes) {
+    const uint32_t elemBytes = alloc->mHal.state.type->getElementSizeBytes();
+    const uint8_t *src = GetOffsetPtr(alloc, xoff, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
+
+    memcpy(data, src, count * elemBytes);
+}
+
+// Copies a w x h rectangle whose first element is (xoff, yoff) in mip level
+// `lod` of face `face` into `data`, packing the rows tightly (w * element
+// size bytes per row). If the allocation has no level-0 backing pointer the
+// call only logs an error. `sizeBytes` is not consulted.
+void rsdAllocationRead2D(const Context *rsc, const Allocation *alloc,
+        uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+        uint32_t w, uint32_t h, void *data, size_t sizeBytes) {
+    DrvAllocation *state = (DrvAllocation *)alloc->mHal.drv;
+
+    if (!state->lod[0].mallocPtr) {
+        ALOGE("Add code to readback from non-script memory");
+        return;
+    }
+
+    const uint32_t elemBytes = alloc->mHal.state.elementSizeBytes;
+    const uint32_t rowBytes = elemBytes * w;
+
+    uint8_t *out = static_cast<uint8_t *>(data);
+    const uint8_t *in = GetOffsetPtr(alloc, xoff, yoff, lod, face);
+
+    uint32_t row = yoff;
+    while (row < (yoff + h)) {
+        memcpy(out, in, rowBytes);
+        out += rowBytes;                 // destination rows are packed
+        in += state->lod[lod].stride;    // source rows follow the level's stride
+        row++;
+    }
+}
+
+void rsdAllocationRead3D(const Context *rsc, const Allocation *alloc,
+ uint32_t xoff, uint32_t yoff, uint32_t zoff,
+ uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, uint32_t d, void *data, uint32_t sizeBytes) {
+ // Empty body: nothing is copied into `data` and no error is reported.
+}
+
+// Returns the level-0 base pointer of the allocation's driver storage.
+// This function itself takes no lock.
+void * rsdAllocationLock1D(const android::renderscript::Context *rsc,
+        const android::renderscript::Allocation *alloc) {
+    return ((DrvAllocation *)alloc->mHal.drv)->lod[0].mallocPtr;
+}
+
+void rsdAllocationUnlock1D(const android::renderscript::Context *rsc,
+ const android::renderscript::Allocation *alloc) {
+ // Empty body: there is nothing to release in this implementation.
+}
+
void rsdAllocationData1D_alloc(const android::renderscript::Context *rsc,
const android::renderscript::Allocation *dstAlloc,
uint32_t dstXoff, uint32_t dstLod, uint32_t count,
@@ -569,20 +681,6 @@
uint32_t srcXoff, uint32_t srcLod) {
}
-uint8_t *getOffsetPtr(const android::renderscript::Allocation *alloc,
- uint32_t xoff, uint32_t yoff, uint32_t lod,
- RsAllocationCubemapFace face) {
- uint8_t *ptr = static_cast<uint8_t *>(alloc->getPtr());
- ptr += alloc->getType()->getLODOffset(lod, xoff, yoff);
-
- if (face != 0) {
- uint32_t totalSizeBytes = alloc->getType()->getSizeBytes();
- uint32_t faceOffset = totalSizeBytes / 6;
- ptr += faceOffset * (uint32_t)face;
- }
- return ptr;
-}
-
void rsdAllocationData2D_alloc_script(const android::renderscript::Context *rsc,
const android::renderscript::Allocation *dstAlloc,
@@ -593,8 +691,8 @@
RsAllocationCubemapFace srcFace) {
uint32_t elementSize = dstAlloc->getType()->getElementSizeBytes();
for (uint32_t i = 0; i < h; i ++) {
- uint8_t *dstPtr = getOffsetPtr(dstAlloc, dstXoff, dstYoff + i, dstLod, dstFace);
- uint8_t *srcPtr = getOffsetPtr(srcAlloc, srcXoff, srcYoff + i, srcLod, srcFace);
+ uint8_t *dstPtr = GetOffsetPtr(dstAlloc, dstXoff, dstYoff + i, dstLod, dstFace);
+ uint8_t *srcPtr = GetOffsetPtr(srcAlloc, srcXoff, srcYoff + i, srcLod, srcFace);
memcpy(dstPtr, srcPtr, w * elementSize);
//ALOGE("COPIED dstXoff(%u), dstYoff(%u), dstLod(%u), dstFace(%u), w(%u), h(%u), srcXoff(%u), srcYoff(%u), srcLod(%u), srcFace(%u)",
@@ -635,8 +733,7 @@
DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- uint8_t * ptr = (uint8_t *)alloc->mHal.drvState.mallocPtr;
- ptr += eSize * x;
+ uint8_t * ptr = GetOffsetPtr(alloc, x, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
const Element * e = alloc->mHal.state.type->getElement()->getField(cIdx);
ptr += alloc->mHal.state.type->getElement()->getFieldOffsetBytes(cIdx);
@@ -656,8 +753,7 @@
DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv;
uint32_t eSize = alloc->mHal.state.elementSizeBytes;
- uint8_t * ptr = (uint8_t *)alloc->mHal.drvState.mallocPtr;
- ptr += (eSize * x) + (y * alloc->mHal.drvState.stride);
+ uint8_t * ptr = GetOffsetPtr(alloc, x, y, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
const Element * e = alloc->mHal.state.type->getElement()->getField(cIdx);
ptr += alloc->mHal.state.type->getElement()->getFieldOffsetBytes(cIdx);
@@ -671,4 +767,84 @@
drv->uploadDeferred = true;
}
+// Builds mip level lod+1 of `face` from level `lod` for 16-bit texels:
+// every destination texel is rsBoxFilter565 of the 2x2 source block that
+// covers it.
+static void mip565(const Allocation *alloc, int lod, RsAllocationCubemapFace face) {
+    DrvAllocation *state = (DrvAllocation *)alloc->mHal.drv;
+    const uint32_t dstW = state->lod[lod + 1].dimX;
+    const uint32_t dstH = state->lod[lod + 1].dimY;
+
+    for (uint32_t row = 0; row < dstH; row++) {
+        uint16_t *dst = (uint16_t *)GetOffsetPtr(alloc, 0, row, lod + 1, face);
+        const uint16_t *top = (uint16_t *)GetOffsetPtr(alloc, 0, row*2, lod, face);
+        const uint16_t *bottom = (uint16_t *)GetOffsetPtr(alloc, 0, row*2+1, lod, face);
+
+        for (uint32_t col = 0; col < dstW; col++) {
+            const size_t s = (size_t)col * 2;   // left texel of the source pair
+            dst[col] = rsBoxFilter565(top[s], top[s + 1], bottom[s], bottom[s + 1]);
+        }
+    }
+}
+
+// Builds mip level lod+1 of `face` from level `lod` for 32-bit texels:
+// every destination texel is rsBoxFilter8888 of the 2x2 source block that
+// covers it.
+static void mip8888(const Allocation *alloc, int lod, RsAllocationCubemapFace face) {
+    DrvAllocation *state = (DrvAllocation *)alloc->mHal.drv;
+    const uint32_t dstW = state->lod[lod + 1].dimX;
+    const uint32_t dstH = state->lod[lod + 1].dimY;
+
+    for (uint32_t row = 0; row < dstH; row++) {
+        uint32_t *dst = (uint32_t *)GetOffsetPtr(alloc, 0, row, lod + 1, face);
+        const uint32_t *top = (uint32_t *)GetOffsetPtr(alloc, 0, row*2, lod, face);
+        const uint32_t *bottom = (uint32_t *)GetOffsetPtr(alloc, 0, row*2+1, lod, face);
+
+        for (uint32_t col = 0; col < dstW; col++) {
+            const size_t s = (size_t)col * 2;   // left texel of the source pair
+            dst[col] = rsBoxFilter8888(top[s], top[s + 1], bottom[s], bottom[s + 1]);
+        }
+    }
+}
+
+// Builds mip level lod+1 of `face` from level `lod` for 8-bit texels:
+// every destination texel is the truncated average of the 2x2 source block
+// that covers it.
+static void mip8(const Allocation *alloc, int lod, RsAllocationCubemapFace face) {
+    DrvAllocation *state = (DrvAllocation *)alloc->mHal.drv;
+    const uint32_t dstW = state->lod[lod + 1].dimX;
+    const uint32_t dstH = state->lod[lod + 1].dimY;
+
+    for (uint32_t row = 0; row < dstH; row++) {
+        uint8_t *dst = GetOffsetPtr(alloc, 0, row, lod + 1, face);
+        const uint8_t *top = GetOffsetPtr(alloc, 0, row*2, lod, face);
+        const uint8_t *bottom = GetOffsetPtr(alloc, 0, row*2+1, lod, face);
+
+        for (uint32_t col = 0; col < dstW; col++) {
+            const size_t s = (size_t)col * 2;   // left texel of the source pair
+            const uint32_t sum = (uint32_t)top[s] + top[s + 1] + bottom[s] + bottom[s + 1];
+            // sum <= 1020, so a truncating divide by four is exactly the
+            // same value as multiplying by 0.25f and casting back.
+            dst[col] = (uint8_t)(sum >> 2);
+        }
+    }
+}
+
+// Regenerates every mip level above level 0, for each face, by repeatedly
+// downsampling the previous level. The filter is chosen from the element
+// size in bits (32, 16 or 8); any other size leaves the data untouched.
+// Does nothing when the allocation has no level-0 backing pointer.
+void rsdAllocationGenerateMipmaps(const Context *rsc, const Allocation *alloc) {
+    typedef void (*MipFn)(const Allocation *, int, RsAllocationCubemapFace);
+
+    DrvAllocation *state = (DrvAllocation *)alloc->mHal.drv;
+    if (!state->lod[0].mallocPtr) {
+        return;
+    }
+
+    const uint32_t faceTotal = alloc->getType()->getDimFaces() ? 6 : 1;
+
+    // Pick the per-level filter once instead of switching for every level.
+    MipFn downsample = NULL;
+    switch (alloc->getType()->getElement()->getSizeBits()) {
+    case 32:
+        downsample = mip8888;
+        break;
+    case 16:
+        downsample = mip565;
+        break;
+    case 8:
+        downsample = mip8;
+        break;
+    }
+    if (!downsample) {
+        return;
+    }
+
+    for (uint32_t f = 0; f < faceTotal; f++) {
+        for (uint32_t level = 0; level < (alloc->getType()->getLODCount() -1); level++) {
+            downsample(alloc, level, (RsAllocationCubemapFace)f);
+        }
+    }
+}
+
diff --git a/driver/rsdAllocation.h b/driver/rsdAllocation.h
index 0b196a1..e6488b9 100644
--- a/driver/rsdAllocation.h
+++ b/driver/rsdAllocation.h
@@ -19,6 +19,7 @@
#include <rs_hal.h>
#include <rsRuntime.h>
+#include <rsAllocation.h>
#include <GLES/gl.h>
#include <GLES2/gl2.h>
@@ -39,9 +40,6 @@
// Is this a legal structure to be used as an FBO render target
uint32_t renderTargetID;
- uint32_t width;
- uint32_t height;
-
GLenum glTarget;
GLenum glType;
GLenum glFormat;
@@ -51,6 +49,19 @@
RsdFrameBufferObj * readBackFBO;
ANativeWindow *wnd;
ANativeWindowBuffer *wndBuffer;
+
+ struct LodState {
+ void * mallocPtr;
+ size_t stride;
+ uint32_t dimX;
+ uint32_t dimY;
+ uint32_t dimZ;
+ } lod[android::renderscript::Allocation::MAX_LOD];
+ size_t faceOffset;
+ uint32_t lodCount;
+ uint32_t faceCount;
+
+
};
GLenum rsdTypeToGLType(RsDataType t);
@@ -95,6 +106,27 @@
uint32_t lod, RsAllocationCubemapFace face,
uint32_t w, uint32_t h, uint32_t d, const void *data, uint32_t sizeBytes);
+void rsdAllocationRead1D(const android::renderscript::Context *rsc,
+ const android::renderscript::Allocation *alloc,
+ uint32_t xoff, uint32_t lod, uint32_t count,
+ void *data, uint32_t sizeBytes);
+void rsdAllocationRead2D(const android::renderscript::Context *rsc,
+ const android::renderscript::Allocation *alloc,
+ uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h,
+ void *data, uint32_t sizeBytes);
+void rsdAllocationRead3D(const android::renderscript::Context *rsc,
+ const android::renderscript::Allocation *alloc,
+ uint32_t xoff, uint32_t yoff, uint32_t zoff,
+ uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, uint32_t d, void *data, uint32_t sizeBytes);
+
+void * rsdAllocationLock1D(const android::renderscript::Context *rsc,
+ const android::renderscript::Allocation *alloc);
+void rsdAllocationUnlock1D(const android::renderscript::Context *rsc,
+ const android::renderscript::Allocation *alloc);
+
+
void rsdAllocationData1D_alloc(const android::renderscript::Context *rsc,
const android::renderscript::Allocation *dstAlloc,
uint32_t dstXoff, uint32_t dstLod, uint32_t count,
@@ -125,6 +157,8 @@
uint32_t x, uint32_t y,
const void *data, uint32_t elementOff, uint32_t sizeBytes);
+void rsdAllocationGenerateMipmaps(const android::renderscript::Context *rsc,
+ const android::renderscript::Allocation *alloc);
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index 457a99d..35d6f3b 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -17,13 +17,19 @@
#include "rsdCore.h"
#include "rsdBcc.h"
#include "rsdRuntime.h"
+#include "rsdAllocation.h"
+#include "rsdIntrinsics.h"
-#include <bcinfo/MetadataExtractor.h>
+#include <bcc/BCCContext.h>
+#include <bcc/Renderscript/RSCompilerDriver.h>
+#include <bcc/Renderscript/RSExecutable.h>
+#include <bcc/Renderscript/RSInfo.h>
#include "rsContext.h"
#include "rsElement.h"
#include "rsScriptC.h"
+#include "utils/Vector.h"
#include "utils/Timers.h"
#include "utils/StopWatch.h"
@@ -31,23 +37,19 @@
using namespace android::renderscript;
struct DrvScript {
+ RsScriptIntrinsicID mIntrinsicID;
int (*mRoot)();
int (*mRootExpand)();
void (*mInit)();
void (*mFreeChildren)();
- BCCScriptRef mBccScript;
+ bcc::BCCContext *mCompilerContext;
+ bcc::RSCompilerDriver *mCompilerDriver;
+ bcc::RSExecutable *mExecutable;
- bcinfo::MetadataExtractor *ME;
-
- InvokeFunc_t *mInvokeFunctions;
- ForEachFunc_t *mForEachFunctions;
- void ** mFieldAddress;
- bool * mFieldIsObject;
- const uint32_t *mExportForEachSignatureList;
-
- const uint8_t * mScriptText;
- uint32_t mScriptTextLength;
+ Allocation **mBoundAllocs;
+ RsdIntriniscFuncs_t mIntrinsicFuncs;
+ void * mIntrinsicData;
};
typedef void (*outer_foreach_t)(
@@ -72,127 +74,81 @@
size_t bitcodeSize,
uint32_t flags) {
//ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
+ //ALOGE("rsdScriptInit %p %p", rsc, script);
pthread_mutex_lock(&rsdgInitMutex);
- size_t exportFuncCount = 0;
- size_t exportVarCount = 0;
- size_t objectSlotCount = 0;
- size_t exportForEachSignatureCount = 0;
-
- const char* coreLib = "/system/lib/libclcore.bc";
- bcinfo::RSFloatPrecision prec;
-
+ bcc::RSExecutable *exec;
+ const bcc::RSInfo *info;
DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
if (drv == NULL) {
goto error;
}
script->mHal.drv = drv;
- drv->mBccScript = bccCreateScript();
+ drv->mCompilerContext = NULL;
+ drv->mCompilerDriver = NULL;
+ drv->mExecutable = NULL;
+
+ drv->mCompilerContext = new bcc::BCCContext();
+ if (drv->mCompilerContext == NULL) {
+ ALOGE("bcc: FAILS to create compiler context (out of memory)");
+ goto error;
+ }
+
+ drv->mCompilerDriver = new bcc::RSCompilerDriver();
+ if (drv->mCompilerDriver == NULL) {
+ ALOGE("bcc: FAILS to create compiler driver (out of memory)");
+ goto error;
+ }
+
script->mHal.info.isThreadable = true;
- drv->mScriptText = bitcode;
- drv->mScriptTextLength = bitcodeSize;
+ drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub);
+ drv->mCompilerDriver->setRSRuntimeLookupContext(script);
- drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
- drv->mScriptTextLength);
- if (!drv->ME->extract()) {
- ALOGE("bcinfo: failed to read script metadata");
- goto error;
- }
+ exec = drv->mCompilerDriver->build(*drv->mCompilerContext,
+ cacheDir, resName,
+ (const char *)bitcode, bitcodeSize);
- //ALOGE("mBccScript %p", script->mBccScript);
-
- if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
- ALOGE("bcc: FAILS to register symbol callback");
+ if (exec == NULL) {
+ ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
goto error;
}
- if (bccReadBC(drv->mBccScript,
- resName,
- (char const *)drv->mScriptText,
- drv->mScriptTextLength, 0) != 0) {
- ALOGE("bcc: FAILS to read bitcode");
- goto error;
+ drv->mExecutable = exec;
+
+ exec->setThreadable(script->mHal.info.isThreadable);
+ if (!exec->syncInfo()) {
+ ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
}
- // NEON-capable devices can use an accelerated math library for all
- // reduced precision scripts.
-#if defined(ARCH_ARM_HAVE_NEON)
- prec = drv->ME->getRSFloatPrecision();
- if (prec != bcinfo::RS_FP_Full) {
- coreLib = "/system/lib/libclcore_neon.bc";
- }
-#endif
+ drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
+ drv->mRootExpand =
+ reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
+ drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
+ drv->mFreeChildren =
+ reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
- if (bccLinkFile(drv->mBccScript, coreLib, 0) != 0) {
- ALOGE("bcc: FAILS to link bitcode");
- goto error;
- }
-
- if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
- ALOGE("bcc: FAILS to prepare executable");
- goto error;
- }
-
- drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
- drv->mRootExpand = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root.expand"));
- drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
- drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor"));
-
- exportFuncCount = drv->ME->getExportFuncCount();
- if (exportFuncCount > 0) {
- drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
- sizeof(InvokeFunc_t));
- bccGetExportFuncList(drv->mBccScript, exportFuncCount,
- (void **) drv->mInvokeFunctions);
- } else {
- drv->mInvokeFunctions = NULL;
- }
-
- exportVarCount = drv->ME->getExportVarCount();
- if (exportVarCount > 0) {
- drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
- drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
- bccGetExportVarList(drv->mBccScript, exportVarCount,
- (void **) drv->mFieldAddress);
- } else {
- drv->mFieldAddress = NULL;
- drv->mFieldIsObject = NULL;
- }
-
- objectSlotCount = drv->ME->getObjectSlotCount();
- if (objectSlotCount > 0) {
- const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
- for (uint32_t ct=0; ct < objectSlotCount; ct++) {
- drv->mFieldIsObject[objectSlotList[ct]] = true;
- }
- }
-
- exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
- drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
- if (exportForEachSignatureCount > 0) {
- drv->mForEachFunctions =
- (ForEachFunc_t*) calloc(exportForEachSignatureCount,
- sizeof(ForEachFunc_t));
- bccGetExportForEachList(drv->mBccScript, exportForEachSignatureCount,
- (void **) drv->mForEachFunctions);
- } else {
- drv->mForEachFunctions = NULL;
- }
-
+ info = &drv->mExecutable->getInfo();
// Copy info over to runtime
- script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
- script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
- script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
- script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
- script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
+ script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
+ script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
+ script->mHal.info.exportedPragmaCount = info->getPragmas().size();
+ script->mHal.info.exportedPragmaKeyList =
+ const_cast<const char**>(exec->getPragmaKeys().array());
+ script->mHal.info.exportedPragmaValueList =
+ const_cast<const char**>(exec->getPragmaValues().array());
if (drv->mRootExpand) {
- script->mHal.info.root = drv->mRootExpand;
+ script->mHal.info.root = drv->mRootExpand;
} else {
- script->mHal.info.root = drv->mRoot;
+ script->mHal.info.root = drv->mRoot;
+ }
+
+ if (script->mHal.info.exportedVariableCount) {
+ drv->mBoundAllocs = new Allocation *[script->mHal.info.exportedVariableCount];
+ memset(drv->mBoundAllocs, 0, sizeof(void *) * script->mHal.info.exportedVariableCount);
}
pthread_mutex_unlock(&rsdgInitMutex);
@@ -201,36 +157,50 @@
error:
pthread_mutex_unlock(&rsdgInitMutex);
- if (drv->ME) {
- delete drv->ME;
- drv->ME = NULL;
+ if (drv) {
+ delete drv->mCompilerContext;
+ delete drv->mCompilerDriver;
+ delete drv->mExecutable;
+ delete[] drv->mBoundAllocs;
+ free(drv);
}
- free(drv);
+ script->mHal.drv = NULL;
return false;
}
+// Allocates zeroed driver state for an intrinsic script, attaches it to `s`,
+// records the intrinsic id and stores whatever rsdIntrinsic_Init returns as
+// the intrinsic's private data. Runs under rsdgInitMutex.
+// Returns false only when the driver state cannot be allocated; `e` is not
+// used here.
+bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) {
+    pthread_mutex_lock(&rsdgInitMutex);
+
+    DrvScript *state = (DrvScript *)calloc(1, sizeof(DrvScript));
+    const bool allocated = (state != NULL);
+    if (allocated) {
+        s->mHal.drv = state;
+        state->mIntrinsicID = iid;
+        state->mIntrinsicData = rsdIntrinsic_Init(rsc, s, iid, &state->mIntrinsicFuncs);
+    }
+
+    pthread_mutex_unlock(&rsdgInitMutex);
+    return allocated;
+}
+
typedef struct {
+ RsForEachStubParamStruct fep;
+
Context *rsc;
Script *script;
ForEachFunc_t kernel;
uint32_t sig;
const Allocation * ain;
Allocation * aout;
- const void * usr;
- size_t usrLen;
uint32_t mSliceSize;
volatile int mSliceNum;
- const uint8_t *ptrIn;
- uint32_t eStrideIn;
- uint8_t *ptrOut;
- uint32_t eStrideOut;
-
- uint32_t yStrideIn;
- uint32_t yStrideOut;
-
uint32_t xStart;
uint32_t xEnd;
uint32_t yStart;
@@ -239,20 +209,13 @@
uint32_t zEnd;
uint32_t arrayStart;
uint32_t arrayEnd;
-
- uint32_t dimX;
- uint32_t dimY;
- uint32_t dimZ;
- uint32_t dimArray;
} MTLaunchStruct;
typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
static void wc_xy(void *usr, uint32_t idx) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
RsForEachStubParamStruct p;
- memset(&p, 0, sizeof(p));
- p.usr = mtls->usr;
- p.usr_len = mtls->usrLen;
+ memcpy(&p, &mtls->fep, sizeof(p));
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
uint32_t sig = mtls->sig;
@@ -269,9 +232,9 @@
//ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
//ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
for (p.y = yStart; p.y < yEnd; p.y++) {
- p.out = mtls->ptrOut + (mtls->yStrideOut * p.y);
- p.in = mtls->ptrIn + (mtls->yStrideIn * p.y);
- fn(&p, mtls->xStart, mtls->xEnd, mtls->eStrideIn, mtls->eStrideOut);
+ p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y);
+ p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y);
+ fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
}
}
}
@@ -279,9 +242,7 @@
static void wc_x(void *usr, uint32_t idx) {
MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
RsForEachStubParamStruct p;
- memset(&p, 0, sizeof(p));
- p.usr = mtls->usr;
- p.usr_len = mtls->usrLen;
+ memcpy(&p, &mtls->fep, sizeof(p));
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
uint32_t sig = mtls->sig;
@@ -298,9 +259,9 @@
//ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd);
//ALOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
- p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
- p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
- fn(&p, xStart, xEnd, mtls->eStrideIn, mtls->eStrideOut);
+ p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
+ p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
+ fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
}
}
@@ -318,22 +279,30 @@
MTLaunchStruct mtls;
memset(&mtls, 0, sizeof(mtls));
+ //ALOGE("for each script %p in %p out %p", s, ain, aout);
+
DrvScript *drv = (DrvScript *)s->mHal.drv;
- mtls.kernel = drv->mForEachFunctions[slot];
- rsAssert(mtls.kernel != NULL);
- mtls.sig = 0x1f; // temp fix for old apps, full table in slang_rs_export_foreach.cpp
- if (drv->mExportForEachSignatureList) {
- mtls.sig = drv->mExportForEachSignatureList[slot];
+
+ if (drv->mIntrinsicID) {
+ mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
+ usr = drv->mIntrinsicData;
+ } else {
+ rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size());
+ mtls.kernel = reinterpret_cast<ForEachFunc_t>(
+ drv->mExecutable->getExportForeachFuncAddrs()[slot]);
+ rsAssert(mtls.kernel != NULL);
+ mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
}
+
if (ain) {
- mtls.dimX = ain->getType()->getDimX();
- mtls.dimY = ain->getType()->getDimY();
- mtls.dimZ = ain->getType()->getDimZ();
+ mtls.fep.dimX = ain->getType()->getDimX();
+ mtls.fep.dimY = ain->getType()->getDimY();
+ mtls.fep.dimZ = ain->getType()->getDimZ();
//mtls.dimArray = ain->getType()->getDimArray();
} else if (aout) {
- mtls.dimX = aout->getType()->getDimX();
- mtls.dimY = aout->getType()->getDimY();
- mtls.dimZ = aout->getType()->getDimZ();
+ mtls.fep.dimX = aout->getType()->getDimX();
+ mtls.fep.dimY = aout->getType()->getDimY();
+ mtls.fep.dimZ = aout->getType()->getDimZ();
//mtls.dimArray = aout->getType()->getDimArray();
} else {
rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
@@ -341,24 +310,24 @@
}
if (!sc || (sc->xEnd == 0)) {
- mtls.xEnd = mtls.dimX;
+ mtls.xEnd = mtls.fep.dimX;
} else {
- rsAssert(sc->xStart < mtls.dimX);
- rsAssert(sc->xEnd <= mtls.dimX);
+ rsAssert(sc->xStart < mtls.fep.dimX);
+ rsAssert(sc->xEnd <= mtls.fep.dimX);
rsAssert(sc->xStart < sc->xEnd);
- mtls.xStart = rsMin(mtls.dimX, sc->xStart);
- mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
+ mtls.xStart = rsMin(mtls.fep.dimX, sc->xStart);
+ mtls.xEnd = rsMin(mtls.fep.dimX, sc->xEnd);
if (mtls.xStart >= mtls.xEnd) return;
}
if (!sc || (sc->yEnd == 0)) {
- mtls.yEnd = mtls.dimY;
+ mtls.yEnd = mtls.fep.dimY;
} else {
- rsAssert(sc->yStart < mtls.dimY);
- rsAssert(sc->yEnd <= mtls.dimY);
+ rsAssert(sc->yStart < mtls.fep.dimY);
+ rsAssert(sc->yEnd <= mtls.fep.dimY);
rsAssert(sc->yStart < sc->yEnd);
- mtls.yStart = rsMin(mtls.dimY, sc->yStart);
- mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
+ mtls.yStart = rsMin(mtls.fep.dimY, sc->yStart);
+ mtls.yEnd = rsMin(mtls.fep.dimY, sc->yEnd);
if (mtls.yStart >= mtls.yEnd) return;
}
@@ -376,32 +345,45 @@
mtls.ain = ain;
mtls.aout = aout;
mtls.script = s;
- mtls.usr = usr;
- mtls.usrLen = usrLen;
+ mtls.fep.usr = usr;
+ mtls.fep.usrLen = usrLen;
mtls.mSliceSize = 10;
mtls.mSliceNum = 0;
- mtls.ptrIn = NULL;
- mtls.eStrideIn = 0;
+ mtls.fep.ptrIn = NULL;
+ mtls.fep.eStrideIn = 0;
if (ain) {
- mtls.ptrIn = (const uint8_t *)ain->getPtr();
- mtls.eStrideIn = ain->getType()->getElementSizeBytes();
- mtls.yStrideIn = ain->mHal.drvState.stride;
+ DrvAllocation *aindrv = (DrvAllocation *)ain->mHal.drv;
+ mtls.fep.ptrIn = (const uint8_t *)aindrv->lod[0].mallocPtr;
+ mtls.fep.eStrideIn = ain->getType()->getElementSizeBytes();
+ mtls.fep.yStrideIn = aindrv->lod[0].stride;
}
- mtls.ptrOut = NULL;
- mtls.eStrideOut = 0;
+ mtls.fep.ptrOut = NULL;
+ mtls.fep.eStrideOut = 0;
if (aout) {
- mtls.ptrOut = (uint8_t *)aout->getPtr();
- mtls.eStrideOut = aout->getType()->getElementSizeBytes();
- mtls.yStrideOut = aout->mHal.drvState.stride;
+ DrvAllocation *aoutdrv = (DrvAllocation *)aout->mHal.drv;
+ mtls.fep.ptrOut = (uint8_t *)aoutdrv->lod[0].mallocPtr;
+ mtls.fep.eStrideOut = aout->getType()->getElementSizeBytes();
+ mtls.fep.yStrideOut = aoutdrv->lod[0].stride;
}
+
if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable && !dc->mInForEach) {
dc->mInForEach = true;
- if (mtls.dimY > 1) {
+ if (mtls.fep.dimY > 1) {
+ mtls.mSliceSize = mtls.fep.dimY / (dc->mWorkers.mCount * 4);
+ if(mtls.mSliceSize < 1) {
+ mtls.mSliceSize = 1;
+ }
+
rsdLaunchThreads(mrsc, wc_xy, &mtls);
} else {
+ mtls.mSliceSize = mtls.fep.dimX / (dc->mWorkers.mCount * 4);
+ if(mtls.mSliceSize < 1) {
+ mtls.mSliceSize = 1;
+ }
+
rsdLaunchThreads(mrsc, wc_x, &mtls);
}
dc->mInForEach = false;
@@ -409,9 +391,7 @@
//ALOGE("launch 1");
} else {
RsForEachStubParamStruct p;
- memset(&p, 0, sizeof(p));
- p.usr = mtls.usr;
- p.usr_len = mtls.usrLen;
+ memcpy(&p, &mtls.fep, sizeof(p));
uint32_t sig = mtls.sig;
//ALOGE("launch 3");
@@ -419,13 +399,11 @@
for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
- uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
- mtls.dimX * mtls.dimY * p.z +
- mtls.dimX * p.y;
- p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
- p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
- fn(&p, mtls.xStart, mtls.xEnd, mtls.eStrideIn,
- mtls.eStrideOut);
+ uint32_t offset = mtls.fep.dimY * mtls.fep.dimZ * p.ar[0] +
+ mtls.fep.dimY * p.z + p.y;
+ p.out = mtls.fep.ptrOut + (mtls.fep.yStrideOut * offset);
+ p.in = mtls.fep.ptrIn + (mtls.fep.yStrideIn * offset);
+ fn(&p, mtls.xStart, mtls.xEnd, mtls.fep.eStrideIn, mtls.fep.eStrideOut);
}
}
}
@@ -469,8 +447,8 @@
//ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
Script * oldTLS = setTLS(script);
- ((void (*)(const void *, uint32_t))
- drv->mInvokeFunctions[slot])(params, paramLength);
+ reinterpret_cast<void (*)(const void *, uint32_t)>(
+ drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
setTLS(oldTLS);
}
@@ -480,7 +458,13 @@
//rsAssert(!script->mFieldIsObject[slot]);
//ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
- int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
+ if (drv->mIntrinsicID) {
+ drv->mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
+ return;
+ }
+
+ int32_t *destPtr = reinterpret_cast<int32_t *>(
+ drv->mExecutable->getExportVarAddrs()[slot]);
if (!destPtr) {
//ALOGV("Calling setVar on slot = %i which is null", slot);
return;
@@ -497,7 +481,8 @@
const size_t *dims, size_t dimLength) {
DrvScript *drv = (DrvScript *)script->mHal.drv;
- int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
+ int32_t *destPtr = reinterpret_cast<int32_t *>(
+ drv->mExecutable->getExportVarAddrs()[slot]);
if (!destPtr) {
//ALOGV("Calling setVar on slot = %i which is null", slot);
return;
@@ -529,18 +514,31 @@
memcpy(destPtr, data, dataLength);
}
-void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
+void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, Allocation *data) {
DrvScript *drv = (DrvScript *)script->mHal.drv;
+
//rsAssert(!script->mFieldIsObject[slot]);
//ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
- int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
+ if (drv->mIntrinsicID) {
+ drv->mIntrinsicFuncs.bind(dc, script, drv->mIntrinsicData, slot, data);
+ return;
+ }
+
+ int32_t *destPtr = reinterpret_cast<int32_t *>(
+ drv->mExecutable->getExportVarAddrs()[slot]);
if (!destPtr) {
//ALOGV("Calling setVar on slot = %i which is null", slot);
return;
}
- memcpy(destPtr, &data, sizeof(void *));
+ void *ptr = NULL;
+ drv->mBoundAllocs[slot] = data;
+ if(data) {
+ DrvAllocation *allocDrv = (DrvAllocation *)data->mHal.drv;
+ ptr = allocDrv->lod[0].mallocPtr;
+ }
+ memcpy(destPtr, &ptr, sizeof(void *));
}
void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
@@ -548,7 +546,8 @@
//rsAssert(script->mFieldIsObject[slot]);
//ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
- int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
+ int32_t *destPtr = reinterpret_cast<int32_t *>(
+ drv->mExecutable->getExportVarAddrs()[slot]);
if (!destPtr) {
//ALOGV("Calling setVar on slot = %i which is null", slot);
return;
@@ -560,38 +559,62 @@
void rsdScriptDestroy(const Context *dc, Script *script) {
DrvScript *drv = (DrvScript *)script->mHal.drv;
- if (drv->mFieldAddress) {
- size_t exportVarCount = drv->ME->getExportVarCount();
- for (size_t ct = 0; ct < exportVarCount; ct++) {
- if (drv->mFieldIsObject[ct]) {
- // The field address can be NULL if the script-side has
- // optimized the corresponding global variable away.
- if (drv->mFieldAddress[ct]) {
- rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
+ if (drv == NULL) {
+ return;
+ }
+
+ if (drv->mExecutable) {
+ Vector<void *>::const_iterator var_addr_iter =
+ drv->mExecutable->getExportVarAddrs().begin();
+ Vector<void *>::const_iterator var_addr_end =
+ drv->mExecutable->getExportVarAddrs().end();
+
+ bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
+ drv->mExecutable->getInfo().getObjectSlots().begin();
+ bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
+ drv->mExecutable->getInfo().getObjectSlots().end();
+
+ while ((var_addr_iter != var_addr_end) &&
+ (is_object_iter != is_object_end)) {
+ // The field address can be NULL if the script-side has optimized
+ // the corresponding global variable away.
+ ObjectBase **obj_addr =
+ reinterpret_cast<ObjectBase **>(*var_addr_iter);
+ if (*is_object_iter) {
+ if (*var_addr_iter != NULL) {
+ rsrClearObject(dc, script, obj_addr);
}
}
+ var_addr_iter++;
+ is_object_iter++;
}
- free(drv->mFieldAddress);
- drv->mFieldAddress = NULL;
- free(drv->mFieldIsObject);
- drv->mFieldIsObject = NULL; }
-
- if (drv->mInvokeFunctions) {
- free(drv->mInvokeFunctions);
- drv->mInvokeFunctions = NULL;
}
- if (drv->mForEachFunctions) {
- free(drv->mForEachFunctions);
- drv->mForEachFunctions = NULL;
- }
-
- delete drv->ME;
- drv->ME = NULL;
-
+ delete drv->mCompilerContext;
+ delete drv->mCompilerDriver;
+ delete drv->mExecutable;
+ delete[] drv->mBoundAllocs;
free(drv);
script->mHal.drv = NULL;
-
}
+/*
+ * Reverse lookup: find the Allocation bound to one of the script's exported
+ * slots whose level-0 backing store (lod[0].mallocPtr) is exactly ptr.
+ * Returns NULL for a NULL ptr, and logs an error and returns NULL when no
+ * bound allocation matches.
+ */
+Allocation * rsdScriptGetAllocationForPointer(const android::renderscript::Context *dc,
+                                              const android::renderscript::Script *sc,
+                                              const void *ptr) {
+    DrvScript *drv = (DrvScript *)sc->mHal.drv;
+    if (ptr == NULL) {
+        return NULL;
+    }
+
+    for (uint32_t slot = 0; slot < sc->mHal.info.exportedVariableCount; slot++) {
+        Allocation *bound = drv->mBoundAllocs[slot];
+        if (bound != NULL) {
+            // Compare against the driver-side base pointer of the bound allocation.
+            DrvAllocation *boundDrv = (DrvAllocation *)bound->mHal.drv;
+            if (boundDrv->lod[0].mallocPtr == ptr) {
+                return bound;
+            }
+        }
+    }
+
+    ALOGE("rsGetAllocation, failed to find %p", ptr);
+    return NULL;
+}
diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h
index 7a4b138..114e6cf 100644
--- a/driver/rsdBcc.h
+++ b/driver/rsdBcc.h
@@ -24,6 +24,11 @@
bool rsdScriptInit(const android::renderscript::Context *, android::renderscript::ScriptC *,
char const *resName, char const *cacheDir,
uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags);
+bool rsdInitIntrinsic(const android::renderscript::Context *rsc,
+ android::renderscript::Script *s,
+ RsScriptIntrinsicID iid,
+ android::renderscript::Element *e);
+
void rsdScriptInvokeFunction(const android::renderscript::Context *dc,
android::renderscript::Script *script,
uint32_t slot,
@@ -58,7 +63,7 @@
size_t dimLength);
void rsdScriptSetGlobalBind(const android::renderscript::Context *,
const android::renderscript::Script *,
- uint32_t slot, void *data);
+ uint32_t slot, android::renderscript::Allocation *data);
void rsdScriptSetGlobalObj(const android::renderscript::Context *,
const android::renderscript::Script *,
uint32_t slot, android::renderscript::ObjectBase *data);
@@ -76,5 +81,9 @@
void rsdScriptDestroy(const android::renderscript::Context *dc,
android::renderscript::Script *script);
+android::renderscript::Allocation * rsdScriptGetAllocationForPointer(
+ const android::renderscript::Context *dc,
+ const android::renderscript::Script *script,
+ const void *);
#endif
diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp
index f325087..d580a3d 100644
--- a/driver/rsdCore.cpp
+++ b/driver/rsdCore.cpp
@@ -36,7 +36,6 @@
#include <cutils/properties.h>
#include <sys/syscall.h>
#include <string.h>
-#include <bcc/bcc.h>
using namespace android;
using namespace android::renderscript;
@@ -55,6 +54,7 @@
SetPriority,
{
rsdScriptInit,
+ rsdInitIntrinsic,
rsdScriptInvokeFunction,
rsdScriptInvokeRoot,
rsdScriptInvokeForEach,
@@ -80,11 +80,17 @@
rsdAllocationData1D,
rsdAllocationData2D,
rsdAllocationData3D,
+ rsdAllocationRead1D,
+ rsdAllocationRead2D,
+ rsdAllocationRead3D,
+ rsdAllocationLock1D,
+ rsdAllocationUnlock1D,
rsdAllocationData1D_alloc,
rsdAllocationData2D_alloc,
rsdAllocationData3D_alloc,
rsdAllocationElementData1D,
- rsdAllocationElementData2D
+ rsdAllocationElementData2D,
+ rsdAllocationGenerateMipmaps
},
@@ -228,7 +234,7 @@
int cpu = sysconf(_SC_NPROCESSORS_ONLN);
- if(rsc->props.mDebugMaxThreads && (cpu > (int)rsc->props.mDebugMaxThreads)) {
+ if(rsc->props.mDebugMaxThreads) {
cpu = rsc->props.mDebugMaxThreads;
}
if (cpu < 2) {
diff --git a/driver/rsdIntrinsicColorMatrix.cpp b/driver/rsdIntrinsicColorMatrix.cpp
new file mode 100644
index 0000000..bbeb1ef
--- /dev/null
+++ b/driver/rsdIntrinsicColorMatrix.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "rsdCore.h"
+#include "rsdIntrinsics.h"
+#include "rsdAllocation.h"
+
+#include "rsdIntrinsicInlines.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+// Per-script state for the color-matrix intrinsic (allocated in
+// rsdIntrinsic_InitColorMatrix and handed back to the callbacks as
+// intrinsicData / p->usr).
+struct ConvolveParams {
+ // 4x4 matrix as floats; this is what the scalar path (One) reads.
+ float fp[16];
+ // fp scaled by 255 and rounded to short; passed to the NEON kernel.
+ short ip[16];
+};
+
+/*
+ * setVar hook for the color-matrix intrinsic.
+ * Copies the caller's float matrix into cp->fp and regenerates the fixed-point
+ * copy in cp->ip (value * 255, rounded) used by the NEON kernel.
+ * Only slot 0 is expected.
+ */
+static void ColorMatrix_SetVar(const Context *dc, const Script *script, void * intrinsicData,
+                               uint32_t slot, void *data, size_t dataLength) {
+    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
+
+    rsAssert(slot == 0);
+    // dataLength is supplied by the caller; never copy more than fp[] can hold,
+    // otherwise the write runs into ip[] and past the end of the struct.
+    if (dataLength > sizeof(cp->fp)) {
+        dataLength = sizeof(cp->fp);
+    }
+    memcpy (cp->fp, data, dataLength);
+    for(int ct=0; ct < 16; ct++) {
+        cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
+    }
+}
+
+extern "C" void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, const short *coef, uint32_t count);
+extern "C" void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, const short *coef, uint32_t count);
+
+/*
+ * Scalar path: apply the 4x4 matrix in coeff to the single pixel at py and
+ * store the result in *out, with every channel clamped to [0, 255].
+ * p is not used.
+ */
+static void One(const RsForEachStubParamStruct *p, uchar4 *out,
+                const uchar4 *py, const float* coeff) {
+    float4 src = convert_float4(py[0]);
+    float4 acc;
+
+    // Output channel k is built from coeff[k], coeff[k+4], coeff[k+8], coeff[k+12].
+    acc.x = src.x * coeff[0] + src.y * coeff[4] + src.z * coeff[8] + src.w * coeff[12];
+    acc.y = src.x * coeff[1] + src.y * coeff[5] + src.z * coeff[9] + src.w * coeff[13];
+    acc.z = src.x * coeff[2] + src.y * coeff[6] + src.z * coeff[10] + src.w * coeff[14];
+    acc.w = src.x * coeff[3] + src.y * coeff[7] + src.z * coeff[11] + src.w * coeff[15];
+
+    // Saturate each channel before narrowing to 8 bits.
+    if (acc.x < 0) { acc.x = 0; } else if (acc.x > 255) { acc.x = 255; }
+    if (acc.y < 0) { acc.y = 0; } else if (acc.y > 255) { acc.y = 255; }
+    if (acc.z < 0) { acc.z = 0; } else if (acc.z > 255) { acc.z = 255; }
+    if (acc.w < 0) { acc.w = 0; } else if (acc.w > 255) { acc.w = 255; }
+
+    *out = convert_uchar4(acc);
+}
+
+/*
+ * Row kernel for the color-matrix intrinsic: transforms pixels [xstart, xend)
+ * reading packed uchar4 values from p->in and writing them to p->out.
+ * instep and outstep are accepted to match the root callback signature and
+ * are not used here.
+ *
+ * NOTE(review): both pointers are advanced by xstart here, whereas
+ * Convolve3x3_uchar4 uses p->out as-is - confirm which convention the
+ * launcher uses for p->in / p->out.
+ */
+static void ColorMatrix_uchar4(const RsForEachStubParamStruct *p,
+                               uint32_t xstart, uint32_t xend,
+                               uint32_t instep, uint32_t outstep) {
+    ConvolveParams *cp = (ConvolveParams *)p->usr;
+    uchar4 *out = (uchar4 *)p->out;
+    // The input is only ever read, so keep it const-qualified.
+    const uchar4 *in = (const uchar4 *)p->in;
+    uint32_t x1 = xstart;
+    uint32_t x2 = xend;
+
+    in += xstart;
+    out += xstart;
+
+    if(x2 > x1) {
+#if defined(ARCH_ARM_HAVE_NEON)
+        // The NEON kernel handles 4 pixels per iteration; len counts whole groups.
+        int32_t len = (x2 - x1) >> 2;
+        if(len > 0) {
+            rsdIntrinsicColorMatrix4x4_K(out, in, cp->ip, len);
+            x1 += len << 2;
+            out += len << 2;
+            in += len << 2;
+        }
+#endif
+
+        // Scalar path: the whole span without NEON, otherwise the leftover pixels.
+        while(x1 != x2) {
+            One(p, out++, in++, cp->fp);
+            x1++;
+        }
+    }
+}
+
+/*
+ * Set up the color-matrix intrinsic: publishes one exported variable,
+ * installs the setVar and root callbacks, and returns freshly allocated
+ * state initialised to the identity matrix.
+ * Returns NULL if the state cannot be allocated.
+ */
+void * rsdIntrinsic_InitColorMatrix(const android::renderscript::Context *dc,
+                                    android::renderscript::Script *script,
+                                    RsdIntriniscFuncs_t *funcs) {
+
+    script->mHal.info.exportedVariableCount = 1;
+    funcs->setVar = ColorMatrix_SetVar;
+    funcs->root = ColorMatrix_uchar4;
+
+    ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
+    if (cp == NULL) {
+        // Out of memory: report failure instead of writing through NULL below.
+        return NULL;
+    }
+
+    // calloc zeroed everything; only the diagonal needs setting for identity.
+    cp->fp[0] = 1.f;
+    cp->fp[5] = 1.f;
+    cp->fp[10] = 1.f;
+    cp->fp[15] = 1.f;
+    for(int ct=0; ct < 16; ct++) {
+        cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
+    }
+    return cp;
+}
+
+
diff --git a/driver/rsdIntrinsicConvolve3x3.cpp b/driver/rsdIntrinsicConvolve3x3.cpp
new file mode 100644
index 0000000..2aa7849
--- /dev/null
+++ b/driver/rsdIntrinsicConvolve3x3.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "rsdCore.h"
+#include "rsdIntrinsics.h"
+#include "rsdAllocation.h"
+
+#include "rsdIntrinsicInlines.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+// State shared by the 3x3 convolution callbacks in this file.
+// NOTE(review): instances are created with calloc (rsdIntrinsic_InitConvolve3x3)
+// and the default destroy hook in rsdIntrinsics.cpp releases them with free(),
+// so the ObjectBaseRef constructor/destructor never run - presumably the
+// reference held in alloc is not dropped on destroy; verify.
+struct ConvolveParams {
+ // Kernel coefficients as floats; ConvolveOne reads the first 9.
+ float fp[16];
+ // First 9 coefficients scaled by 255 and rounded, for the NEON kernel.
+ short ip[16];
+ // Input allocation, set by Convolve3x3_Bind and read by Convolve3x3_uchar4.
+ ObjectBaseRef<Allocation> alloc;
+};
+
+// bind hook for the 3x3 convolution: stores the allocation the kernel will
+// read its source pixels from. Only slot 1 is expected.
+static void Convolve3x3_Bind(const Context *dc, const Script *script,
+ void * intrinsicData, uint32_t slot, Allocation *data) {
+ ConvolveParams *cp = (ConvolveParams *)intrinsicData;
+ rsAssert(slot == 1);
+ cp->alloc.set(data);
+}
+
+/*
+ * setVar hook for the 3x3 convolution.
+ * Copies the caller's float coefficients into cp->fp and regenerates the
+ * fixed-point copy (value * 255, rounded) of the 9 kernel weights in cp->ip.
+ * Only slot 0 is expected.
+ */
+static void Convolve3x3_SetVar(const Context *dc, const Script *script, void * intrinsicData,
+                               uint32_t slot, void *data, size_t dataLength) {
+    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
+
+    rsAssert(slot == 0);
+    // dataLength is supplied by the caller; never copy more than fp[] can hold,
+    // otherwise the write runs into ip[] and the alloc reference after it.
+    if (dataLength > sizeof(cp->fp)) {
+        dataLength = sizeof(cp->fp);
+    }
+    memcpy (cp->fp, data, dataLength);
+    for(int ct=0; ct < 9; ct++) {
+        cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
+    }
+}
+
+extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, const void *y1, const void *y2, const short *coef, uint32_t count);
+
+
+/*
+ * Scalar 3x3 convolution of the single output pixel at column x.
+ * py0/py1/py2 are the three source rows (the caller has already clamped them
+ * vertically) and coeff holds the 9 kernel weights. Neighbour columns are
+ * clamped to [0, dimX-1], and the result is clamped to [0, 255] per channel.
+ */
+static void ConvolveOne(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out,
+                        const uchar4 *py0, const uchar4 *py1, const uchar4 *py2,
+                        const float* coeff) {
+
+    // Clamp to the last valid column (dimX-1). Clamping to dimX made the last
+    // column read one pixel past the end of every row.
+    uint32_t x1 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1));
+    uint32_t x2 = rsMax((int32_t)x-1, 0);
+
+    float4 px = convert_float4(py0[x1]) * coeff[0] +
+                convert_float4(py0[x]) * coeff[1] +
+                convert_float4(py0[x2]) * coeff[2] +
+                convert_float4(py1[x1]) * coeff[3] +
+                convert_float4(py1[x]) * coeff[4] +
+                convert_float4(py1[x2]) * coeff[5] +
+                convert_float4(py2[x1]) * coeff[6] +
+                convert_float4(py2[x]) * coeff[7] +
+                convert_float4(py2[x2]) * coeff[8];
+
+    //px = clamp(px, 0.f, 255.f);
+    px.x = px.x < 0 ? 0 : (px.x > 255 ? 255 : px.x);
+    px.y = px.y < 0 ? 0 : (px.y > 255 ? 255 : px.y);
+    px.z = px.z < 0 ? 0 : (px.z > 255 ? 255 : px.z);
+    px.w = px.w < 0 ? 0 : (px.w > 255 ? 255 : px.w);
+
+    uchar4 o = {(uchar)px.x, (uchar)px.y, (uchar)px.z, (uchar)px.w};
+    *out = o;
+}
+
+// Row kernel for the 3x3 convolution: produces output pixels [xstart, xend)
+// of row p->y into p->out. The source pixels come from the allocation bound
+// through Convolve3x3_Bind (cp->alloc), not from p->in. instep and outstep
+// are not used.
+// NOTE(review): cp->alloc is dereferenced without a check - assumes an
+// allocation has been bound before the kernel runs; confirm.
+static void Convolve3x3_uchar4(const RsForEachStubParamStruct *p,
+ uint32_t xstart, uint32_t xend,
+ uint32_t instep, uint32_t outstep) {
+ ConvolveParams *cp = (ConvolveParams *)p->usr;
+ DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv;
+ const uchar *pin = (const uchar *)din->lod[0].mallocPtr;
+
+ // Rows above and below the current one, clamped to [0, dimY-1].
+ uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1));
+ uint32_t y2 = rsMax((int32_t)p->y - 1, 0);
+ // py0 = row above, py1 = current row, py2 = row below.
+ const uchar4 *py0 = (const uchar4 *)(pin + din->lod[0].stride * y2);
+ const uchar4 *py1 = (const uchar4 *)(pin + din->lod[0].stride * p->y);
+ const uchar4 *py2 = (const uchar4 *)(pin + din->lod[0].stride * y1);
+
+ uchar4 *out = (uchar4 *)p->out;
+ uint32_t x1 = xstart;
+ uint32_t x2 = xend;
+ // Column 0 always goes through the scalar path.
+ if(x1 == 0) {
+ ConvolveOne(p, 0, out, py0, py1, py2, cp->fp);
+ x1 ++;
+ out++;
+ }
+
+ if(x2 > x1) {
+#if defined(ARCH_ARM_HAVE_NEON)
+ // The NEON kernel emits 2 pixels per iteration; len is the iteration count.
+ int32_t len = (x2 - x1 - 1) >> 1;
+ if(len > 0) {
+ rsdIntrinsicConvolve3x3_K(out, &py0[x1], &py1[x1], &py2[x1], cp->ip, len);
+ x1 += len << 1;
+ out += len << 1;
+ }
+#endif
+
+ // Whatever the NEON path did not cover is finished one pixel at a time.
+ while(x1 != x2) {
+ ConvolveOne(p, x1, out, py0, py1, py2, cp->fp);
+ out++;
+ x1++;
+ }
+ }
+}
+
+/*
+ * Set up the 3x3 convolution intrinsic: publishes two exported variables,
+ * installs the bind, setVar and root callbacks, and returns freshly allocated
+ * state whose 9 weights are all 1/9 (with the matching fixed-point copy).
+ * Returns NULL if the state cannot be allocated.
+ */
+void * rsdIntrinsic_InitConvolve3x3(const android::renderscript::Context *dc,
+                                    android::renderscript::Script *script,
+                                    RsdIntriniscFuncs_t *funcs) {
+
+    script->mHal.info.exportedVariableCount = 2;
+    funcs->bind = Convolve3x3_Bind;
+    funcs->setVar = Convolve3x3_SetVar;
+    funcs->root = Convolve3x3_uchar4;
+
+    ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
+    if (cp == NULL) {
+        // Out of memory: report failure instead of writing through NULL below.
+        return NULL;
+    }
+
+    for(int ct=0; ct < 9; ct++) {
+        cp->fp[ct] = 1.f / 9.f;
+        cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f);
+    }
+    return cp;
+}
+
+
diff --git a/driver/rsdIntrinsicInlines.h b/driver/rsdIntrinsicInlines.h
new file mode 100644
index 0000000..10dcb1c
--- /dev/null
+++ b/driver/rsdIntrinsicInlines.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+// Short scalar aliases and 2/3/4-lane vector types (ext_vector_type
+// attribute) used by the CPU intrinsic implementations.
+// NOTE(review): no include guard is visible in this header, and it uses
+// uint8_t/uint16_t/uint32_t without including a header for them itself -
+// it relies on the including file.
+typedef uint8_t uchar;
+typedef uint16_t ushort;
+typedef uint32_t uint;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef uchar uchar2 __attribute__((ext_vector_type(2)));
+typedef uchar uchar3 __attribute__((ext_vector_type(3)));
+typedef uchar uchar4 __attribute__((ext_vector_type(4)));
+typedef ushort ushort2 __attribute__((ext_vector_type(2)));
+typedef ushort ushort3 __attribute__((ext_vector_type(3)));
+typedef ushort ushort4 __attribute__((ext_vector_type(4)));
+typedef uint uint2 __attribute__((ext_vector_type(2)));
+typedef uint uint3 __attribute__((ext_vector_type(3)));
+typedef uint uint4 __attribute__((ext_vector_type(4)));
+typedef char char2 __attribute__((ext_vector_type(2)));
+typedef char char3 __attribute__((ext_vector_type(3)));
+typedef char char4 __attribute__((ext_vector_type(4)));
+typedef short short2 __attribute__((ext_vector_type(2)));
+typedef short short3 __attribute__((ext_vector_type(3)));
+typedef short short4 __attribute__((ext_vector_type(4)));
+typedef int int2 __attribute__((ext_vector_type(2)));
+typedef int int3 __attribute__((ext_vector_type(3)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef long long2 __attribute__((ext_vector_type(2)));
+typedef long long3 __attribute__((ext_vector_type(3)));
+typedef long long4 __attribute__((ext_vector_type(4)));
+
+// Identifiers for the built-in intrinsics.
+// NOTE(review): rsdIntrinsics.cpp declares an identical enum, so the two must
+// be kept in step by hand. "INTRINXIC" looks like a typo for "INTRINSIC",
+// but the same spelling is used in rsdIntrinsics.cpp.
+enum IntrinsicEnums {
+ INTRINSIC_UNDEFINED,
+ INTRINSIC_CONVOLVE_3x3,
+ INTRINXIC_COLORMATRIX
+
+};
+
+// Widen each 8-bit lane of a uchar4 to an int lane.
+static inline int4 convert_int4(uchar4 i) {
+    int4 wide;
+    wide.x = i.x;
+    wide.y = i.y;
+    wide.z = i.z;
+    wide.w = i.w;
+    return wide;
+}
+
+// Widen each 8-bit lane of a uchar4 to a short lane.
+static inline short4 convert_short4(uchar4 i) {
+    short4 wide;
+    wide.x = i.x;
+    wide.y = i.y;
+    wide.z = i.z;
+    wide.w = i.w;
+    return wide;
+}
+
+// Convert each 8-bit lane of a uchar4 to a float lane.
+static inline float4 convert_float4(uchar4 i) {
+    float4 asFloat;
+    asFloat.x = i.x;
+    asFloat.y = i.y;
+    asFloat.z = i.z;
+    asFloat.w = i.w;
+    return asFloat;
+}
+
+// Narrow each int lane to 8 bits with a plain cast (no saturation).
+static inline uchar4 convert_uchar4(int4 i) {
+    uchar4 narrow;
+    narrow.x = (uchar)i.x;
+    narrow.y = (uchar)i.y;
+    narrow.z = (uchar)i.z;
+    narrow.w = (uchar)i.w;
+    return narrow;
+}
+
+// Convert each float lane to 8 bits with a plain cast (no rounding or
+// saturation); callers clamp beforehand.
+static inline uchar4 convert_uchar4(float4 i) {
+    uchar4 narrow;
+    narrow.x = (uchar)i.x;
+    narrow.y = (uchar)i.y;
+    narrow.z = (uchar)i.z;
+    narrow.w = (uchar)i.w;
+    return narrow;
+}
+
+
+// Clamp every lane of amount to [low, high]; the lower bound is tested first.
+static inline int4 clamp(int4 amount, int low, int high) {
+    int4 bounded = amount;
+    if (bounded.x < low) { bounded.x = low; } else if (bounded.x > high) { bounded.x = high; }
+    if (bounded.y < low) { bounded.y = low; } else if (bounded.y > high) { bounded.y = high; }
+    if (bounded.z < low) { bounded.z = low; } else if (bounded.z > high) { bounded.z = high; }
+    if (bounded.w < low) { bounded.w = low; } else if (bounded.w > high) { bounded.w = high; }
+    return bounded;
+}
+
diff --git a/driver/rsdIntrinsics.cpp b/driver/rsdIntrinsics.cpp
new file mode 100644
index 0000000..f53d08b
--- /dev/null
+++ b/driver/rsdIntrinsics.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "rsdCore.h"
+#include "rsdIntrinsics.h"
+#include "rsdAllocation.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+// Intrinsic identifiers matched against the RsScriptIntrinsicID passed to
+// rsdIntrinsic_Init.
+// NOTE(review): rsdIntrinsicInlines.h declares an identical enum;
+// presumably these values must also line up with RsScriptIntrinsicID - verify.
+enum IntrinsicEnums {
+ INTRINSIC_UNDEFINED,
+ INTRINSIC_CONVOLVE_3x3,
+ INTRINXIC_COLORMATRIX
+
+};
+
+
+// Per-intrinsic initialisers, defined in rsdIntrinsicConvolve3x3.cpp and
+// rsdIntrinsicColorMatrix.cpp. Each fills in funcs and returns its state.
+void * rsdIntrinsic_InitConvolve3x3(const Context *, Script *, RsdIntriniscFuncs_t *);
+void * rsdIntrinsic_InitColorMatrix(const Context *, Script *, RsdIntriniscFuncs_t *);
+
+
+// Default bind hook installed by rsdIntrinsic_Init; it only asserts.
+// Intrinsics that accept a bound allocation replace it with their own.
+static void Bind(const Context *, const Script *, void *, uint32_t, Allocation *) {
+ rsAssert(!"Intrinsic_Bind unexpectedly called");
+}
+
+// Default setVar hook installed by rsdIntrinsic_Init; it only asserts.
+// Intrinsics that export variables replace it with their own.
+static void SetVar(const Context *, const Script *, void *, uint32_t, void *, size_t) {
+    // Name the hook that actually fired; the message used to say Intrinsic_Bind.
+    rsAssert(!"Intrinsic_SetVar unexpectedly called");
+}
+
+// Default destroy hook: releases the intrinsic state with free(), matching
+// the calloc used by the per-intrinsic initialisers.
+static void Destroy(const Context *dc, const Script *script, void * intrinsicData) {
+ free(intrinsicData);
+}
+
+/*
+ * Entry point for creating an intrinsic. Installs the default bind, setVar
+ * and destroy hooks, then lets the initialiser selected by iid override what
+ * it needs. Returns that initialiser's state pointer, or NULL for an id that
+ * is not handled here.
+ */
+void * rsdIntrinsic_Init(const android::renderscript::Context *dc,
+                         android::renderscript::Script *script,
+                         RsScriptIntrinsicID iid,
+                         RsdIntriniscFuncs_t *funcs) {
+
+    funcs->bind = Bind;
+    funcs->setVar = SetVar;
+    funcs->destroy = Destroy;
+
+    void *state = NULL;
+    switch(iid) {
+    case INTRINSIC_CONVOLVE_3x3:
+        state = rsdIntrinsic_InitConvolve3x3(dc, script, funcs);
+        break;
+    case INTRINXIC_COLORMATRIX:
+        state = rsdIntrinsic_InitColorMatrix(dc, script, funcs);
+        break;
+
+    default:
+        break;
+    }
+    return state;
+}
+
+
+
diff --git a/driver/rsdIntrinsics.h b/driver/rsdIntrinsics.h
new file mode 100644
index 0000000..4a1a4a2
--- /dev/null
+++ b/driver/rsdIntrinsics.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSD_INTRINSICS_H
+#define RSD_INTRINSICS_H
+
+#include <rs_hal.h>
+
+// Callback table filled in by rsdIntrinsic_Init and the per-intrinsic
+// initialisers. intrinsicData is the state pointer returned by the
+// initialiser.
+typedef struct RsdIntriniscFuncs_rec {
+
+ // Called when an allocation is bound to an exported slot.
+ void (*bind)(const android::renderscript::Context *dc,
+ const android::renderscript::Script *script,
+ void * intrinsicData,
+ uint32_t slot, android::renderscript::Allocation *data);
+ // Called when dataLength bytes at data are assigned to an exported slot.
+ void (*setVar)(const android::renderscript::Context *dc,
+ const android::renderscript::Script *script,
+ void * intrinsicData,
+ uint32_t slot, void *data, size_t dataLength);
+ // Row kernel: processes pixels [x1, x2) of the row described by the param struct.
+ void (*root)(const android::renderscript::RsForEachStubParamStruct *,
+ uint32_t x1, uint32_t x2, uint32_t instep, uint32_t outstep);
+
+ // Releases intrinsicData.
+ void (*destroy)(const android::renderscript::Context *dc,
+ const android::renderscript::Script *script,
+ void * intrinsicData);
+} RsdIntriniscFuncs_t;
+
+// Creates the intrinsic identified by id: fills in funcs and returns the
+// intrinsic state pointer, or NULL when the id is not handled.
+void * rsdIntrinsic_Init(const android::renderscript::Context *dc,
+ android::renderscript::Script *script,
+ RsScriptIntrinsicID id, RsdIntriniscFuncs_t *funcs);
+
+
+
+#endif // RSD_INTRINSICS_H
+
diff --git a/driver/rsdIntrinsics_Convolve.S b/driver/rsdIntrinsics_Convolve.S
new file mode 100644
index 0000000..a08658d
--- /dev/null
+++ b/driver/rsdIntrinsics_Convolve.S
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+#include <machine/cpu-features.h>
+#include <machine/asm.h>
+
+/*
+ r0 = dst
+ r1 = y0 base pointer
+ r2 = y1 base pointer
+ r3 = y2 base pointer
+        [sp]     = coeffs (first stack argument)
+        [sp, #4] = length / 2 (second stack argument)
+*/
+
+/*
+        3x3 convolution over three source rows, two output pixels per loop
+        iteration. Each iteration loads 4 pixels from every row, advances the
+        row pointers by 2 pixels, and stores 2 pixels to dst. Products are
+        accumulated in 32 bits, shifted right by 8 and saturated to u8.
+*/
+ENTRY(rsdIntrinsicConvolve3x3_K)
+        .save           {r4, lr}
+        stmfd           sp!, {r4, lr}
+        vpush           {q4-q7}
+
+        /* Stack arguments sit above the 8 bytes (r4, lr) and 64 bytes (q4-q7) pushed above. */
+        ldr r4, [sp, #8+64]
+        vld1.16 {q0}, [r4]!
+        vld1.16 {q1}, [r4]
+        ldr r4, [sp, #12+64]
+
+1:
+        vld1.8 {q13}, [r1]
+        vld1.8 {q14}, [r2]
+        vld1.8 {q15}, [r3]
+        add r1, r1, #8
+        add r2, r2, #8
+        add r3, r3, #8
+        PLD         (r1, #8)
+        PLD         (r2, #8)
+        PLD         (r3, #8)
+
+        vmovl.u8 q2, d26
+        vmovl.u8 q3, d27
+        vmovl.u8 q4, d28
+        vmovl.u8 q5, d29
+        vmovl.u8 q6, d30
+        vmovl.u8 q7, d31
+
+/*
+        The two pixel source array is
+        d4,  d5,  d6,  d7
+        d8,  d9,  d10, d11
+        d12, d13, d14, d15
+*/
+
+        vmull.s16 q8, d4, d0[0]
+        vmull.s16 q9, d5, d0[0]
+
+        vmlal.s16 q8, d5, d0[1]
+        vmlal.s16 q9, d6, d0[1]
+
+        vmlal.s16 q8, d6, d0[2]
+        vmlal.s16 q9, d7, d0[2]
+
+        /* Row 1 starts at d8; the first pixel used d4 (row 0) here by mistake. */
+        vmlal.s16 q8, d8, d0[3]
+        vmlal.s16 q9, d9, d0[3]
+
+        vmlal.s16 q8, d9, d1[0]
+        vmlal.s16 q9, d10, d1[0]
+
+        vmlal.s16 q8, d10, d1[1]
+        vmlal.s16 q9, d11, d1[1]
+
+        vmlal.s16 q8, d12, d1[2]
+        vmlal.s16 q9, d13, d1[2]
+
+        vmlal.s16 q8, d13, d1[3]
+        vmlal.s16 q9, d14, d1[3]
+
+        vmlal.s16 q8, d14, d2[0]
+        vmlal.s16 q9, d15, d2[0]
+
+        vshrn.i32 d16, q8, #8
+        vshrn.i32 d17, q9, #8
+
+        vqmovun.s16 d16, q8
+        vst1.8 d16, [r0]!
+
+        subs r4, r4, #1
+        bne 1b
+
+
+        vpop            {q4-q7}
+        ldmfd           sp!, {r4, lr}
+        bx              lr
+END(rsdIntrinsicConvolve3x3_K)
+
+
+/*
+ r0 = dst
+ r1 = src
+        r2 = matrix (16 shorts)
+        r3 = number of 4-pixel groups (length / 4)
+*/
+/*
+        4x4 color matrix, 4 pixels per loop iteration. Pixels are loaded
+        de-interleaved (d0..d3 = channels 0..3), widened to 16 bits, multiplied
+        by the matrix, shifted right by 8 and saturated to u8.
+        Output channel k = in0*m[k] + in1*m[k+4] + in2*m[k+8] + in3*m[k+12],
+        the same layout the scalar C++ path uses.
+*/
+ENTRY(rsdIntrinsicColorMatrix4x4_K)
+        .save           {r4, lr}
+        stmfd           sp!, {r4, lr}
+        vpush           {q4-q7}
+
+        /* d4 = m[0..3], d5 = m[4..7], d6 = m[8..11], d7 = m[12..15] */
+        vld1.16 {q2}, [r2]!
+        vld1.16 {q3}, [r2]!
+
+1:
+        vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
+        vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
+        vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
+        vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
+
+        vmovl.u8 q12, d0
+        vmovl.u8 q13, d1
+        vmovl.u8 q14, d2
+        vmovl.u8 q15, d3
+
+        /* Input channel 0 feeds every output channel through m[0..3]. */
+        vmull.s16 q8, d24, d4[0]
+        vmull.s16 q9, d24, d4[1]
+        vmull.s16 q10, d24, d4[2]
+        vmull.s16 q11, d24, d4[3]
+
+        /* Input channel 1 through m[4..7]. */
+        vmlal.s16 q8, d26, d5[0]
+        vmlal.s16 q9, d26, d5[1]
+        vmlal.s16 q10, d26, d5[2]
+        vmlal.s16 q11, d26, d5[3]
+
+        /* Input channel 2 through m[8..11]. */
+        vmlal.s16 q8, d28, d6[0]
+        vmlal.s16 q9, d28, d6[1]
+        vmlal.s16 q10, d28, d6[2]
+        vmlal.s16 q11, d28, d6[3]
+
+        /* Input channel 3 through m[12..15]. */
+        vmlal.s16 q8, d30, d7[0]
+        vmlal.s16 q9, d30, d7[1]
+        vmlal.s16 q10, d30, d7[2]
+        vmlal.s16 q11, d30, d7[3]
+
+        vshrn.i32 d24, q8, #8
+        vshrn.i32 d26, q9, #8
+        vshrn.i32 d28, q10, #8
+        vshrn.i32 d30, q11, #8
+
+        vqmovun.s16 d0, q12
+        vqmovun.s16 d1, q13
+        vqmovun.s16 d2, q14
+        vqmovun.s16 d3, q15
+
+        vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
+        vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
+        vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
+        vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
+
+        subs r3, r3, #1
+        bne 1b
+
+        vpop            {q4-q7}
+        ldmfd           sp!, {r4, lr}
+        bx              lr
+END(rsdIntrinsicColorMatrix4x4_K)
+
+/*
+ r0 = dst
+ r1 = src
+        r2 = matrix (16 shorts; the first 3 of each group of 4 are used)
+        r3 = number of 4-pixel groups (length / 4)
+*/
+/*
+        3x3 color matrix, 4 pixels per loop iteration. Channels 0..2 are
+        transformed; channel 3 (d3) is stored back exactly as loaded.
+        Output channel k = in0*m[k] + in1*m[k+4] + in2*m[k+8].
+        The symbol carries the _K suffix so that it matches the extern "C"
+        prototype in rsdIntrinsicColorMatrix.cpp.
+        NOTE(review): the coefficient layout is assumed to match the 4x4
+        kernel (rows 4 shorts apart) - confirm against the first caller.
+*/
+ENTRY(rsdIntrinsicColorMatrix3x3_K)
+        .save           {r4, lr}
+        stmfd           sp!, {r4, lr}
+        vpush           {q4-q7}
+
+        /* d4 = m[0..3], d5 = m[4..7], d6 = m[8..11] */
+        vld1.16 {q2}, [r2]!
+        vld1.16 {q3}, [r2]!
+
+1:
+        vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
+        vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
+        vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
+        vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
+
+        vmovl.u8 q12, d0
+        vmovl.u8 q13, d1
+        vmovl.u8 q14, d2
+
+        /* Input channel 0 feeds every output channel through m[0..2]. */
+        vmull.s16 q8, d24, d4[0]
+        vmull.s16 q9, d24, d4[1]
+        vmull.s16 q10, d24, d4[2]
+
+        /* Input channel 1 through m[4..6]. */
+        vmlal.s16 q8, d26, d5[0]
+        vmlal.s16 q9, d26, d5[1]
+        vmlal.s16 q10, d26, d5[2]
+
+        /* Input channel 2 through m[8..10]. */
+        vmlal.s16 q8, d28, d6[0]
+        vmlal.s16 q9, d28, d6[1]
+        vmlal.s16 q10, d28, d6[2]
+
+        vshrn.i32 d24, q8, #8
+        vshrn.i32 d26, q9, #8
+        vshrn.i32 d28, q10, #8
+
+        vqmovun.s16 d0, q12
+        vqmovun.s16 d1, q13
+        vqmovun.s16 d2, q14
+
+        vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
+        vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
+        vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
+        vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
+
+        subs r3, r3, #1
+        bne 1b
+
+        vpop            {q4-q7}
+        ldmfd           sp!, {r4, lr}
+        bx              lr
+END(rsdIntrinsicColorMatrix3x3_K)
+
diff --git a/driver/rsdMeshObj.cpp b/driver/rsdMeshObj.cpp
index 5dab84b..92e02be 100644
--- a/driver/rsdMeshObj.cpp
+++ b/driver/rsdMeshObj.cpp
@@ -151,7 +151,7 @@
mAttribs[ct].ptr = NULL;
} else {
mAttribs[ct].buffer = 0;
- mAttribs[ct].ptr = (const uint8_t*)alloc->mHal.drvState.mallocPtr;
+ mAttribs[ct].ptr = (const uint8_t*)alloc->mHal.drvState.mallocPtrLOD0;
}
}
@@ -172,7 +172,7 @@
} else {
RSD_CALL_GL(glBindBuffer, GL_ELEMENT_ARRAY_BUFFER, 0);
RSD_CALL_GL(glDrawElements, mGLPrimitives[primIndex], len, GL_UNSIGNED_SHORT,
- idxAlloc->mHal.drvState.mallocPtr);
+ idxAlloc->mHal.drvState.mallocPtrLOD0);
}
} else {
RSD_CALL_GL(glDrawArrays, mGLPrimitives[primIndex], start, len);
diff --git a/driver/rsdPath.cpp b/driver/rsdPath.cpp
index e04bc02..79ec487 100644
--- a/driver/rsdPath.cpp
+++ b/driver/rsdPath.cpp
@@ -112,7 +112,7 @@
DrvPathStatic::DrvPathStatic(const Allocation *vtx, const Allocation *loops) {
mSegmentCount = vtx->getType()->getDimX() / 3;
mSegments = new segment_t[mSegmentCount];
-
+/*
const float *fin = (const float *)vtx->getPtr();
for (uint32_t ct=0; ct < mSegmentCount; ct++) {
segment_t *s = &mSegments[ct];
@@ -126,6 +126,7 @@
s->y2 = fin[5];
fin += 6;
}
+ */
}
DrvPathStatic::~DrvPathStatic() {
diff --git a/driver/rsdRuntime.h b/driver/rsdRuntime.h
index 840eced..dc84032 100644
--- a/driver/rsdRuntime.h
+++ b/driver/rsdRuntime.h
@@ -18,7 +18,6 @@
#define RSD_RUNTIME_STUBS_H
#include <rs_hal.h>
-#include <bcc/bcc.h>
#include "rsMutex.h"
diff --git a/driver/rsdRuntimeMath.cpp b/driver/rsdRuntimeMath.cpp
index 0a233f6..48a1e85 100644
--- a/driver/rsdRuntimeMath.cpp
+++ b/driver/rsdRuntimeMath.cpp
@@ -44,10 +44,6 @@
return log10(v) / log10(2.f);
}
-static float SC_mad(float v1, float v2, float v3) {
- return v1 * v2 + v3;
-}
-
#if 0
static float SC_pown(float v, int p) {
return powf(v, (float)p);
@@ -109,10 +105,6 @@
return amount < low ? low : (amount > high ? high : amount);
}
-static float SC_degrees(float radians) {
- return radians * (180.f / M_PI);
-}
-
static float SC_max_f32(float v, float v2) {
return rsMax(v, v2);
}
@@ -121,15 +113,6 @@
return rsMin(v, v2);
}
-static float SC_mix_f32(float start, float stop, float amount) {
- //ALOGE("lerpf %f %f %f", start, stop, amount);
- return start + (stop - start) * amount;
-}
-
-static float SC_radians(float degrees) {
- return degrees * (M_PI / 180.f);
-}
-
static float SC_step_f32(float edge, float v) {
if (v < edge) return 0.f;
return 1.f;
@@ -430,7 +413,6 @@
{ "_Z5log10f", (void *)&log10f, true },
{ "_Z5log1pf", (void *)&log1pf, true },
{ "_Z4logbf", (void *)&logbf, true },
- { "_Z3madfff", (void *)&SC_mad, true },
{ "_Z4modffPf", (void *)&modff, true },
//{ "_Z3nanj", (void *)&SC_nan, true },
{ "_Z9nextafterff", (void *)&nextafterf, true },
@@ -473,11 +455,8 @@
{ "_Z3mincc", (void *)&SC_min_i8, true },
{ "_Z5clampfff", (void *)&SC_clamp_f32, true },
- { "_Z7degreesf", (void *)&SC_degrees, true },
{ "_Z3maxff", (void *)&SC_max_f32, true },
{ "_Z3minff", (void *)&SC_min_f32, true },
- { "_Z3mixfff", (void *)&SC_mix_f32, true },
- { "_Z7radiansf", (void *)&SC_radians, true },
{ "_Z4stepff", (void *)&SC_step_f32, true },
//{ "smoothstep", (void *)&, true },
{ "_Z4signf", (void *)&SC_sign_f32, true },
diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp
index a24bba8..da92839 100644
--- a/driver/rsdRuntimeStubs.cpp
+++ b/driver/rsdRuntimeStubs.cpp
@@ -23,6 +23,7 @@
#include "utils/Timers.h"
#include "rsdCore.h"
+#include "rsdBcc.h"
#include "rsdRuntime.h"
#include "rsdPath.h"
@@ -38,6 +39,33 @@
Context * rsc = tls->mContext; \
ScriptC * sc = (ScriptC *) tls->mScript
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef char char2 __attribute__((ext_vector_type(2)));
+typedef char char3 __attribute__((ext_vector_type(3)));
+typedef char char4 __attribute__((ext_vector_type(4)));
+typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
+typedef unsigned char uchar3 __attribute__((ext_vector_type(3)));
+typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
+typedef short short2 __attribute__((ext_vector_type(2)));
+typedef short short3 __attribute__((ext_vector_type(3)));
+typedef short short4 __attribute__((ext_vector_type(4)));
+typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
+typedef unsigned short ushort3 __attribute__((ext_vector_type(3)));
+typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
+typedef int32_t int2 __attribute__((ext_vector_type(2)));
+typedef int32_t int3 __attribute__((ext_vector_type(3)));
+typedef int32_t int4 __attribute__((ext_vector_type(4)));
+typedef uint32_t uint2 __attribute__((ext_vector_type(2)));
+typedef uint32_t uint3 __attribute__((ext_vector_type(3)));
+typedef uint32_t uint4 __attribute__((ext_vector_type(4)));
+typedef long long long2 __attribute__((ext_vector_type(2)));
+typedef long long long3 __attribute__((ext_vector_type(3)));
+typedef long long long4 __attribute__((ext_vector_type(4)));
+typedef unsigned long long ulong2 __attribute__((ext_vector_type(2)));
+typedef unsigned long long ulong3 __attribute__((ext_vector_type(3)));
+typedef unsigned long long ulong4 __attribute__((ext_vector_type(4)));
//////////////////////////////////////////////////////////////////////////////
@@ -353,7 +381,7 @@
static const Allocation * SC_GetAllocation(const void *ptr) {
GET_TLS();
- return rsrGetAllocation(rsc, sc, ptr);
+ return rsdScriptGetAllocationForPointer(rsc, sc, ptr);
}
static void SC_ForEach_SAA(Script *target,
@@ -482,6 +510,15 @@
static void SC_debugFv4(const char *s, float f1, float f2, float f3, float f4) {
ALOGD("%s {%f, %f, %f, %f}", s, f1, f2, f3, f4);
}
+static void SC_debugF2(const char *s, float2 f) {
+ ALOGD("%s {%f, %f}", s, f.x, f.y);
+}
+static void SC_debugF3(const char *s, float3 f) {
+ ALOGD("%s {%f, %f, %f}", s, f.x, f.y, f.z);
+}
+static void SC_debugF4(const char *s, float4 f) {
+ ALOGD("%s {%f, %f, %f, %f}", s, f.x, f.y, f.z, f.w);
+}
static void SC_debugD(const char *s, double d) {
ALOGD("%s %f, 0x%08llx", s, d, *((long long *) (&d)));
}
@@ -500,20 +537,102 @@
ALOGD("%s {%f, %f", s, f[0], f[2]);
ALOGD("%s %f, %f}",s, f[1], f[3]);
}
-
+static void SC_debugI8(const char *s, char c) {
+ ALOGD("%s %hhd 0x%hhx", s, c, (unsigned char)c);
+}
+static void SC_debugC2(const char *s, char2 c) {
+ ALOGD("%s {%hhd, %hhd} 0x%hhx 0x%hhx", s, c.x, c.y, (unsigned char)c.x, (unsigned char)c.y);
+}
+static void SC_debugC3(const char *s, char3 c) {
+ ALOGD("%s {%hhd, %hhd, %hhd} 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z);
+}
+static void SC_debugC4(const char *s, char4 c) {
+ ALOGD("%s {%hhd, %hhd, %hhd, %hhd} 0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z, (unsigned char)c.w);
+}
+static void SC_debugU8(const char *s, unsigned char c) {
+ ALOGD("%s %hhu 0x%hhx", s, c, c);
+}
+static void SC_debugUC2(const char *s, uchar2 c) {
+ ALOGD("%s {%hhu, %hhu} 0x%hhx 0x%hhx", s, c.x, c.y, c.x, c.y);
+}
+static void SC_debugUC3(const char *s, uchar3 c) {
+ ALOGD("%s {%hhu, %hhu, %hhu} 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.x, c.y, c.z);
+}
+static void SC_debugUC4(const char *s, uchar4 c) {
+ ALOGD("%s {%hhu, %hhu, %hhu, %hhu} 0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
+}
+static void SC_debugI16(const char *s, short c) {
+ ALOGD("%s %hd 0x%hx", s, c, c);
+}
+static void SC_debugS2(const char *s, short2 c) {
+ ALOGD("%s {%hd, %hd} 0x%hx 0x%hx", s, c.x, c.y, c.x, c.y);
+}
+static void SC_debugS3(const char *s, short3 c) {
+ ALOGD("%s {%hd, %hd, %hd} 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z);
+}
+static void SC_debugS4(const char *s, short4 c) {
+ ALOGD("%s {%hd, %hd, %hd, %hd} 0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
+}
+static void SC_debugU16(const char *s, unsigned short c) {
+ ALOGD("%s %hu 0x%hx", s, c, c);
+}
+static void SC_debugUS2(const char *s, ushort2 c) {
+ ALOGD("%s {%hu, %hu} 0x%hx 0x%hx", s, c.x, c.y, c.x, c.y);
+}
+static void SC_debugUS3(const char *s, ushort3 c) {
+ ALOGD("%s {%hu, %hu, %hu} 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z);
+}
+static void SC_debugUS4(const char *s, ushort4 c) {
+ ALOGD("%s {%hu, %hu, %hu, %hu} 0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w);
+}
static void SC_debugI32(const char *s, int32_t i) {
- ALOGD("%s %i 0x%x", s, i, i);
+ ALOGD("%s %d 0x%x", s, i, i);
+}
+static void SC_debugI2(const char *s, int2 i) {
+ ALOGD("%s {%d, %d} 0x%x 0x%x", s, i.x, i.y, i.x, i.y);
+}
+static void SC_debugI3(const char *s, int3 i) {
+ ALOGD("%s {%d, %d, %d} 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z);
+}
+static void SC_debugI4(const char *s, int4 i) {
+ ALOGD("%s {%d, %d, %d, %d} 0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w);
}
static void SC_debugU32(const char *s, uint32_t i) {
ALOGD("%s %u 0x%x", s, i, i);
}
+static void SC_debugUI2(const char *s, uint2 i) {
+ ALOGD("%s {%u, %u} 0x%x 0x%x", s, i.x, i.y, i.x, i.y);
+}
+static void SC_debugUI3(const char *s, uint3 i) {
+ ALOGD("%s {%u, %u, %u} 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z);
+}
+static void SC_debugUI4(const char *s, uint4 i) {
+ ALOGD("%s {%u, %u, %u, %u} 0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w);
+}
static void SC_debugLL64(const char *s, long long ll) {
ALOGD("%s %lld 0x%llx", s, ll, ll);
}
+static void SC_debugL2(const char *s, long2 ll) {
+ ALOGD("%s {%lld, %lld} 0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y);
+}
+static void SC_debugL3(const char *s, long3 ll) {
+ ALOGD("%s {%lld, %lld, %lld} 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z);
+}
+static void SC_debugL4(const char *s, long4 ll) {
+ ALOGD("%s {%lld, %lld, %lld, %lld} 0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w);
+}
static void SC_debugULL64(const char *s, unsigned long long ll) {
ALOGD("%s %llu 0x%llx", s, ll, ll);
}
-
+static void SC_debugUL2(const char *s, ulong2 ll) {
+ ALOGD("%s {%llu, %llu} 0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y);
+}
+static void SC_debugUL3(const char *s, ulong3 ll) {
+ ALOGD("%s {%llu, %llu, %llu} 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z);
+}
+static void SC_debugUL4(const char *s, ulong4 ll) {
+ ALOGD("%s {%llu, %llu, %llu, %llu} 0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w);
+}
static void SC_debugP(const char *s, const void *p) {
ALOGD("%s %p", s, p);
}
@@ -683,19 +802,56 @@
{ "_Z7rsDebugPKcff", (void *)&SC_debugFv2, true },
{ "_Z7rsDebugPKcfff", (void *)&SC_debugFv3, true },
{ "_Z7rsDebugPKcffff", (void *)&SC_debugFv4, true },
+ { "_Z7rsDebugPKcDv2_f", (void *)&SC_debugF2, true },
+ { "_Z7rsDebugPKcDv3_f", (void *)&SC_debugF3, true },
+ { "_Z7rsDebugPKcDv4_f", (void *)&SC_debugF4, true },
{ "_Z7rsDebugPKcd", (void *)&SC_debugD, true },
{ "_Z7rsDebugPKcPK12rs_matrix4x4", (void *)&SC_debugFM4v4, true },
{ "_Z7rsDebugPKcPK12rs_matrix3x3", (void *)&SC_debugFM3v3, true },
{ "_Z7rsDebugPKcPK12rs_matrix2x2", (void *)&SC_debugFM2v2, true },
+ { "_Z7rsDebugPKcc", (void *)&SC_debugI8, true },
+ { "_Z7rsDebugPKcDv2_c", (void *)&SC_debugC2, true },
+ { "_Z7rsDebugPKcDv3_c", (void *)&SC_debugC3, true },
+ { "_Z7rsDebugPKcDv4_c", (void *)&SC_debugC4, true },
+ { "_Z7rsDebugPKch", (void *)&SC_debugU8, true },
+ { "_Z7rsDebugPKcDv2_h", (void *)&SC_debugUC2, true },
+ { "_Z7rsDebugPKcDv3_h", (void *)&SC_debugUC3, true },
+ { "_Z7rsDebugPKcDv4_h", (void *)&SC_debugUC4, true },
+ { "_Z7rsDebugPKcs", (void *)&SC_debugI16, true },
+ { "_Z7rsDebugPKcDv2_s", (void *)&SC_debugS2, true },
+ { "_Z7rsDebugPKcDv3_s", (void *)&SC_debugS3, true },
+ { "_Z7rsDebugPKcDv4_s", (void *)&SC_debugS4, true },
+ { "_Z7rsDebugPKct", (void *)&SC_debugU16, true },
+ { "_Z7rsDebugPKcDv2_t", (void *)&SC_debugUS2, true },
+ { "_Z7rsDebugPKcDv3_t", (void *)&SC_debugUS3, true },
+ { "_Z7rsDebugPKcDv4_t", (void *)&SC_debugUS4, true },
{ "_Z7rsDebugPKci", (void *)&SC_debugI32, true },
+ { "_Z7rsDebugPKcDv2_i", (void *)&SC_debugI2, true },
+ { "_Z7rsDebugPKcDv3_i", (void *)&SC_debugI3, true },
+ { "_Z7rsDebugPKcDv4_i", (void *)&SC_debugI4, true },
{ "_Z7rsDebugPKcj", (void *)&SC_debugU32, true },
+ { "_Z7rsDebugPKcDv2_j", (void *)&SC_debugUI2, true },
+ { "_Z7rsDebugPKcDv3_j", (void *)&SC_debugUI3, true },
+ { "_Z7rsDebugPKcDv4_j", (void *)&SC_debugUI4, true },
// Both "long" and "unsigned long" need to be redirected to their
// 64-bit counterparts, since we have hacked Slang to use 64-bit
// for "long" on Arm (to be similar to Java).
{ "_Z7rsDebugPKcl", (void *)&SC_debugLL64, true },
+ { "_Z7rsDebugPKcDv2_l", (void *)&SC_debugL2, true },
+ { "_Z7rsDebugPKcDv3_l", (void *)&SC_debugL3, true },
+ { "_Z7rsDebugPKcDv4_l", (void *)&SC_debugL4, true },
{ "_Z7rsDebugPKcm", (void *)&SC_debugULL64, true },
+ { "_Z7rsDebugPKcDv2_m", (void *)&SC_debugUL2, true },
+ { "_Z7rsDebugPKcDv3_m", (void *)&SC_debugUL3, true },
+ { "_Z7rsDebugPKcDv4_m", (void *)&SC_debugUL4, true },
{ "_Z7rsDebugPKcx", (void *)&SC_debugLL64, true },
+ { "_Z7rsDebugPKcDv2_x", (void *)&SC_debugL2, true },
+ { "_Z7rsDebugPKcDv3_x", (void *)&SC_debugL3, true },
+ { "_Z7rsDebugPKcDv4_x", (void *)&SC_debugL4, true },
{ "_Z7rsDebugPKcy", (void *)&SC_debugULL64, true },
+ { "_Z7rsDebugPKcDv2_y", (void *)&SC_debugUL2, true },
+ { "_Z7rsDebugPKcDv3_y", (void *)&SC_debugUL3, true },
+ { "_Z7rsDebugPKcDv4_y", (void *)&SC_debugUL4, true },
{ "_Z7rsDebugPKcPKv", (void *)&SC_debugP, true },
{ NULL, NULL, false }
@@ -704,13 +860,6 @@
void* rsdLookupRuntimeStub(void* pContext, char const* name) {
ScriptC *s = (ScriptC *)pContext;
- if (!strcmp(name, "__isThreadable")) {
- return (void*) s->mHal.info.isThreadable;
- } else if (!strcmp(name, "__clearThreadable")) {
- s->mHal.info.isThreadable = false;
- return NULL;
- }
-
RsdSymbolTable *syms = gSyms;
const RsdSymbolTable *sym = rsdLookupSymbolMath(name);
diff --git a/driver/rsdShader.cpp b/driver/rsdShader.cpp
index d39bdb8..3654090 100644
--- a/driver/rsdShader.cpp
+++ b/driver/rsdShader.cpp
@@ -516,6 +516,7 @@
uint32_t uidx = 0;
for (uint32_t ct=0; ct < mRSProgram->mHal.state.constantsCount; ct++) {
Allocation *alloc = mRSProgram->mHal.state.constants[ct];
+
if (!alloc) {
ALOGE("Attempting to set constants on shader id %u, but alloc at slot %u is not set",
(uint32_t)this, ct);
@@ -523,7 +524,8 @@
continue;
}
- const uint8_t *data = static_cast<const uint8_t *>(alloc->getPtr());
+ DrvAllocation *adrv = (DrvAllocation *)alloc->mHal.drv;
+ const uint8_t *data = static_cast<const uint8_t *>(adrv->lod[0].mallocPtr);
const Element *e = mRSProgram->mHal.state.constantTypes[ct]->getElement();
for (uint32_t field=0; field < e->mHal.state.fieldsCount; field++) {
const Element *f = e->mHal.state.fields[field];
diff --git a/rs.spec b/rs.spec
index f32443f..607f7dc 100644
--- a/rs.spec
+++ b/rs.spec
@@ -346,6 +346,11 @@
ret RsScript
}
+ScriptIntrinsicCreate {
+ param uint32_t id
+ param RsElement eid
+ ret RsScript
+ }
ProgramStoreCreate {
direct
diff --git a/rsAdapter.cpp b/rsAdapter.cpp
index 41811ae..13a728f 100644
--- a/rsAdapter.cpp
+++ b/rsAdapter.cpp
@@ -34,33 +34,14 @@
mY = 0;
mZ = 0;
mLOD = 0;
- mFace = 0;
+ mFace = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X;
}
-void * Adapter1D::getElement(uint32_t x) {
- rsAssert(mAllocation.get());
- rsAssert(mAllocation->getPtr());
- rsAssert(mAllocation->getType());
- uint8_t * ptr = static_cast<uint8_t *>(mAllocation->getPtr());
- ptr += mAllocation->getType()->getLODOffset(mLOD, x, mY);
- return ptr;
+void Adapter1D::data(Context *rsc, uint32_t x, uint32_t count, const void *data, size_t sizeBytes) {
+ mAllocation->data(rsc, x, mY, mLOD, mFace, count, 1, data, sizeBytes);
}
-void Adapter1D::subData(uint32_t xoff, uint32_t count, const void *data) {
- if (mAllocation.get() && mAllocation.get()->getType()) {
- void *ptr = getElement(xoff);
- count *= mAllocation.get()->getType()->getElementSizeBytes();
- memcpy(ptr, data, count);
- }
-}
-
-void Adapter1D::data(const void *data) {
- memcpy(getElement(0),
- data,
- mAllocation.get()->getType()->getSizeBytes());
-}
-
-void Adapter1D::serialize(OStream *stream) const {
+void Adapter1D::serialize(Context *rsc, OStream *stream) const {
}
Adapter1D *Adapter1D::createFromStream(Context *rsc, IStream *stream) {
@@ -98,7 +79,7 @@
a->setLOD(value);
break;
case RS_DIMENSION_FACE:
- a->setFace(value);
+ a->setFace((RsAllocationCubemapFace)value);
break;
default:
rsAssert(!"Unimplemented constraint");
@@ -106,16 +87,6 @@
}
}
-void rsi_Adapter1DSubData(Context *rsc, RsAdapter1D va, uint32_t xoff, uint32_t count, const void *data) {
- Adapter1D * a = static_cast<Adapter1D *>(va);
- a->subData(xoff, count, data);
-}
-
-void rsi_Adapter1DData(Context *rsc, RsAdapter1D va, const void *data) {
- Adapter1D * a = static_cast<Adapter1D *>(va);
- a->data(data);
-}
-
}
}
@@ -133,51 +104,17 @@
void Adapter2D::reset() {
mZ = 0;
mLOD = 0;
- mFace = 0;
+ mFace = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X;
}
-void * Adapter2D::getElement(uint32_t x, uint32_t y) const {
- rsAssert(mAllocation.get());
- rsAssert(mAllocation->getPtr());
- rsAssert(mAllocation->getType());
- if (mFace != 0 && !mAllocation->getType()->getDimFaces()) {
- ALOGE("Adapter wants cubemap face, but allocation has none");
- return NULL;
- }
- uint8_t * ptr = static_cast<uint8_t *>(mAllocation->getPtr());
- ptr += mAllocation->getType()->getLODOffset(mLOD, x, y);
-
- if (mFace != 0) {
- uint32_t totalSizeBytes = mAllocation->getType()->getSizeBytes();
- uint32_t faceOffset = totalSizeBytes / 6;
- ptr += faceOffset * mFace;
- }
- return ptr;
+void Adapter2D::data(Context *rsc, uint32_t x, uint32_t y, uint32_t w, uint32_t h,
+ const void *data, size_t sizeBytes) {
+ mAllocation->data(rsc, x, y, mLOD, mFace, w, h, data, sizeBytes);
}
-void Adapter2D::subData(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, const void *data) {
- rsAssert(mAllocation.get());
- rsAssert(mAllocation->getPtr());
- rsAssert(mAllocation->getType());
- uint32_t eSize = mAllocation.get()->getType()->getElementSizeBytes();
- uint32_t lineSize = eSize * w;
-
- const uint8_t *src = static_cast<const uint8_t *>(data);
- for (uint32_t line=yoff; line < (yoff+h); line++) {
- memcpy(getElement(xoff, line), src, lineSize);
- src += lineSize;
- }
-}
-
-void Adapter2D::data(const void *data) {
- memcpy(getElement(0,0),
- data,
- mAllocation.get()->getType()->getSizeBytes());
-}
-
-void Adapter2D::serialize(OStream *stream) const {
+void Adapter2D::serialize(Context *rsc, OStream *stream) const {
}
Adapter2D *Adapter2D::createFromStream(Context *rsc, IStream *stream) {
@@ -216,7 +153,7 @@
a->setLOD(value);
break;
case RS_DIMENSION_FACE:
- a->setFace(value);
+ a->setFace((RsAllocationCubemapFace)value);
break;
default:
rsAssert(!"Unimplemented constraint");
@@ -224,15 +161,6 @@
}
}
-void rsi_Adapter2DData(Context *rsc, RsAdapter2D va, const void *data) {
- Adapter2D * a = static_cast<Adapter2D *>(va);
- a->data(data);
-}
-
-void rsi_Adapter2DSubData(Context *rsc, RsAdapter2D va, uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, const void *data) {
- Adapter2D * a = static_cast<Adapter2D *>(va);
- a->subData(xoff, yoff, w, h, data);
-}
}
}
diff --git a/rsAdapter.h b/rsAdapter.h
index d150789..7b189cf 100644
--- a/rsAdapter.h
+++ b/rsAdapter.h
@@ -32,7 +32,6 @@
Adapter1D(Context *);
Adapter1D(Context *, Allocation *);
void reset();
- void * getElement(uint32_t x);
void setAllocation(Allocation *a) {mAllocation.set(a);}
@@ -43,13 +42,11 @@
inline void setY(uint32_t y) {mY = y;}
inline void setZ(uint32_t z) {mZ = z;}
inline void setLOD(uint32_t lod) {mLOD = lod;}
- inline void setFace(uint32_t face) {mFace = face;}
- //void setArray(uint32_t num, uint32_t value);
+ inline void setFace(RsAllocationCubemapFace face) {mFace = face;}
- void subData(uint32_t xoff, uint32_t count, const void *data);
- void data(const void *data);
+ void data(Context *rsc, uint32_t xoff, uint32_t count, const void *data, size_t sizeBytes);
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ADAPTER_1D; }
static Adapter1D *createFromStream(Context *rsc, IStream *stream);
@@ -58,7 +55,7 @@
uint32_t mY;
uint32_t mZ;
uint32_t mLOD;
- uint32_t mFace;
+ RsAllocationCubemapFace mFace;
};
class Adapter2D : public ObjectBase {
@@ -69,7 +66,6 @@
Adapter2D(Context *);
Adapter2D(Context *, Allocation *);
void reset();
- void * getElement(uint32_t x, uint32_t y) const;
uint32_t getDimX() const {return mAllocation->getType()->getLODDimX(mLOD);}
uint32_t getDimY() const {return mAllocation->getType()->getLODDimY(mLOD);}
@@ -78,13 +74,12 @@
void setAllocation(Allocation *a) {mAllocation.set(a);}
inline void setZ(uint32_t z) {mZ = z;}
inline void setLOD(uint32_t lod) {mLOD = lod;}
- inline void setFace(uint32_t face) {mFace = face;}
- //void setArray(uint32_t num, uint32_t value);
+ inline void setFace(RsAllocationCubemapFace face) {mFace = face;}
- void data(const void *data);
- void subData(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, const void *data);
+ void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h,
+ const void *data, size_t sizeBytes);
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ADAPTER_2D; }
static Adapter2D *createFromStream(Context *rsc, IStream *stream);
@@ -92,7 +87,7 @@
ObjectBaseRef<Allocation> mAllocation;
uint32_t mZ;
uint32_t mLOD;
- uint32_t mFace;
+ RsAllocationCubemapFace mFace;
};
}
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index 5d09936..ca747e7 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -60,6 +60,7 @@
mHal.state.hasMipmaps = type->getDimLOD();
mHal.state.elementSizeBytes = type->getElementSizeBytes();
mHal.state.hasReferences = mHal.state.type->getElement()->getHasReferences();
+ mHal.state.eType = mHal.state.type->getElement()->getType();
}
Allocation::~Allocation() {
@@ -72,10 +73,6 @@
rsc->mHal.funcs.allocation.syncAll(rsc, this, src);
}
-void Allocation::read(void *data) {
- memcpy(data, getPtr(), mHal.state.type->getSizeBytes());
-}
-
void Allocation::data(Context *rsc, uint32_t xoff, uint32_t lod,
uint32_t count, const void *data, size_t sizeBytes) {
const size_t eSize = mHal.state.type->getElementSizeBytes();
@@ -113,6 +110,39 @@
uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes) {
}
+void Allocation::read(Context *rsc, uint32_t xoff, uint32_t lod,
+ uint32_t count, void *data, size_t sizeBytes) {
+ const size_t eSize = mHal.state.type->getElementSizeBytes();
+
+ if ((count * eSize) != sizeBytes) {
+ ALOGE("Allocation::read called with mismatched size expected %zu, got %zu",
+ (count * eSize), sizeBytes);
+ mHal.state.type->dumpLOGV("type info");
+ return;
+ }
+
+ rsc->mHal.funcs.allocation.read1D(rsc, this, xoff, lod, count, data, sizeBytes);
+}
+
+void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, void *data, size_t sizeBytes) {
+ const size_t eSize = mHal.state.elementSizeBytes;
+ const size_t lineSize = eSize * w;
+
+ if ((lineSize * h) != sizeBytes) {
+ ALOGE("Allocation size mismatch, expected %zu, got %zu", (lineSize * h), sizeBytes);
+ rsAssert(!"Allocation::read called with mismatched size");
+ return;
+ }
+
+ rsc->mHal.funcs.allocation.read2D(rsc, this, xoff, yoff, lod, face, w, h, data, sizeBytes);
+}
+
+void Allocation::read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff,
+ uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes) {
+}
+
void Allocation::elementData(Context *rsc, uint32_t x, const void *data,
uint32_t cIdx, size_t sizeBytes) {
size_t eSize = mHal.state.elementSizeBytes;
@@ -199,7 +229,7 @@
}
ALOGV("%s allocation ptr=%p mUsageFlags=0x04%x, mMipmapControl=0x%04x",
- prefix, getPtr(), mHal.state.usageFlags, mHal.state.mipmapControl);
+ prefix, mHal.drvState.mallocPtrLOD0, mHal.state.usageFlags, mHal.state.mipmapControl);
}
uint32_t Allocation::getPackedSize() const {
@@ -207,7 +237,7 @@
return numItems * mHal.state.type->getElement()->getSizeBytesUnpadded();
}
-void Allocation::writePackedData(const Type *type,
+void Allocation::writePackedData(Context *rsc, const Type *type,
uint8_t *dst, const uint8_t *src, bool dstPadded) {
const Element *elem = type->getElement();
uint32_t unpaddedBytes = elem->getSizeBytesUnpadded();
@@ -256,28 +286,30 @@
delete[] sizeUnpadded;
}
-void Allocation::unpackVec3Allocation(const void *data, size_t dataSize) {
+void Allocation::unpackVec3Allocation(Context *rsc, const void *data, size_t dataSize) {
const uint8_t *src = (const uint8_t*)data;
- uint8_t *dst = (uint8_t*)getPtr();
+ uint8_t *dst = (uint8_t *)rsc->mHal.funcs.allocation.lock1D(rsc, this);
- writePackedData(getType(), dst, src, true);
+ writePackedData(rsc, getType(), dst, src, true);
+ rsc->mHal.funcs.allocation.unlock1D(rsc, this);
}
-void Allocation::packVec3Allocation(OStream *stream) const {
+void Allocation::packVec3Allocation(Context *rsc, OStream *stream) const {
uint32_t paddedBytes = getType()->getElement()->getSizeBytes();
uint32_t unpaddedBytes = getType()->getElement()->getSizeBytesUnpadded();
uint32_t numItems = mHal.state.type->getSizeBytes() / paddedBytes;
- const uint8_t *src = (const uint8_t*)getPtr();
+ const uint8_t *src = (const uint8_t*)rsc->mHal.funcs.allocation.lock1D(rsc, this);
uint8_t *dst = new uint8_t[numItems * unpaddedBytes];
- writePackedData(getType(), dst, src, false);
+ writePackedData(rsc, getType(), dst, src, false);
stream->addByteArray(dst, getPackedSize());
delete[] dst;
+ rsc->mHal.funcs.allocation.unlock1D(rsc, this);
}
-void Allocation::serialize(OStream *stream) const {
+void Allocation::serialize(Context *rsc, OStream *stream) const {
// Need to identify ourselves
stream->addU32((uint32_t)getClassId());
@@ -286,7 +318,7 @@
// First thing we need to serialize is the type object since it will be needed
// to initialize the class
- mHal.state.type->serialize(stream);
+ mHal.state.type->serialize(rsc, stream);
uint32_t dataSize = mHal.state.type->getSizeBytes();
// 3 element vectors are padded to 4 in memory, but padding isn't serialized
@@ -295,10 +327,11 @@
stream->addU32(packedSize);
if (dataSize == packedSize) {
// Now write the data
- stream->addByteArray(getPtr(), dataSize);
+ stream->addByteArray(rsc->mHal.funcs.allocation.lock1D(rsc, this), dataSize);
+ rsc->mHal.funcs.allocation.unlock1D(rsc, this);
} else {
// Now write the data
- packVec3Allocation(stream);
+ packVec3Allocation(rsc, stream);
}
}
@@ -341,7 +374,7 @@
// Read in all of our allocation data
alloc->data(rsc, 0, 0, count, stream->getPtr() + stream->getPos(), dataSize);
} else {
- alloc->unpackVec3Allocation(stream->getPtr() + stream->getPos(), dataSize);
+ alloc->unpackVec3Allocation(rsc, stream->getPtr() + stream->getPos(), dataSize);
}
stream->reset(stream->getPos() + dataSize);
@@ -367,7 +400,9 @@
}
void Allocation::freeChildrenUnlocked () {
- decRefs(getPtr(), mHal.state.type->getSizeBytes() / mHal.state.type->getElementSizeBytes(), 0);
+ void *ptr = mRSC->mHal.funcs.allocation.lock1D(mRSC, this);
+ decRefs(ptr, mHal.state.type->getSizeBytes() / mHal.state.type->getElementSizeBytes(), 0);
+ mRSC->mHal.funcs.allocation.unlock1D(mRSC, this);
}
bool Allocation::freeChildren() {
@@ -390,7 +425,8 @@
ObjectBaseRef<Type> t = mHal.state.type->cloneAndResize1D(rsc, dimX);
if (dimX < oldDimX) {
- decRefs(getPtr(), oldDimX - dimX, dimX);
+ decRefs(rsc->mHal.funcs.allocation.lock1D(rsc, this), oldDimX - dimX, dimX);
+ rsc->mHal.funcs.allocation.unlock1D(rsc, this);
}
rsc->mHal.funcs.allocation.resize(rsc, this, t.get(), mHal.state.hasReferences);
setType(t.get());
@@ -447,76 +483,6 @@
namespace android {
namespace renderscript {
-static void AllocationGenerateScriptMips(RsContext con, RsAllocation va);
-
-static void mip565(const Adapter2D &out, const Adapter2D &in) {
- uint32_t w = out.getDimX();
- uint32_t h = out.getDimY();
-
- for (uint32_t y=0; y < h; y++) {
- uint16_t *oPtr = static_cast<uint16_t *>(out.getElement(0, y));
- const uint16_t *i1 = static_cast<uint16_t *>(in.getElement(0, y*2));
- const uint16_t *i2 = static_cast<uint16_t *>(in.getElement(0, y*2+1));
-
- for (uint32_t x=0; x < w; x++) {
- *oPtr = rsBoxFilter565(i1[0], i1[1], i2[0], i2[1]);
- oPtr ++;
- i1 += 2;
- i2 += 2;
- }
- }
-}
-
-static void mip8888(const Adapter2D &out, const Adapter2D &in) {
- uint32_t w = out.getDimX();
- uint32_t h = out.getDimY();
-
- for (uint32_t y=0; y < h; y++) {
- uint32_t *oPtr = static_cast<uint32_t *>(out.getElement(0, y));
- const uint32_t *i1 = static_cast<uint32_t *>(in.getElement(0, y*2));
- const uint32_t *i2 = static_cast<uint32_t *>(in.getElement(0, y*2+1));
-
- for (uint32_t x=0; x < w; x++) {
- *oPtr = rsBoxFilter8888(i1[0], i1[1], i2[0], i2[1]);
- oPtr ++;
- i1 += 2;
- i2 += 2;
- }
- }
-}
-
-static void mip8(const Adapter2D &out, const Adapter2D &in) {
- uint32_t w = out.getDimX();
- uint32_t h = out.getDimY();
-
- for (uint32_t y=0; y < h; y++) {
- uint8_t *oPtr = static_cast<uint8_t *>(out.getElement(0, y));
- const uint8_t *i1 = static_cast<uint8_t *>(in.getElement(0, y*2));
- const uint8_t *i2 = static_cast<uint8_t *>(in.getElement(0, y*2+1));
-
- for (uint32_t x=0; x < w; x++) {
- *oPtr = (uint8_t)(((uint32_t)i1[0] + i1[1] + i2[0] + i2[1]) * 0.25f);
- oPtr ++;
- i1 += 2;
- i2 += 2;
- }
- }
-}
-
-static void mip(const Adapter2D &out, const Adapter2D &in) {
- switch (out.getBaseType()->getElement()->getSizeBits()) {
- case 32:
- mip8888(out, in);
- break;
- case 16:
- mip565(out, in);
- break;
- case 8:
- mip8(out, in);
- break;
- }
-}
-
void rsi_AllocationSyncAll(Context *rsc, RsAllocation va, RsAllocationUsageType src) {
Allocation *a = static_cast<Allocation *>(va);
a->sendDirty(rsc);
@@ -524,21 +490,15 @@
}
void rsi_AllocationGenerateMipmaps(Context *rsc, RsAllocation va) {
- Allocation *texAlloc = static_cast<Allocation *>(va);
- AllocationGenerateScriptMips(rsc, texAlloc);
+ Allocation *alloc = static_cast<Allocation *>(va);
+ rsc->mHal.funcs.allocation.generateMipmaps(rsc, alloc);
}
-void rsi_AllocationCopyToBitmap(Context *rsc, RsAllocation va, void *data, size_t dataLen) {
- Allocation *texAlloc = static_cast<Allocation *>(va);
- const Type * t = texAlloc->getType();
-
- size_t s = t->getDimX() * t->getDimY() * t->getElementSizeBytes();
- if (s != dataLen) {
- rsc->setError(RS_ERROR_BAD_VALUE, "Bitmap size didn't match allocation size");
- return;
- }
-
- memcpy(data, texAlloc->getPtr(), s);
+void rsi_AllocationCopyToBitmap(Context *rsc, RsAllocation va, void *data, size_t sizeBytes) {
+ Allocation *a = static_cast<Allocation *>(va);
+ const Type * t = a->getType();
+ a->read(rsc, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
+ t->getDimX(), t->getDimY(), data, sizeBytes);
}
void rsi_Allocation1DData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t lod,
@@ -565,9 +525,16 @@
a->data(rsc, xoff, yoff, lod, face, w, h, data, sizeBytes);
}
-void rsi_AllocationRead(Context *rsc, RsAllocation va, void *data, size_t data_length) {
+void rsi_AllocationRead(Context *rsc, RsAllocation va, void *data, size_t sizeBytes) {
Allocation *a = static_cast<Allocation *>(va);
- a->read(data);
+ const Type * t = a->getType();
+ if(t->getDimY()) {
+ a->read(rsc, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
+ t->getDimX(), t->getDimY(), data, sizeBytes);
+ } else {
+ a->read(rsc, 0, 0, t->getDimX(), data, sizeBytes);
+ }
+
}
void rsi_AllocationResize1D(Context *rsc, RsAllocation va, uint32_t dimX) {
@@ -580,23 +547,6 @@
a->resize2D(rsc, dimX, dimY);
}
-static void AllocationGenerateScriptMips(RsContext con, RsAllocation va) {
- Context *rsc = static_cast<Context *>(con);
- Allocation *texAlloc = static_cast<Allocation *>(va);
- uint32_t numFaces = texAlloc->getType()->getDimFaces() ? 6 : 1;
- for (uint32_t face = 0; face < numFaces; face ++) {
- Adapter2D adapt(rsc, texAlloc);
- Adapter2D adapt2(rsc, texAlloc);
- adapt.setFace(face);
- adapt2.setFace(face);
- for (uint32_t lod=0; lod < (texAlloc->getType()->getLODCount() -1); lod++) {
- adapt.setLOD(lod);
- adapt2.setLOD(lod + 1);
- mip(adapt2, adapt);
- }
- }
-}
-
RsAllocation rsi_AllocationCreateTyped(Context *rsc, RsType vtype,
RsAllocationMipmapControl mips,
uint32_t usages, uint32_t ptr) {
@@ -610,7 +560,7 @@
RsAllocation rsi_AllocationCreateFromBitmap(Context *rsc, RsType vtype,
RsAllocationMipmapControl mips,
- const void *data, size_t data_length, uint32_t usages) {
+ const void *data, size_t sizeBytes, uint32_t usages) {
Type *t = static_cast<Type *>(vtype);
RsAllocation vTexAlloc = rsi_AllocationCreateTyped(rsc, vtype, mips, usages, 0);
@@ -620,9 +570,10 @@
return NULL;
}
- memcpy(texAlloc->getPtr(), data, t->getDimX() * t->getDimY() * t->getElementSizeBytes());
+ texAlloc->data(rsc, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X,
+ t->getDimX(), t->getDimY(), data, sizeBytes);
if (mips == RS_ALLOCATION_MIPMAP_FULL) {
- AllocationGenerateScriptMips(rsc, texAlloc);
+ rsc->mHal.funcs.allocation.generateMipmaps(rsc, texAlloc);
}
texAlloc->sendDirty(rsc);
@@ -631,7 +582,7 @@
RsAllocation rsi_AllocationCubeCreateFromBitmap(Context *rsc, RsType vtype,
RsAllocationMipmapControl mips,
- const void *data, size_t data_length, uint32_t usages) {
+ const void *data, size_t sizeBytes, uint32_t usages) {
Type *t = static_cast<Type *>(vtype);
// Cubemap allocation's faces should be Width by Width each.
@@ -650,11 +601,9 @@
uint8_t *sourcePtr = (uint8_t*)data;
for (uint32_t face = 0; face < 6; face ++) {
- Adapter2D faceAdapter(rsc, texAlloc);
- faceAdapter.setFace(face);
-
for (uint32_t dI = 0; dI < faceSize; dI ++) {
- memcpy(faceAdapter.getElement(0, dI), sourcePtr + strideBytes * dI, copySize);
+ texAlloc->data(rsc, 0, dI, 0, (RsAllocationCubemapFace)face,
+ t->getDimX(), 1, sourcePtr + strideBytes * dI, copySize);
}
// Move the data pointer to the next cube face
@@ -662,7 +611,7 @@
}
if (mips == RS_ALLOCATION_MIPMAP_FULL) {
- AllocationGenerateScriptMips(rsc, texAlloc);
+ rsc->mHal.funcs.allocation.generateMipmaps(rsc, texAlloc);
}
texAlloc->sendDirty(rsc);
diff --git a/rsAllocation.h b/rsAllocation.h
index dce09ed..4fccf9d 100644
--- a/rsAllocation.h
+++ b/rsAllocation.h
@@ -41,6 +41,8 @@
// The graphics equivalent of malloc. The allocation contains a structure of elements.
public:
+ const static int MAX_LOD = 16;
+
struct Hal {
void * drv;
@@ -63,12 +65,13 @@
int32_t surfaceTextureID;
ANativeWindow *wndSurface;
SurfaceTexture *surfaceTexture;
+ RsDataType eType;
};
State state;
struct DrvState {
- mutable void * mallocPtr;
- mutable uint32_t stride;
+ mutable void * mallocPtrLOD0;
+ mutable uint32_t strideLOD0;
} drvState;
};
@@ -80,7 +83,6 @@
virtual ~Allocation();
void updateCache();
- void * getPtr() const {return mHal.drvState.mallocPtr;}
const Type * getType() const {return mHal.state.type;}
void syncAll(Context *rsc, RsAllocationUsageType src);
@@ -96,18 +98,22 @@
void data(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod, RsAllocationCubemapFace face,
uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
+ void read(Context *rsc, uint32_t xoff, uint32_t lod, uint32_t count, void *data, size_t sizeBytes);
+ void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, void *data, size_t sizeBytes);
+ void read(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff, uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes);
+
void elementData(Context *rsc, uint32_t x,
const void *data, uint32_t elementOff, size_t sizeBytes);
void elementData(Context *rsc, uint32_t x, uint32_t y,
const void *data, uint32_t elementOff, size_t sizeBytes);
- void read(void *data);
-
void addProgramToDirty(const Program *);
void removeProgramToDirty(const Program *);
virtual void dumpLOGV(const char *prefix) const;
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ALLOCATION; }
static Allocation *createFromStream(Context *rsc, IStream *stream);
@@ -152,9 +158,10 @@
Allocation(Context *rsc, const Type *, uint32_t usages, RsAllocationMipmapControl mc, void *ptr);
uint32_t getPackedSize() const;
- static void writePackedData(const Type *type, uint8_t *dst, const uint8_t *src, bool dstPadded);
- void unpackVec3Allocation(const void *data, size_t dataSize);
- void packVec3Allocation(OStream *stream) const;
+ static void writePackedData(Context *rsc, const Type *type, uint8_t *dst,
+ const uint8_t *src, bool dstPadded);
+ void unpackVec3Allocation(Context *rsc, const void *data, size_t dataSize);
+ void packVec3Allocation(Context *rsc, OStream *stream) const;
};
}
diff --git a/rsAnimation.cpp b/rsAnimation.cpp
index a4093d9..f6da138 100644
--- a/rsAnimation.cpp
+++ b/rsAnimation.cpp
@@ -21,7 +21,7 @@
using namespace android;
using namespace android::renderscript;
-void Animation::serialize(OStream *stream) const {
+void Animation::serialize(Context *rsc, OStream *stream) const {
}
Animation *Animation::createFromStream(Context *rsc, IStream *stream) {
diff --git a/rsAnimation.h b/rsAnimation.h
index 526a081..4e0cc89 100644
--- a/rsAnimation.h
+++ b/rsAnimation.h
@@ -36,7 +36,7 @@
float eval(float) const;
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ANIMATION; }
static Animation *createFromStream(Context *rsc, IStream *stream);
diff --git a/rsDefines.h b/rsDefines.h
index 854df08..c3540f1 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -333,6 +333,12 @@
RS_CULL_INVALID = 100,
};
+enum RsScriptIntrinsicID {
+ RS_SCRIPT_INTRINSIC_ID_UNDEFINED = 0,
+ RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3 = 1,
+ RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5 = 2
+};
+
typedef struct {
RsA3DClassID classID;
const char* objectName;
diff --git a/rsElement.cpp b/rsElement.cpp
index fb2892c..3126c28 100644
--- a/rsElement.cpp
+++ b/rsElement.cpp
@@ -92,7 +92,7 @@
}
}
-void Element::serialize(OStream *stream) const {
+void Element::serialize(Context *rsc, OStream *stream) const {
// Need to identify ourselves
stream->addU32((uint32_t)getClassId());
@@ -106,7 +106,7 @@
for (uint32_t ct = 0; ct < mFieldCount; ct++) {
stream->addString(&mFields[ct].name);
stream->addU32(mFields[ct].arraySize);
- mFields[ct].e->serialize(stream);
+ mFields[ct].e->serialize(rsc, stream);
}
}
@@ -130,7 +130,7 @@
component.getType(),
component.getKind(),
component.getIsNormalized(),
- component.getVectorSize());;
+ component.getVectorSize());
}
const Element **subElems = new const Element *[fieldCount];
diff --git a/rsElement.h b/rsElement.h
index b86d3bc..57698f4 100644
--- a/rsElement.h
+++ b/rsElement.h
@@ -105,7 +105,7 @@
uint32_t getBitsUnpadded() const {return mBitsUnpadded;}
void dumpLOGV(const char *prefix) const;
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ELEMENT; }
static Element *createFromStream(Context *rsc, IStream *stream);
diff --git a/rsFileA3D.cpp b/rsFileA3D.cpp
index a52bf7e..07c413f 100644
--- a/rsFileA3D.cpp
+++ b/rsFileA3D.cpp
@@ -352,7 +352,7 @@
return true;
}
-void FileA3D::appendToFile(ObjectBase *obj) {
+void FileA3D::appendToFile(Context *con, ObjectBase *obj) {
if (!obj) {
return;
}
@@ -366,7 +366,7 @@
indexEntry->mOffset = mWriteStream->getPos();
indexEntry->mRsObj = obj;
mWriteIndex.push(indexEntry);
- obj->serialize(mWriteStream);
+ obj->serialize(con, mWriteStream);
indexEntry->mLength = mWriteStream->getPos() - indexEntry->mOffset;
mWriteStream->align(4);
}
diff --git a/rsFileA3D.h b/rsFileA3D.h
index cc38c8a..06b90d7 100644
--- a/rsFileA3D.h
+++ b/rsFileA3D.h
@@ -66,12 +66,12 @@
const A3DIndexEntry* getIndexEntry(size_t index) const;
ObjectBase *initializeFromEntry(size_t index);
- void appendToFile(ObjectBase *obj);
+ void appendToFile(Context *rsc, ObjectBase *obj);
bool writeFile(const char *filename);
// Currently files do not get serialized,
// but we need to inherit from ObjectBase for ref tracking
- virtual void serialize(OStream *stream) const {
+ virtual void serialize(Context *rsc, OStream *stream) const {
}
virtual RsA3DClassID getClassId() const {
return RS_A3D_CLASS_ID_UNKNOWN;
diff --git a/rsFont.cpp b/rsFont.cpp
index 1f53c79..82fb90f 100644
--- a/rsFont.cpp
+++ b/rsFont.cpp
@@ -118,7 +118,7 @@
FontState *state = &mRSC->mStateFont;
uint32_t cacheWidth = state->getCacheTextureType()->getDimX();
- const uint8_t* cacheBuffer = state->getTextTextureData();
+ const uint8_t* cacheBuffer = state->mCacheBuffer;
uint32_t cacheX = 0, cacheY = 0;
int32_t bX = 0, bY = 0;
@@ -453,7 +453,7 @@
uint32_t cacheWidth = getCacheTextureType()->getDimX();
- uint8_t *cacheBuffer = (uint8_t*)mTextTexture->getPtr();
+ uint8_t *cacheBuffer = mCacheBuffer;
uint8_t *bitmapBuffer = bitmap->buffer;
uint32_t cacheX = 0, bX = 0, cacheY = 0, bY = 0;
@@ -467,7 +467,10 @@
// This will dirty the texture and the shader so next time
// we draw it will upload the data
- mTextTexture->sendDirty(mRSC);
+ mRSC->mHal.funcs.allocation.data2D(mRSC, mTextTexture.get(), 0, 0, 0,
+ RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X, mCacheWidth, mCacheHeight,
+ mCacheBuffer, mCacheWidth*mCacheHeight);
+
mFontShaderF->bindTexture(mRSC, 0, mTextTexture.get());
// Some debug code
@@ -539,13 +542,16 @@
RS_KIND_PIXEL_A, true, 1);
// We will allocate a texture to initially hold 32 character bitmaps
+ mCacheHeight = 256;
+ mCacheWidth = 1024;
ObjectBaseRef<Type> texType = Type::getTypeRef(mRSC, alphaElem.get(),
- 1024, 256, 0, false, false);
+ mCacheWidth, mCacheHeight, 0, false, false);
+ mCacheBuffer = new uint8_t[mCacheWidth * mCacheHeight];
+
Allocation *cacheAlloc = Allocation::createAllocation(mRSC, texType.get(),
- RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE);
+ RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE);
mTextTexture.set(cacheAlloc);
- mTextTexture->syncAll(mRSC, RS_ALLOCATION_USAGE_SCRIPT);
// Split up our cache texture into lines of certain widths
int32_t nextLine = 0;
@@ -574,7 +580,7 @@
Allocation *indexAlloc = Allocation::createAllocation(mRSC, indexType.get(),
RS_ALLOCATION_USAGE_SCRIPT |
RS_ALLOCATION_USAGE_GRAPHICS_VERTEX);
- uint16_t *indexPtr = (uint16_t*)indexAlloc->getPtr();
+ uint16_t *indexPtr = (uint16_t*)mRSC->mHal.funcs.allocation.lock1D(mRSC, indexAlloc);
// Four verts, two triangles , six indices per quad
for (uint32_t i = 0; i < mMaxNumberOfQuads; i ++) {
@@ -606,12 +612,14 @@
Allocation *vertexAlloc = Allocation::createAllocation(mRSC, vertexDataType.get(),
RS_ALLOCATION_USAGE_SCRIPT);
- mTextMeshPtr = (float*)vertexAlloc->getPtr();
+ mTextMeshPtr = (float*)mRSC->mHal.funcs.allocation.lock1D(mRSC, vertexAlloc);
mMesh.set(new Mesh(mRSC, 1, 1));
mMesh->setVertexBuffer(vertexAlloc, 0);
mMesh->setPrimitive(indexAlloc, RS_PRIMITIVE_TRIANGLE, 0);
mMesh->init();
+ mRSC->mHal.funcs.allocation.unlock1D(mRSC, indexAlloc);
+ mRSC->mHal.funcs.allocation.unlock1D(mRSC, vertexAlloc);
}
// We don't want to allocate anything unless we actually draw text
diff --git a/rsFont.h b/rsFont.h
index 2bd30b7..8f43a2a 100644
--- a/rsFont.h
+++ b/rsFont.h
@@ -67,7 +67,7 @@
// Currently files do not get serialized,
// but we need to inherit from ObjectBase for ref tracking
- virtual void serialize(OStream *stream) const {
+ virtual void serialize(Context *rsc, OStream *stream) const {
}
virtual RsA3DClassID getClassId() const {
return RS_A3D_CLASS_ID_UNKNOWN;
@@ -215,10 +215,11 @@
// Texture to cache glyph bitmaps
ObjectBaseRef<Allocation> mTextTexture;
+ uint8_t *mCacheBuffer;
+ uint32_t mCacheWidth;
+ uint32_t mCacheHeight;
+
void initTextTexture();
- const uint8_t* getTextTextureData() const {
- return (uint8_t*)mTextTexture->getPtr();
- }
#ifndef ANDROID_RS_SERIALIZE
bool cacheBitmap(FT_Bitmap_ *bitmap, uint32_t *retOriginX, uint32_t *retOriginY);
diff --git a/rsMatrix2x2.h b/rsMatrix2x2.h
index 4dcb84a..4fbd1c2 100644
--- a/rsMatrix2x2.h
+++ b/rsMatrix2x2.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2011-2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,12 +25,12 @@
namespace renderscript {
struct Matrix2x2 : public rs_matrix2x2 {
- inline float get(uint32_t row, uint32_t col) const {
- return m[row*2 + col];
+ inline float get(uint32_t x, uint32_t y) const {
+ return m[x*2 + y];
}
- inline void set(uint32_t row, uint32_t col, float v) {
- m[row*2 + col] = v;
+ inline void set(uint32_t x, uint32_t y, float v) {
+ m[x*2 + y] = v;
}
void loadIdentity();
@@ -51,12 +51,4 @@
}
}
-
-
-
-#endif
-
-
-
-
-
+#endif // ANDROID_RS_MATRIX_2x2_H
diff --git a/rsMatrix3x3.h b/rsMatrix3x3.h
index f96d270..05249b1 100644
--- a/rsMatrix3x3.h
+++ b/rsMatrix3x3.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2011-2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,12 +25,12 @@
namespace renderscript {
struct Matrix3x3 : public rs_matrix3x3 {
- inline float get(uint32_t row, uint32_t col) const {
- return m[row*3 + col];
+ inline float get(uint32_t x, uint32_t y) const {
+ return m[x*3 + y];
}
- inline void set(uint32_t row, uint32_t col, float v) {
- m[row*3 + col] = v;
+ inline void set(uint32_t x, uint32_t y, float v) {
+ m[x*3 + y] = v;
}
void loadIdentity();
@@ -51,12 +51,4 @@
}
}
-
-
-
-#endif
-
-
-
-
-
+#endif // ANDROID_RS_MATRIX_3x3_H
diff --git a/rsMatrix4x4.h b/rsMatrix4x4.h
index d30184f..44c33d1 100644
--- a/rsMatrix4x4.h
+++ b/rsMatrix4x4.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009 The Android Open Source Project
+ * Copyright (C) 2009-2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,12 +25,12 @@
namespace renderscript {
struct Matrix4x4 : public rs_matrix4x4 {
- float get(uint32_t row, uint32_t col) const {
- return m[row*4 + col];
+ float get(uint32_t x, uint32_t y) const {
+ return m[x*4 + y];
}
- void set(uint32_t row, uint32_t col, float v) {
- m[row*4 + col] = v;
+ void set(uint32_t x, uint32_t y, float v) {
+ m[x*4 + y] = v;
}
void loadIdentity();
@@ -82,11 +82,4 @@
}
}
-
-
-
-#endif
-
-
-
-
+#endif // ANDROID_RS_MATRIX_4x4_H
diff --git a/rsMesh.cpp b/rsMesh.cpp
index 399a52b..651a8f3 100644
--- a/rsMesh.cpp
+++ b/rsMesh.cpp
@@ -78,7 +78,7 @@
#endif
}
-void Mesh::serialize(OStream *stream) const {
+void Mesh::serialize(Context *rsc, OStream *stream) const {
// Need to identify ourselves
stream->addU32((uint32_t)getClassId());
@@ -88,7 +88,7 @@
// Store number of vertex streams
stream->addU32(mHal.state.vertexBuffersCount);
for (uint32_t vCount = 0; vCount < mHal.state.vertexBuffersCount; vCount ++) {
- mHal.state.vertexBuffers[vCount]->serialize(stream);
+ mHal.state.vertexBuffers[vCount]->serialize(rsc, stream);
}
stream->addU32(mHal.state.primitivesCount);
@@ -98,7 +98,7 @@
if (mHal.state.indexBuffers[pCount]) {
stream->addU32(1);
- mHal.state.indexBuffers[pCount]->serialize(stream);
+ mHal.state.indexBuffers[pCount]->serialize(rsc, stream);
} else {
stream->addU32(0);
}
@@ -215,11 +215,12 @@
}
}
-void Mesh::computeBBox() {
+void Mesh::computeBBox(Context *rsc) {
float *posPtr = NULL;
uint32_t vectorSize = 0;
uint32_t stride = 0;
uint32_t numVerts = 0;
+ Allocation *posAlloc = NULL;
// First we need to find the position ptr and stride
for (uint32_t ct=0; ct < mHal.state.vertexBuffersCount; ct++) {
const Type *bufferType = mHal.state.vertexBuffers[ct]->getType();
@@ -230,7 +231,10 @@
vectorSize = bufferElem->getField(ct)->getComponent().getVectorSize();
stride = bufferElem->getSizeBytes() / sizeof(float);
uint32_t offset = bufferElem->getFieldOffsetBytes(ct);
- posPtr = (float*)((uint8_t*)mHal.state.vertexBuffers[ct]->getPtr() + offset);
+ posAlloc = mHal.state.vertexBuffers[ct];
+ const uint8_t *bp = (const uint8_t *)rsc->mHal.funcs.allocation.lock1D(
+ rsc, posAlloc);
+ posPtr = (float*)(bp + offset);
numVerts = bufferType->getDimX();
break;
}
@@ -256,6 +260,10 @@
}
posPtr += stride;
}
+
+ if (posAlloc) {
+ rsc->mHal.funcs.allocation.unlock1D(rsc, posAlloc);
+ }
}
namespace android {
diff --git a/rsMesh.h b/rsMesh.h
index 7ca63cf..9b61ebe 100644
--- a/rsMesh.h
+++ b/rsMesh.h
@@ -59,7 +59,7 @@
Mesh(Context *, uint32_t vertexBuffersCount, uint32_t primitivesCount);
~Mesh();
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_MESH; }
static Mesh *createFromStream(Context *rsc, IStream *stream);
void init();
@@ -83,7 +83,7 @@
// Bounding volumes
float mBBoxMin[3];
float mBBoxMax[3];
- void computeBBox();
+ void computeBBox(Context *rsc);
protected:
ObjectBaseRef<Allocation> *mVertexBuffers;
ObjectBaseRef<Allocation> *mIndexBuffers;
diff --git a/rsObjectBase.h b/rsObjectBase.h
index 586da19..f16acd9 100644
--- a/rsObjectBase.h
+++ b/rsObjectBase.h
@@ -58,7 +58,7 @@
static void dumpAll(Context *rsc);
virtual void dumpLOGV(const char *prefix) const;
- virtual void serialize(OStream *stream) const = 0;
+ virtual void serialize(Context *rsc, OStream *stream) const = 0;
virtual RsA3DClassID getClassId() const = 0;
static bool isValid(const Context *rsc, const ObjectBase *obj);
diff --git a/rsPath.cpp b/rsPath.cpp
index 055bb86..bcf4b8e 100644
--- a/rsPath.cpp
+++ b/rsPath.cpp
@@ -59,7 +59,7 @@
void Path::render(Context *rsc) {
}
-void Path::serialize(OStream *stream) const {
+void Path::serialize(Context *rsc, OStream *stream) const {
}
diff --git a/rsPath.h b/rsPath.h
index 1abfc9a..ea14335 100644
--- a/rsPath.h
+++ b/rsPath.h
@@ -43,7 +43,7 @@
~Path();
void render(Context *);
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const;
private:
diff --git a/rsProgramFragment.cpp b/rsProgramFragment.cpp
index bebde1e..438b620 100644
--- a/rsProgramFragment.cpp
+++ b/rsProgramFragment.cpp
@@ -53,8 +53,10 @@
mConstantColor[1] = g;
mConstantColor[2] = b;
mConstantColor[3] = a;
- memcpy(mHal.state.constants[0]->getPtr(), mConstantColor, 4*sizeof(float));
+ void *p = rsc->mHal.funcs.allocation.lock1D(rsc, mHal.state.constants[0]);
+ memcpy(p, mConstantColor, 4*sizeof(float));
mDirty = true;
+ rsc->mHal.funcs.allocation.unlock1D(rsc, mHal.state.constants[0]);
}
void ProgramFragment::setup(Context *rsc, ProgramFragmentState *state) {
@@ -74,7 +76,7 @@
rsc->mHal.funcs.fragment.setActive(rsc, this);
}
-void ProgramFragment::serialize(OStream *stream) const {
+void ProgramFragment::serialize(Context *rsc, OStream *stream) const {
}
ProgramFragment *ProgramFragment::createFromStream(Context *rsc, IStream *stream) {
diff --git a/rsProgramFragment.h b/rsProgramFragment.h
index 4eb28e7..d580252 100644
--- a/rsProgramFragment.h
+++ b/rsProgramFragment.h
@@ -34,7 +34,7 @@
virtual void setup(Context *, ProgramFragmentState *);
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_FRAGMENT; }
static ProgramFragment *createFromStream(Context *rsc, IStream *stream);
diff --git a/rsProgramRaster.cpp b/rsProgramRaster.cpp
index 94bfe42..4f27f2e 100644
--- a/rsProgramRaster.cpp
+++ b/rsProgramRaster.cpp
@@ -53,7 +53,7 @@
rsc->mHal.funcs.raster.setActive(rsc, this);
}
-void ProgramRaster::serialize(OStream *stream) const {
+void ProgramRaster::serialize(Context *rsc, OStream *stream) const {
}
ProgramRaster *ProgramRaster::createFromStream(Context *rsc, IStream *stream) {
diff --git a/rsProgramRaster.h b/rsProgramRaster.h
index c552ea3..e9a524b 100644
--- a/rsProgramRaster.h
+++ b/rsProgramRaster.h
@@ -46,7 +46,7 @@
Hal mHal;
virtual void setup(const Context *, ProgramRasterState *);
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_RASTER; }
static ProgramRaster *createFromStream(Context *rsc, IStream *stream);
diff --git a/rsProgramStore.cpp b/rsProgramStore.cpp
index 7e25a22..83c1f2c 100644
--- a/rsProgramStore.cpp
+++ b/rsProgramStore.cpp
@@ -63,7 +63,7 @@
rsc->mHal.funcs.store.setActive(rsc, this);
}
-void ProgramStore::serialize(OStream *stream) const {
+void ProgramStore::serialize(Context *rsc, OStream *stream) const {
}
ProgramStore *ProgramStore::createFromStream(Context *rsc, IStream *stream) {
diff --git a/rsProgramStore.h b/rsProgramStore.h
index 9bb2795..9a7f7f1 100644
--- a/rsProgramStore.h
+++ b/rsProgramStore.h
@@ -59,7 +59,7 @@
virtual void setup(const Context *, ProgramStoreState *);
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_STORE; }
static ProgramStore *createFromStream(Context *rsc, IStream *stream);
static ObjectBaseRef<ProgramStore> getProgramStore(Context *,
diff --git a/rsProgramVertex.cpp b/rsProgramVertex.cpp
index 23fcbe7..c2ce7ee 100644
--- a/rsProgramVertex.cpp
+++ b/rsProgramVertex.cpp
@@ -46,7 +46,8 @@
"Unable to set fixed function emulation matrices because allocation is missing");
return;
}
- float *f = static_cast<float *>(mHal.state.constants[0]->getPtr());
+ float *f = static_cast<float *>(rsc->mHal.funcs.allocation.lock1D(
+ rsc, mHal.state.constants[0]));
Matrix4x4 mvp;
mvp.load(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET]);
Matrix4x4 t;
@@ -55,6 +56,7 @@
for (uint32_t i = 0; i < 16; i ++) {
f[RS_PROGRAM_VERTEX_MVP_OFFSET + i] = mvp.m[i];
}
+ rsc->mHal.funcs.allocation.unlock1D(rsc, mHal.state.constants[0]);
}
state->mLast.set(this);
@@ -73,9 +75,11 @@
"Unable to set fixed function emulation matrix projection because allocation is missing");
return;
}
- float *f = static_cast<float *>(mHal.state.constants[0]->getPtr());
+ float *f = static_cast<float *>(rsc->mHal.funcs.allocation.lock1D(
+ rsc, mHal.state.constants[0]));
memcpy(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET], m, sizeof(rsc_Matrix));
mDirty = true;
+ rsc->mHal.funcs.allocation.unlock1D(rsc, mHal.state.constants[0]);
}
void ProgramVertex::setModelviewMatrix(Context *rsc, const rsc_Matrix *m) const {
@@ -89,9 +93,11 @@
"Unable to set fixed function emulation matrix modelview because allocation is missing");
return;
}
- float *f = static_cast<float *>(mHal.state.constants[0]->getPtr());
+ float *f = static_cast<float *>(rsc->mHal.funcs.allocation.lock1D(
+ rsc, mHal.state.constants[0]));
memcpy(&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET], m, sizeof(rsc_Matrix));
mDirty = true;
+ rsc->mHal.funcs.allocation.unlock1D(rsc, mHal.state.constants[0]);
}
void ProgramVertex::setTextureMatrix(Context *rsc, const rsc_Matrix *m) const {
@@ -105,9 +111,11 @@
"Unable to set fixed function emulation matrix texture because allocation is missing");
return;
}
- float *f = static_cast<float *>(mHal.state.constants[0]->getPtr());
+ float *f = static_cast<float *>(rsc->mHal.funcs.allocation.lock1D(
+ rsc, mHal.state.constants[0]));
memcpy(&f[RS_PROGRAM_VERTEX_TEXTURE_OFFSET], m, sizeof(rsc_Matrix));
mDirty = true;
+ rsc->mHal.funcs.allocation.unlock1D(rsc, mHal.state.constants[0]);
}
void ProgramVertex::getProjectionMatrix(Context *rsc, rsc_Matrix *m) const {
@@ -121,22 +129,26 @@
"Unable to get fixed function emulation matrix projection because allocation is missing");
return;
}
- float *f = static_cast<float *>(mHal.state.constants[0]->getPtr());
+ float *f = static_cast<float *>(
+ rsc->mHal.funcs.allocation.lock1D(rsc, mHal.state.constants[0]));
memcpy(m, &f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET], sizeof(rsc_Matrix));
+ rsc->mHal.funcs.allocation.unlock1D(rsc, mHal.state.constants[0]);
}
void ProgramVertex::transformToScreen(Context *rsc, float *v4out, const float *v3in) const {
if (isUserProgram()) {
return;
}
- float *f = static_cast<float *>(mHal.state.constants[0]->getPtr());
+ float *f = static_cast<float *>(
+ rsc->mHal.funcs.allocation.lock1D(rsc, mHal.state.constants[0]));
Matrix4x4 mvp;
mvp.loadMultiply((Matrix4x4 *)&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET],
(Matrix4x4 *)&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET]);
mvp.vectorMultiply(v4out, v3in);
+ rsc->mHal.funcs.allocation.unlock1D(rsc, mHal.state.constants[0]);
}
-void ProgramVertex::serialize(OStream *stream) const {
+void ProgramVertex::serialize(Context *rsc, OStream *stream) const {
}
ProgramVertex *ProgramVertex::createFromStream(Context *rsc, IStream *stream) {
@@ -207,7 +219,7 @@
}
void ProgramVertexState::updateSize(Context *rsc) {
- float *f = static_cast<float *>(mDefaultAlloc->getPtr());
+ float *f = static_cast<float *>(rsc->mHal.funcs.allocation.lock1D(rsc, mDefaultAlloc.get()));
float surfaceWidth = (float)rsc->getCurrentSurfaceWidth();
float surfaceHeight = (float)rsc->getCurrentSurfaceHeight();
@@ -220,6 +232,7 @@
m.loadIdentity();
memcpy(&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET], m.m, sizeof(m));
memcpy(&f[RS_PROGRAM_VERTEX_TEXTURE_OFFSET], m.m, sizeof(m));
+ rsc->mHal.funcs.allocation.unlock1D(rsc, mDefaultAlloc.get());
}
void ProgramVertexState::deinit(Context *rsc) {
diff --git a/rsProgramVertex.h b/rsProgramVertex.h
index 67c2a88..105d065 100644
--- a/rsProgramVertex.h
+++ b/rsProgramVertex.h
@@ -41,7 +41,7 @@
void transformToScreen(Context *, float *v4out, const float *v3in) const;
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_VERTEX; }
static ProgramVertex *createFromStream(Context *rsc, IStream *stream);
};
diff --git a/rsRuntime.h b/rsRuntime.h
index 64f2de8..eff691b 100644
--- a/rsRuntime.h
+++ b/rsRuntime.h
@@ -149,7 +149,6 @@
uint32_t rsrToClient(Context *, Script *, int cmdID, void *data, int len);
uint32_t rsrToClientBlocking(Context *, Script *, int cmdID, void *data, int len);
-const Allocation * rsrGetAllocation(Context *, Script *, const void *ptr);
void rsrAllocationMarkDirty(Context *, Script *, RsAllocation a);
void rsrAllocationSyncAll(Context *, Script *, Allocation *a, RsAllocationUsageType source);
diff --git a/rsSampler.cpp b/rsSampler.cpp
index c7180bd..fededb1 100644
--- a/rsSampler.cpp
+++ b/rsSampler.cpp
@@ -68,7 +68,7 @@
ss->mSamplers[slot].clear();
}
-void Sampler::serialize(OStream *stream) const {
+void Sampler::serialize(Context *rsc, OStream *stream) const {
}
Sampler *Sampler::createFromStream(Context *rsc, IStream *stream) {
diff --git a/rsSampler.h b/rsSampler.h
index dea4cb6..81220a8 100644
--- a/rsSampler.h
+++ b/rsSampler.h
@@ -61,7 +61,7 @@
void bindToContext(SamplerState *, uint32_t slot);
void unbindFromContext(SamplerState *);
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_SAMPLER; }
static Sampler *createFromStream(Context *rsc, IStream *stream);
diff --git a/rsScript.cpp b/rsScript.cpp
index d39fb5e..25ee1a0 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -48,11 +48,7 @@
}
mSlots[slot].set(a);
- if (a != NULL) {
- mRSC->mHal.funcs.script.setGlobalBind(mRSC, this, slot, a->getPtr());
- } else {
- mRSC->mHal.funcs.script.setGlobalBind(mRSC, this, slot, NULL);
- }
+ mRSC->mHal.funcs.script.setGlobalBind(mRSC, this, slot, a);
}
void Script::setVar(uint32_t slot, const void *val, size_t len) {
@@ -97,7 +93,6 @@
Script *s = static_cast<Script *>(vs);
Allocation *a = static_cast<Allocation *>(va);
s->setSlot(slot, a);
- //ALOGE("rsi_ScriptBindAllocation %i %p %p", slot, a, a->getPtr());
}
void rsi_ScriptSetTimeZone(Context * rsc, RsScript vs, const char * timeZone, size_t length) {
diff --git a/rsScriptC.cpp b/rsScriptC.cpp
index 79725b9..466c18a 100644
--- a/rsScriptC.cpp
+++ b/rsScriptC.cpp
@@ -62,31 +62,10 @@
if (!mTypes[ct].get())
continue;
- void *ptr = NULL;
- if (mSlots[ct].get()) {
- ptr = mSlots[ct]->getPtr();
- }
-
- rsc->mHal.funcs.script.setGlobalBind(rsc, this, ct, ptr);
+ rsc->mHal.funcs.script.setGlobalBind(rsc, this, ct, mSlots[ct].get());
}
}
-const Allocation *ScriptC::ptrToAllocation(const void *ptr) const {
- //ALOGE("ptr to alloc %p", ptr);
- if (!ptr) {
- return NULL;
- }
- for (uint32_t ct=0; ct < mHal.info.exportedVariableCount; ct++) {
- if (!mSlots[ct].get())
- continue;
- if (mSlots[ct]->getPtr() == ptr) {
- return mSlots[ct].get();
- }
- }
- ALOGE("ScriptC::ptrToAllocation, failed to find %p", ptr);
- return NULL;
-}
-
void ScriptC::setupGLState(Context *rsc) {
if (mEnviroment.mFragmentStore.get()) {
rsc->setProgramStore(mEnviroment.mFragmentStore.get());
diff --git a/rsScriptC.h b/rsScriptC.h
index 92e1f4f..6bc41f2 100644
--- a/rsScriptC.h
+++ b/rsScriptC.h
@@ -38,10 +38,6 @@
ScriptC(Context *);
virtual ~ScriptC();
-
- const Allocation *ptrToAllocation(const void *) const;
-
-
virtual void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len);
virtual uint32_t run(Context *);
@@ -54,7 +50,7 @@
size_t usrBytes,
const RsScriptCall *sc = NULL);
- virtual void serialize(OStream *stream) const { }
+ virtual void serialize(Context *rsc, OStream *stream) const { }
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_SCRIPT_C; }
static Type *createFromStream(Context *rsc, IStream *stream) { return NULL; }
diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp
index 749495d..ac3dd12 100644
--- a/rsScriptC_Lib.cpp
+++ b/rsScriptC_Lib.cpp
@@ -188,10 +188,6 @@
srcAlloc, srcXoff, srcYoff, srcMip, srcFace);
}
-const Allocation * rsrGetAllocation(Context *rsc, Script *s, const void *ptr) {
- ScriptC *sc = (ScriptC *)s;
- return sc->ptrToAllocation(ptr);
-}
}
}
diff --git a/rsScriptC_LibGL.cpp b/rsScriptC_LibGL.cpp
index 21b1c42..6a897a3 100644
--- a/rsScriptC_LibGL.cpp
+++ b/rsScriptC_LibGL.cpp
@@ -251,7 +251,7 @@
float *minX, float *minY, float *minZ,
float *maxX, float *maxY, float *maxZ) {
CHECK_OBJ(sm);
- sm->computeBBox();
+ sm->computeBBox(rsc);
*minX = sm->mBBoxMin[0];
*minY = sm->mBBoxMin[1];
*minZ = sm->mBBoxMin[2];
@@ -285,9 +285,10 @@
}
void rsrDrawTextAlloc(Context *rsc, Script *sc, Allocation *a, int x, int y) {
- const char *text = (const char *)a->getPtr();
+ const char *text = (const char *)rsc->mHal.funcs.allocation.lock1D(rsc, a);
size_t allocSize = a->getType()->getSizeBytes();
rsc->mStateFont.renderText(text, allocSize, x, y);
+ rsc->mHal.funcs.allocation.unlock1D(rsc, a);
}
void rsrDrawText(Context *rsc, Script *sc, const char *text, int x, int y) {
@@ -314,11 +315,12 @@
void rsrMeasureTextAlloc(Context *rsc, Script *sc, Allocation *a,
int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
CHECK_OBJ(a);
- const char *text = (const char *)a->getPtr();
+ const char *text = (const char *)rsc->mHal.funcs.allocation.lock1D(rsc, a);
size_t textLen = a->getType()->getSizeBytes();
Font::Rect metrics;
rsc->mStateFont.measureText(text, textLen, &metrics);
SetMetrics(&metrics, left, right, top, bottom);
+ rsc->mHal.funcs.allocation.unlock1D(rsc, a);
}
void rsrMeasureText(Context *rsc, Script *sc, const char *text,
diff --git a/rsScriptIntrinsic.cpp b/rsScriptIntrinsic.cpp
new file mode 100644
index 0000000..51f0a5d
--- /dev/null
+++ b/rsScriptIntrinsic.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsContext.h"
+#include "rsScriptIntrinsic.h"
+#include <time.h>
+
+using namespace android;
+using namespace android::renderscript;
+
+ScriptIntrinsic::ScriptIntrinsic(Context *rsc) : Script(rsc) { // only forwards rsc to Script; members are filled in by init()
+}
+
+ScriptIntrinsic::~ScriptIntrinsic() { // empty; NOTE(review): confirm who frees the mSlots/mTypes arrays allocated in init()
+}
+
+bool ScriptIntrinsic::init(Context *rsc, RsScriptIntrinsicID iid, Element *e) {
+    // Record which intrinsic this is and the element it operates on.
+    mIntrinsicID = iid;
+    mElement.set(e);
+    // NOTE(review): not freed in ~ScriptIntrinsic(); confirm the base class owns these.
+    mSlots = new ObjectBaseRef<Allocation>[2];
+    mTypes = new ObjectBaseRef<const Type>[2];
+
+    // Propagate the driver's verdict (previously ignored) so the caller's failure path can run.
+    return rsc->mHal.funcs.script.initIntrinsic(rsc, this, iid, e);
+}
+
+bool ScriptIntrinsic::freeChildren() {
+ return false; // releases nothing and always reports false
+}
+
+void ScriptIntrinsic::setupScript(Context *rsc) { // no-op: rsc is unused
+}
+
+uint32_t ScriptIntrinsic::run(Context *rsc) {
+ rsAssert(!"ScriptIntrinsic::run - should not happen"); // argument is always false, so this fires whenever rsAssert is enabled
+ return 0;
+}
+
+
+void ScriptIntrinsic::runForEach(Context *rsc,
+ uint32_t slot,
+ const Allocation * ain,
+ Allocation * aout,
+ const void * usr,
+ size_t usrBytes,
+ const RsScriptCall *sc) {
+ // Pure pass-through: every argument goes unchanged to the HAL invokeForEach hook.
+ rsc->mHal.funcs.script.invokeForEach(rsc, this, slot, ain, aout, usr, usrBytes, sc);
+}
+
+void ScriptIntrinsic::Invoke(Context *rsc, uint32_t slot, const void *data, size_t len) { // no-op: all arguments are ignored
+}
+
+void ScriptIntrinsic::serialize(Context *rsc, OStream *stream) const { // no-op: nothing is written to stream
+}
+
+RsA3DClassID ScriptIntrinsic::getClassId() const {
+ return (RsA3DClassID)0; // NOTE(review): raw 0 instead of a named RS_A3D_CLASS_ID_* value — confirm which id is intended
+}
+
+
+
+namespace android {
+namespace renderscript {
+// Creates and initializes the ScriptIntrinsic for `id` operating on element `ve`.
+// Returns the new script, or NULL (after deleting it) when init() fails.
+RsScript rsi_ScriptIntrinsicCreate(Context *rsc, uint32_t id, RsElement ve) {
+    ScriptIntrinsic *si = new ScriptIntrinsic(rsc);
+    ALOGV("rsi_ScriptIntrinsicCreate %u", id);  // routine trace, not an error; id is unsigned
+    if (!si->init(rsc, (RsScriptIntrinsicID)id, (Element *)ve)) {
+        delete si;
+        return NULL;
+    }
+    return si;  // NOTE(review): no user reference is taken here — confirm ownership convention
+}
+
+}
+}
+
+
diff --git a/rsScriptIntrinsic.h b/rsScriptIntrinsic.h
new file mode 100644
index 0000000..310cbec
--- /dev/null
+++ b/rsScriptIntrinsic.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_SCRIPT_INTRINSIC_H
+#define ANDROID_RS_SCRIPT_INTRINSIC_H
+
+#include "rsScript.h"
+
+
+// ---------------------------------------------------------------------------
+namespace android {
+namespace renderscript {
+
+// Script whose init() and runForEach() are forwarded to the HAL script hooks (see rsScriptIntrinsic.cpp).
+class ScriptIntrinsic : public Script {
+public:
+ // Element handed to init().
+ ObjectBaseRef<const Element> mElement;
+
+ ScriptIntrinsic(Context *);
+ virtual ~ScriptIntrinsic();
+ // Stores iid and e, then calls the HAL initIntrinsic hook.
+ bool init(Context *rsc, RsScriptIntrinsicID iid, Element *e);
+
+ // Virtual interface; apart from runForEach these are stubs in rsScriptIntrinsic.cpp.
+ virtual void serialize(Context *rsc, OStream *stream) const;
+ virtual RsA3DClassID getClassId() const;
+ virtual bool freeChildren();
+
+ virtual void runForEach(Context *rsc,
+ uint32_t slot,
+ const Allocation * ain,
+ Allocation * aout,
+ const void * usr,
+ size_t usrBytes,
+ const RsScriptCall *sc = NULL);
+
+ virtual void Invoke(Context *rsc, uint32_t slot, const void *data, size_t len);
+ virtual void setupScript(Context *rsc);
+ virtual uint32_t run(Context *);
+protected:
+ uint32_t mIntrinsicID; // set by init() from iid
+ float mParams[9]; // NOTE(review): not touched in rsScriptIntrinsic.cpp; purpose to be confirmed
+
+};
+
+
+}
+}
+#endif
+
+
diff --git a/rsType.cpp b/rsType.cpp
index e11b9c1..7ed8d97 100644
--- a/rsType.cpp
+++ b/rsType.cpp
@@ -155,14 +155,14 @@
mElement->dumpLOGV(buf);
}
-void Type::serialize(OStream *stream) const {
+void Type::serialize(Context *rsc, OStream *stream) const {
// Need to identify ourselves
stream->addU32((uint32_t)getClassId());
String8 name(getName());
stream->addString(&name);
- mElement->serialize(stream);
+ mElement->serialize(rsc, stream);
stream->addU32(mHal.state.dimX);
stream->addU32(mHal.state.dimY);
diff --git a/rsType.h b/rsType.h
index ed4aa79..1d136b4 100644
--- a/rsType.h
+++ b/rsType.h
@@ -99,7 +99,7 @@
void compute();
void dumpLOGV(const char *prefix) const;
- virtual void serialize(OStream *stream) const;
+ virtual void serialize(Context *rsc, OStream *stream) const;
virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_TYPE; }
static Type *createFromStream(Context *rsc, IStream *stream);
diff --git a/rsUtils.h b/rsUtils.h
index cbbae6c..ebfc679 100644
--- a/rsUtils.h
+++ b/rsUtils.h
@@ -43,26 +43,6 @@
#define rsAssert(v) while (0)
#endif
-typedef float rsvF_2 __attribute__ ((vector_size (8)));
-typedef float rsvF_4 __attribute__ ((vector_size (16)));
-typedef uint8_t rsvU8_4 __attribute__ ((vector_size (4)));
-
-union float2 {
- rsvF_2 v;
- float f[2];
-};
-
-union float4 {
- rsvF_4 v;
- float f[4];
-};
-
-union uchar4 {
- rsvU8_4 v;
- uint8_t f[4];
- uint32_t packed;
-};
-
template<typename T>
T rsMin(T in1, T in2)
{
diff --git a/rs_hal.h b/rs_hal.h
index b4da744..c521ef5 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -47,13 +47,25 @@
const void *in;
void *out;
const void *usr;
- size_t usr_len;
+ size_t usrLen;
uint32_t x;
uint32_t y;
uint32_t z;
uint32_t lod;
RsAllocationCubemapFace face;
uint32_t ar[16];
+
+ uint32_t dimX;
+ uint32_t dimY;
+ uint32_t dimZ;
+ uint32_t dimArray;
+
+ const uint8_t *ptrIn;
+ uint8_t *ptrOut;
+ uint32_t eStrideIn;
+ uint32_t eStrideOut;
+ uint32_t yStrideIn;
+ uint32_t yStrideOut;
} RsForEachStubParamStruct;
/**
@@ -78,6 +90,9 @@
uint8_t const *bitcode,
size_t bitcodeSize,
uint32_t flags);
+ bool (*initIntrinsic)(const Context *rsc, Script *s,
+ RsScriptIntrinsicID iid,
+ Element *e);
void (*invokeFunction)(const Context *rsc, Script *s,
uint32_t slot,
@@ -108,7 +123,7 @@
size_t dimLength);
void (*setGlobalBind)(const Context *rsc, const Script *s,
uint32_t slot,
- void *data);
+ Allocation *data);
void (*setGlobalObj)(const Context *rsc, const Script *s,
uint32_t slot,
ObjectBase *data);
@@ -142,6 +157,24 @@
uint32_t lod, RsAllocationCubemapFace face,
uint32_t w, uint32_t h, uint32_t d, const void *data, size_t sizeBytes);
+ void (*read1D)(const Context *rsc, const Allocation *alloc,
+ uint32_t xoff, uint32_t lod, uint32_t count,
+ void *data, size_t sizeBytes);
+ void (*read2D)(const Context *rsc, const Allocation *alloc,
+ uint32_t xoff, uint32_t yoff, uint32_t lod,
+ RsAllocationCubemapFace face, uint32_t w, uint32_t h,
+ void *data, size_t sizeBytes);
+ void (*read3D)(const Context *rsc, const Allocation *alloc,
+ uint32_t xoff, uint32_t yoff, uint32_t zoff,
+ uint32_t lod, RsAllocationCubemapFace face,
+ uint32_t w, uint32_t h, uint32_t d, void *data, size_t sizeBytes);
+
+ // Lock and unlock make a 1D region of memory available to the CPU
+ // for direct access by pointer. Once unlock is called control is
+ // returned to the SOC driver.
+ void * (*lock1D)(const Context *rsc, const Allocation *alloc);
+ void (*unlock1D)(const Context *rsc, const Allocation *alloc);
+
// Allocation to allocation copies
void (*allocData1D)(const Context *rsc,
const Allocation *dstAlloc,
@@ -168,7 +201,7 @@
void (*elementData2D)(const Context *rsc, const Allocation *alloc, uint32_t x, uint32_t y,
const void *data, uint32_t elementOff, size_t sizeBytes);
-
+ void (*generateMipmaps)(const Context *rsc, const Allocation *alloc);
} allocation;
struct {
diff --git a/scriptc/rs_cl.rsh b/scriptc/rs_cl.rsh
index 45d7818..ad7e56d 100644
--- a/scriptc/rs_cl.rsh
+++ b/scriptc/rs_cl.rsh
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 The Android Open Source Project
+ * Copyright (C) 2011-2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -25,11 +25,11 @@
// Conversions
#define CVT_FUNC_2(typeout, typein) \
-_RS_RUNTIME typeout##2 __attribute__((overloadable)) \
+_RS_RUNTIME typeout##2 __attribute__((overloadable)) \
convert_##typeout##2(typein##2 v); \
-_RS_RUNTIME typeout##3 __attribute__((overloadable)) \
+_RS_RUNTIME typeout##3 __attribute__((overloadable)) \
convert_##typeout##3(typein##3 v); \
-_RS_RUNTIME typeout##4 __attribute__((overloadable)) \
+_RS_RUNTIME typeout##4 __attribute__((overloadable)) \
convert_##typeout##4(typein##4 v);
@@ -41,73 +41,140 @@
CVT_FUNC_2(type, int) \
CVT_FUNC_2(type, float)
+/**
+ * Convert to char.
+ *
+ * Supports 2,3,4 components of uchar, char, ushort, short, uint, int, float.
+ */
CVT_FUNC(char)
+
+/**
+ * Convert to unsigned char.
+ *
+ * Supports 2,3,4 components of uchar, char, ushort, short, uint, int, float.
+ */
CVT_FUNC(uchar)
+
+/**
+ * Convert to short.
+ *
+ * Supports 2,3,4 components of uchar, char, ushort, short, uint, int, float.
+ */
CVT_FUNC(short)
+
+/**
+ * Convert to unsigned short.
+ *
+ * Supports 2,3,4 components of uchar, char, ushort, short, uint, int, float.
+ */
CVT_FUNC(ushort)
+
+/**
+ * Convert to int.
+ *
+ * Supports 2,3,4 components of uchar, char, ushort, short, uint, int, float.
+ */
CVT_FUNC(int)
+
+/**
+ * Convert to unsigned int.
+ *
+ * Supports 2,3,4 components of uchar, char, ushort, short, uint, int, float.
+ */
CVT_FUNC(uint)
+
+/**
+ * Convert to float.
+ *
+ * Supports 2,3,4 components of uchar, char, ushort, short, uint, int, float.
+ */
CVT_FUNC(float)
// Float ops, 6.11.2
#define FN_FUNC_FN(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v); \
-_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v); \
+_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v); \
+_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v); \
_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v);
+#define F_FUNC_FN(fnc) \
+_RS_RUNTIME float __attribute__((overloadable)) fnc(float2 v); \
+_RS_RUNTIME float __attribute__((overloadable)) fnc(float3 v); \
+_RS_RUNTIME float __attribute__((overloadable)) fnc(float4 v);
+
#define IN_FUNC_FN(fnc) \
-_RS_RUNTIME int2 __attribute__((overloadable)) fnc(float2 v); \
-_RS_RUNTIME int3 __attribute__((overloadable)) fnc(float3 v); \
+_RS_RUNTIME int2 __attribute__((overloadable)) fnc(float2 v); \
+_RS_RUNTIME int3 __attribute__((overloadable)) fnc(float3 v); \
_RS_RUNTIME int4 __attribute__((overloadable)) fnc(float4 v);
#define FN_FUNC_FN_FN(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2); \
-_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2); \
+_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2); \
+_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2); \
_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2);
+#define F_FUNC_FN_FN(fnc) \
+_RS_RUNTIME float __attribute__((overloadable)) fnc(float2 v1, float2 v2); \
+_RS_RUNTIME float __attribute__((overloadable)) fnc(float3 v1, float3 v2); \
+_RS_RUNTIME float __attribute__((overloadable)) fnc(float4 v1, float4 v2);
+
#define FN_FUNC_FN_F(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, float v2); \
-_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, float v2); \
+_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, float v2); \
+_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, float v2); \
_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, float v2);
#define FN_FUNC_FN_IN(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2); \
-_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2); \
-_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2); \
+_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2); \
+_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2); \
+_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2);
#define FN_FUNC_FN_I(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int v2); \
-_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int v2); \
+_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int v2); \
+_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int v2); \
_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, int v2);
-#define FN_FUNC_FN_PFN(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) \
- fnc(float2 v1, float2 *v2); \
-_RS_RUNTIME float3 __attribute__((overloadable)) \
- fnc(float3 v1, float3 *v2); \
-_RS_RUNTIME float4 __attribute__((overloadable)) \
+#define FN_FUNC_FN_PFN(fnc) \
+_RS_RUNTIME float2 __attribute__((overloadable)) \
+ fnc(float2 v1, float2 *v2); \
+_RS_RUNTIME float3 __attribute__((overloadable)) \
+ fnc(float3 v1, float3 *v2); \
+_RS_RUNTIME float4 __attribute__((overloadable)) \
fnc(float4 v1, float4 *v2);
#define FN_FUNC_FN_PIN(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2); \
-_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2); \
+_RS_RUNTIME float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2); \
+_RS_RUNTIME float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2); \
_RS_RUNTIME float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2);
-#define FN_FUNC_FN_FN_FN(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) \
- fnc(float2 v1, float2 v2, float2 v3); \
-_RS_RUNTIME float3 __attribute__((overloadable)) \
- fnc(float3 v1, float3 v2, float3 v3); \
-_RS_RUNTIME float4 __attribute__((overloadable)) \
+#define FN_FUNC_FN_FN_FN(fnc) \
+_RS_RUNTIME float2 __attribute__((overloadable)) \
+ fnc(float2 v1, float2 v2, float2 v3); \
+_RS_RUNTIME float3 __attribute__((overloadable)) \
+ fnc(float3 v1, float3 v2, float3 v3); \
+_RS_RUNTIME float4 __attribute__((overloadable)) \
fnc(float4 v1, float4 v2, float4 v3);
-#define FN_FUNC_FN_FN_PIN(fnc) \
-_RS_RUNTIME float2 __attribute__((overloadable)) \
- fnc(float2 v1, float2 v2, int2 *v3); \
-_RS_RUNTIME float3 __attribute__((overloadable)) \
- fnc(float3 v1, float3 v2, int3 *v3); \
-_RS_RUNTIME float4 __attribute__((overloadable)) \
+#define FN_FUNC_FN_FN_F(fnc) \
+_RS_RUNTIME float2 __attribute__((overloadable)) \
+ fnc(float2 v1, float2 v2, float v3); \
+_RS_RUNTIME float3 __attribute__((overloadable)) \
+ fnc(float3 v1, float3 v2, float v3); \
+_RS_RUNTIME float4 __attribute__((overloadable)) \
+ fnc(float4 v1, float4 v2, float v3);
+
+#define FN_FUNC_FN_F_F(fnc) \
+_RS_RUNTIME float2 __attribute__((overloadable)) \
+ fnc(float2 v1, float v2, float v3); \
+_RS_RUNTIME float3 __attribute__((overloadable)) \
+ fnc(float3 v1, float v2, float v3); \
+_RS_RUNTIME float4 __attribute__((overloadable)) \
+ fnc(float4 v1, float v2, float v3);
+
+#define FN_FUNC_FN_FN_PIN(fnc) \
+_RS_RUNTIME float2 __attribute__((overloadable)) \
+ fnc(float2 v1, float2 v2, int2 *v3); \
+_RS_RUNTIME float3 __attribute__((overloadable)) \
+ fnc(float3 v1, float3 v2, int3 *v3); \
+_RS_RUNTIME float4 __attribute__((overloadable)) \
fnc(float4 v1, float4 v2, int4 *v3);
@@ -491,7 +558,7 @@
FN_FUNC_FN_FN_FN(mad)
/**
- * Return the integral and fractional components of a number
+ * Return the integral and fractional components of a number.
* Supports 1,2,3,4 components
*
* @param x Source value
@@ -575,7 +642,6 @@
/**
* Return (1 / sqrt(value)).
*
- * @param v The incoming value in radians
* Supports 1,2,3,4 components
*/
_RS_RUNTIME float __attribute__((overloadable)) rsqrt(float v);
@@ -663,8 +729,8 @@
#define XN_FUNC_YN(typeout, fnc, typein) \
extern typeout __attribute__((overloadable)) fnc(typein); \
-_RS_RUNTIME typeout##2 __attribute__((overloadable)) fnc(typein##2 v); \
-_RS_RUNTIME typeout##3 __attribute__((overloadable)) fnc(typein##3 v); \
+_RS_RUNTIME typeout##2 __attribute__((overloadable)) fnc(typein##2 v); \
+_RS_RUNTIME typeout##3 __attribute__((overloadable)) fnc(typein##3 v); \
_RS_RUNTIME typeout##4 __attribute__((overloadable)) fnc(typein##4 v);
#define UIN_FUNC_IN(fnc) \
@@ -682,25 +748,36 @@
#define XN_FUNC_XN_XN_BODY(type, fnc, body) \
-_RS_RUNTIME type __attribute__((overloadable)) \
+_RS_RUNTIME type __attribute__((overloadable)) \
fnc(type v1, type v2); \
-_RS_RUNTIME type##2 __attribute__((overloadable)) \
+_RS_RUNTIME type##2 __attribute__((overloadable)) \
fnc(type##2 v1, type##2 v2); \
-_RS_RUNTIME type##3 __attribute__((overloadable)) \
+_RS_RUNTIME type##3 __attribute__((overloadable)) \
fnc(type##3 v1, type##3 v2); \
-_RS_RUNTIME type##4 __attribute__((overloadable)) \
+_RS_RUNTIME type##4 __attribute__((overloadable)) \
fnc(type##4 v1, type##4 v2);
-#define IN_FUNC_IN_IN_BODY(fnc, body) \
-XN_FUNC_XN_XN_BODY(uchar, fnc, body) \
-XN_FUNC_XN_XN_BODY(char, fnc, body) \
-XN_FUNC_XN_XN_BODY(ushort, fnc, body) \
-XN_FUNC_XN_XN_BODY(short, fnc, body) \
-XN_FUNC_XN_XN_BODY(uint, fnc, body) \
-XN_FUNC_XN_XN_BODY(int, fnc, body) \
+#define IN_FUNC_IN_IN_BODY(fnc, body) \
+XN_FUNC_XN_XN_BODY(uchar, fnc, body) \
+XN_FUNC_XN_XN_BODY(char, fnc, body) \
+XN_FUNC_XN_XN_BODY(ushort, fnc, body) \
+XN_FUNC_XN_XN_BODY(short, fnc, body) \
+XN_FUNC_XN_XN_BODY(uint, fnc, body) \
+XN_FUNC_XN_XN_BODY(int, fnc, body) \
XN_FUNC_XN_XN_BODY(float, fnc, body)
+/**
+ * Return the absolute value of a value.
+ *
+ * Supports 1,2,3,4 components of char, short, int.
+ */
UIN_FUNC_IN(abs)
+
+/**
+ * Return the number of leading 0-bits in a value.
+ *
+ * Supports 1,2,3,4 components of uchar, char, ushort, short, uint, int.
+ */
IN_FUNC_IN(clz)
/**
@@ -727,12 +804,8 @@
* @param high High bound, must match type of low
*/
_RS_RUNTIME float __attribute__((overloadable)) clamp(float amount, float low, float high);
-_RS_RUNTIME float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
-_RS_RUNTIME float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
-_RS_RUNTIME float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
-_RS_RUNTIME float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
-_RS_RUNTIME float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
-_RS_RUNTIME float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
+FN_FUNC_FN_FN_FN(clamp)
+FN_FUNC_FN_F_F(clamp)
/**
* Convert from radians to degrees.
@@ -748,12 +821,8 @@
* Supports 1,2,3,4 components
*/
_RS_RUNTIME float __attribute__((overloadable)) mix(float start, float stop, float amount);
-_RS_RUNTIME float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount);
-_RS_RUNTIME float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount);
-_RS_RUNTIME float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount);
-_RS_RUNTIME float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount);
-_RS_RUNTIME float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount);
-_RS_RUNTIME float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount);
+FN_FUNC_FN_FN_FN(mix)
+FN_FUNC_FN_FN_F(mix)
/**
* Convert from degrees to radians.
@@ -772,12 +841,8 @@
* Supports 1,2,3,4 components
*/
_RS_RUNTIME float __attribute__((overloadable)) step(float edge, float v);
-_RS_RUNTIME float2 __attribute__((overloadable)) step(float2 edge, float2 v);
-_RS_RUNTIME float3 __attribute__((overloadable)) step(float3 edge, float3 v);
-_RS_RUNTIME float4 __attribute__((overloadable)) step(float4 edge, float4 v);
-_RS_RUNTIME float2 __attribute__((overloadable)) step(float2 edge, float v);
-_RS_RUNTIME float3 __attribute__((overloadable)) step(float3 edge, float v);
-_RS_RUNTIME float4 __attribute__((overloadable)) step(float4 edge, float v);
+FN_FUNC_FN_FN(step)
+FN_FUNC_FN_F(step)
// not implemented
extern float __attribute__((overloadable)) smoothstep(float, float, float);
@@ -789,6 +854,8 @@
extern float4 __attribute__((overloadable)) smoothstep(float, float, float4);
/**
+ * Return the sign of a value.
+ *
* if (v < 0) return -1.f;
* else if (v > 0) return 1.f;
* else return 0.f;
@@ -812,9 +879,7 @@
* Supports 1,2,3,4 components
*/
_RS_RUNTIME float __attribute__((overloadable)) dot(float lhs, float rhs);
-_RS_RUNTIME float __attribute__((overloadable)) dot(float2 lhs, float2 rhs);
-_RS_RUNTIME float __attribute__((overloadable)) dot(float3 lhs, float3 rhs);
-_RS_RUNTIME float __attribute__((overloadable)) dot(float4 lhs, float4 rhs);
+F_FUNC_FN_FN(dot)
/**
* Compute the length of a vector.
@@ -822,9 +887,7 @@
* Supports 1,2,3,4 components
*/
_RS_RUNTIME float __attribute__((overloadable)) length(float v);
-_RS_RUNTIME float __attribute__((overloadable)) length(float2 v);
-_RS_RUNTIME float __attribute__((overloadable)) length(float3 v);
-_RS_RUNTIME float __attribute__((overloadable)) length(float4 v);
+F_FUNC_FN(length)
/**
* Compute the distance between two points.
@@ -832,9 +895,7 @@
* Supports 1,2,3,4 components
*/
_RS_RUNTIME float __attribute__((overloadable)) distance(float lhs, float rhs);
-_RS_RUNTIME float __attribute__((overloadable)) distance(float2 lhs, float2 rhs);
-_RS_RUNTIME float __attribute__((overloadable)) distance(float3 lhs, float3 rhs);
-_RS_RUNTIME float __attribute__((overloadable)) distance(float4 lhs, float4 rhs);
+F_FUNC_FN_FN(distance)
/**
* Normalize a vector.
@@ -842,21 +903,86 @@
* Supports 1,2,3,4 components
*/
_RS_RUNTIME float __attribute__((overloadable)) normalize(float v);
-_RS_RUNTIME float2 __attribute__((overloadable)) normalize(float2 v);
-_RS_RUNTIME float3 __attribute__((overloadable)) normalize(float3 v);
-_RS_RUNTIME float4 __attribute__((overloadable)) normalize(float4 v);
+FN_FUNC_FN(normalize)
+
+
+// New approx API functions
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+
+/**
+ * Return the approximate reciprocal of a value.
+ *
+ * Supports 1,2,3,4 components
+ */
+_RS_RUNTIME float __attribute__((overloadable)) approx_recip(float);
+FN_FUNC_FN(approx_recip)
+
+/**
+ * Return the approximate square root of a value.
+ *
+ * Supports 1,2,3,4 components
+ */
+_RS_RUNTIME float __attribute__((overloadable)) approx_sqrt(float);
+FN_FUNC_FN(approx_sqrt)
+
+/**
+ * Return the approximate value of (1 / sqrt(value)).
+ *
+ * Supports 1,2,3,4 components
+ */
+_RS_RUNTIME float __attribute__((overloadable)) approx_rsqrt(float v);
+FN_FUNC_FN(approx_rsqrt)
+
+/**
+ * Compute the approximate length of a vector.
+ *
+ * Supports 1,2,3,4 components
+ */
+_RS_RUNTIME float __attribute__((overloadable)) approx_length(float v);
+F_FUNC_FN(approx_length)
+
+/**
+ * Compute the approximate distance between two points.
+ *
+ * Supports 1,2,3,4 components
+ */
+_RS_RUNTIME float __attribute__((overloadable)) approx_distance(float lhs, float rhs);
+F_FUNC_FN_FN(approx_distance)
+
+/**
+ * Approximately normalize a vector; the result has the same size as the input.
+ *
+ * Supports 1,2,3,4 components
+ */
+_RS_RUNTIME float __attribute__((overloadable)) approx_normalize(float v);
+FN_FUNC_FN(approx_normalize)
+
+/**
+ * Compute the approximate arctangent of a value.
+ *
+ * Supports 1,2,3,4 components
+ */
+_RS_RUNTIME float __attribute__((overloadable)) approx_atan(float v);
+FN_FUNC_FN(approx_atan)
+
+#endif // (defined(RS_VERSION) && (RS_VERSION >= 17))
+
#undef CVT_FUNC
#undef CVT_FUNC_2
#undef FN_FUNC_FN
+#undef F_FUNC_FN
#undef IN_FUNC_FN
#undef FN_FUNC_FN_FN
+#undef F_FUNC_FN_FN
#undef FN_FUNC_FN_F
#undef FN_FUNC_FN_IN
#undef FN_FUNC_FN_I
#undef FN_FUNC_FN_PFN
#undef FN_FUNC_FN_PIN
#undef FN_FUNC_FN_FN_FN
+#undef FN_FUNC_FN_FN_F
+#undef FN_FUNC_FN_F_F
#undef FN_FUNC_FN_FN_PIN
#undef XN_FUNC_YN
#undef UIN_FUNC_IN
diff --git a/scriptc/rs_debug.rsh b/scriptc/rs_debug.rsh
index 074c28f..7a13c9d 100644
--- a/scriptc/rs_debug.rsh
+++ b/scriptc/rs_debug.rsh
@@ -27,7 +27,6 @@
#define __RS_DEBUG_RSH__
-
/**
* Debug function. Prints a string and value to the log.
*/
@@ -52,6 +51,21 @@
* Debug function. Prints a string and value to the log.
*/
extern void __attribute__((overloadable))
+ rsDebug(const char *, float2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, float3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, float4);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
rsDebug(const char *, double);
/**
* Debug function. Prints a string and value to the log.
@@ -103,21 +117,151 @@
*/
extern void __attribute__((overloadable))
rsDebug(const char *, const void *);
+
+#if (defined(RS_VERSION) && (RS_VERSION >= 17))
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, char);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, char2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, char3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, char4);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, unsigned char);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, uchar2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, uchar3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, uchar4);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, short);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, short2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, short3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, short4);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, unsigned short);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, ushort2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, ushort3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, ushort4);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, int2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, int3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, int4);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, uint2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, uint3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, uint4);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, long2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, long3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, long4);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, ulong2);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, ulong3);
+/**
+ * Debug function. Prints a string and value to the log.
+ */
+extern void __attribute__((overloadable))
+ rsDebug(const char *, ulong4);
+#endif // (defined(RS_VERSION) && (RS_VERSION >= 17))
+
#define RS_DEBUG(a) rsDebug(#a, a)
#define RS_DEBUG_MARKER rsDebug(__FILE__, __LINE__)
-
-/**
- * Debug function. Prints a string and value to the log.
- */
-_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float2 v);
-/**
- * Debug function. Prints a string and value to the log.
- */
-_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float3 v);
-/**
- * Debug function. Prints a string and value to the log.
- */
-_RS_RUNTIME void __attribute__((overloadable)) rsDebug(const char *s, float4 v);
-
#endif
diff --git a/scriptc/rs_math.rsh b/scriptc/rs_math.rsh
index 8117ca8..73040b3 100644
--- a/scriptc/rs_math.rsh
+++ b/scriptc/rs_math.rsh
@@ -244,5 +244,8 @@
*/
_RS_RUNTIME float4 rsUnpackColor8888(uchar4 c);
+_RS_RUNTIME uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v);
+_RS_RUNTIME float4 __attribute__((overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v);
+
#endif