Clean up type offsets, which cannot be calculated for flexible YUV.
Support flexible YUV
bug 10567550
Change-Id: I4f6e5a8d86eeee635605460f1751208f3320969b
(cherry picked from commit a75372759e288be3fb8835735a830b1f7d1a4c42)
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index 4f56443..7546b38 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -107,6 +107,7 @@
};
extern "C" void rsdIntrinsicYuv_K(void *dst, const uchar *Y, const uchar *uv, uint32_t count, const short *param);
+extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t count, const short *param);
extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, uint32_t count, const short *param);
void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
@@ -135,91 +136,63 @@
uint32_t x1 = xstart;
uint32_t x2 = xend;
- switch (cp->alloc->mHal.state.yuv) {
- // In API 17 there was no yuv format and the intrinsic treated everything as NV21
- case 0:
-#if !defined(RS_SERVER)
- case HAL_PIXEL_FORMAT_YCrCb_420_SP: // NV21
-#endif
- {
- const uchar *pinUV = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
- size_t strideUV = cp->alloc->mHal.drvState.lod[1].stride;
- const uchar *uv = pinUV + ((p->y >> 1) * strideUV);
+ // Byte distance between consecutive chroma samples within a row.
+ // Not const: the legacy fallback below has to override it.
+ size_t cstep = cp->alloc->mHal.drvState.yuv.step;
- if (pinUV == NULL) {
- // Legacy yuv support didn't fill in uv
- strideUV = strideY;
- uv = ((uint8_t *)cp->alloc->mHal.drvState.lod[0].mallocPtr) +
- (strideY * p->dimY) +
- ((p->y >> 1) * strideUV);
- }
+ const uchar *pinU = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
+ const size_t strideU = cp->alloc->mHal.drvState.lod[1].stride;
+ const uchar *u = pinU + ((p->y >> 1) * strideU);
- if(x2 > x1) {
- if (gArchUseSIMD) {
- #if defined(ARCH_ARM_HAVE_VFP)
- int32_t len = (x2 - x1 - 1) >> 3;
- if(len > 0) {
- // ALOGE("%p, %p, %p, %d, %p", out, Y, uv, len, YuvCoeff);
- rsdIntrinsicYuv_K(out, Y, uv, len, YuvCoeff);
- x1 += len << 3;
- out += len << 3;
- }
- #endif
+ const uchar *pinV = (const uchar *)cp->alloc->mHal.drvState.lod[2].mallocPtr;
+ const size_t strideV = cp->alloc->mHal.drvState.lod[2].stride;
+ const uchar *v = pinV + ((p->y >> 1) * strideV);
+
+ if (pinU == NULL) {
+ // Legacy yuv support didn't fill in uv
+ v = ((uint8_t *)cp->alloc->mHal.drvState.lod[0].mallocPtr) +
+ (strideY * p->dimY) +
+ ((p->y >> 1) * strideY);
+ u = v + 1;
+ // The pointers above assume interleaved V,U byte pairs (u == v + 1), so the
+ // chroma step must be 2 here regardless of what drvState.yuv.step holds.
+ cstep = 2;
+ }
+
+#if defined(ARCH_ARM_HAVE_VFP)
+ if((x2 > x1) && gArchUseSIMD) {
+ int32_t len = (x2 - x1 - 1) >> 3;
+ if(len > 0) {
+ if (cstep == 1) {
+ rsdIntrinsicYuv2_K(out, Y, u, v, len, YuvCoeff);
+ x1 += len << 3;
+ out += len << 3;
+ } else if (cstep == 2) {
+ // Check for proper interleave
+ intptr_t ipu = (intptr_t)u;
+ intptr_t ipv = (intptr_t)v;
+
+ if (ipu == (ipv + 1)) {
+ rsdIntrinsicYuv_K(out, Y, v, len, YuvCoeff);
+ x1 += len << 3;
+ out += len << 3;
+ } else if (ipu == (ipv - 1)) {
+ rsdIntrinsicYuvR_K(out, Y, u, len, YuvCoeff);
+ x1 += len << 3;
+ out += len << 3;
+ }
- // ALOGE("y %i %i %i", p->y, x1, x2);
- while(x1 < x2) {
- uchar u = uv[(x1 & 0xffffe) + 1];
- uchar v = uv[(x1 & 0xffffe) + 0];
- *out = rsYuvToRGBA_uchar4(Y[x1], u, v);
- out++;
- x1++;
- *out = rsYuvToRGBA_uchar4(Y[x1], u, v);
- out++;
- x1++;
- }
}
}
- break;
-
-#if !defined(RS_SERVER)
- case HAL_PIXEL_FORMAT_YV12:
- {
- const uchar *pinU = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
- const size_t strideU = cp->alloc->mHal.drvState.lod[1].stride;
- const uchar *u = pinU + ((p->y >> 1) * strideU);
-
- const uchar *pinV = (const uchar *)cp->alloc->mHal.drvState.lod[2].mallocPtr;
- const size_t strideV = cp->alloc->mHal.drvState.lod[2].stride;
- const uchar *v = pinV + ((p->y >> 1) * strideV);
-
- if(x2 > x1) {
- #if defined(ARCH_ARM_HAVE_VFP)
- if (gArchUseSIMD) {
- int32_t len = (x2 - x1 - 1) >> 3;
- if(len > 0) {
- rsdIntrinsicYuv2_K(out, Y, u, v, len, YuvCoeff);
- x1 += len << 3;
- out += len << 3;
- }
- }
- #endif
-
- // ALOGE("y %i %i %i", p->y, x1, x2);
- while(x1 < x2) {
- uchar ut = u[x1];
- uchar vt = v[x1];
- *out = rsYuvToRGBA_uchar4(Y[x1], ut, vt);
- out++;
- x1++;
- *out = rsYuvToRGBA_uchar4(Y[x1], ut, vt);
- out++;
- x1++;
- }
- }
- }
- break;
+ }
#endif
+
+ if(x2 > x1) {
+ // ALOGE("y %i %i %i", p->y, x1, x2);
+ while(x1 < x2) {
+ // One chroma sample is shared by the two pixels converted per iteration.
+ int cx = (x1 >> 1) * cstep;
+ *out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
+ out++;
+ x1++;
+ *out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
+ out++;
+ x1++;
+ }
}
}
diff --git a/cpu_ref/rsCpuIntrinsics_neon.S b/cpu_ref/rsCpuIntrinsics_neon.S
index 52fd565..da58f89 100644
--- a/cpu_ref/rsCpuIntrinsics_neon.S
+++ b/cpu_ref/rsCpuIntrinsics_neon.S
@@ -338,6 +338,97 @@
END(rsdIntrinsicYuv_K)
/*
+ Function called with the following arguments: dst, Y, uv, len, YuvCoeff
+ r0 = dst
+ r1 = Y
+ r2 = UV
+ r3 = length (pixels / 8)
+ ---- Args below will be in the stack ----
+ sp = YuvCoeff
+
+ This function converts 8 pixels per iteration; chroma is read as interleaved U,V byte pairs (U first)
+*/
+ENTRY(rsdIntrinsicYuvR_K)
+ push {r4, r5, lr} @ preserve clobbered int registers
+ vpush {Q4-Q7} @ preserve Vregisters we clobber
+
+ mov r5, #16 @ Integer 16 in r5; used as an incrementing value
+
+ ldr r4, [sp, #64+12] @ load the coeffs address in memory in r4 (16*4 + 4*3)
+ vld1.16 {Q2}, [r4]! @ load the multipliers from the coeffs matrix (r4) in Q2
+ vld1.8 {d6[]}, [r4], r5 @ load y offset 16 from the coeffs matrix (r4) in d6
+ vld1.8 {d8[]}, [r4], r5 @ load V and U offset of 128 from the coeffs matrix (r4) in d8
+
+ mov r4, #8 @ Integer 8 in r4; used as an incrementing value
+
+ vdup.8 d3, d5[1] @ d3 = 255 (alpha) from the multipliers line in
+ @ the coeffs matrix (Q2)
+
+ 1:
+ vld1.8 {d10}, [r1]! @ get Y (r1->Y)
+ vld2.8 {d12, d14}, [r2], r4 @ split U (d12) from V (d14) (r2 -> UV) and increase pointer by 8 (in r4)
+ pld [r1, #64] @ preloading data from address y(r1) + 64 for subsequent loops
+ pld [r2, #64] @ preloading data from address uv(r2) + 64 for subsequent loops
+
+ vsubl.u8 Q5, d10, d6 @ Y to 16 bit - 16 (in 16bit) (n to n+7)
+ vmull.s16 Q8, d10, d4[0] @ Y(n,n+1,n+2,n+3) * 298 = Q8 (to 32bit)
+ vmull.s16 Q11, d11, d4[0] @ Y(n+4,n+5,n+6,n+7) * 298 = Q11 (to 32bit)
+
+ vsubl.u8 Q5, d14, d8 @ V to 16 bit - 128 = Q5 // V(n, n+1, n+2,n+3)
+ vsubl.u8 Q6, d12, d8 @ U to 16 bit - 128 = Q6 // U(n, n+1, n+2,n+3)
+ vmov.u16 d11, d10 @ Copying V to d11
+ vmov.u16 d13, d12 @ Copying U to d13
+ vzip.u16 d10, d11 @ Q5 = V (n,n n+1, n+1) V(n+2, n+2, n+3, n+3)
+ vzip.u16 d12, d13 @ Q6 = U (n,n n+1, n+1) U(n+2, n+2, n+3, n+3)
+
+
+ vmov Q9, Q8 @ Copy Q8(Y: n, n+1, n+2, n+3) to Q9
+ vmov Q10, Q8 @ Copy Q8(Y: n, n+1, n+2, n+3) to Q10
+ vmov Q12, Q11 @ Copy Q11(Y: n+4, n+5, n+6, n+7) to Q12
+ vmov Q13, Q11 @ Copy Q11(Y: n+4, n+5, n+6, n+7) to Q13
+
+ @ R G B
+ @ Pixel(0-3) Q8, Q9, Q10
+ @ Pixel(4-7) Q11, Q12, Q13
+ @
+
+ @ Pixel(0-3)
+ vmlal.s16 Q8, d10, d4[1] @ R : Q8 = Q8(Y-16) + (V-128) * 409
+ vmlal.s16 Q9, d10, d5[0] @ G : Q9 = Q9(Y-16) + (V-128) * (-208)
+ vmlal.s16 Q9, d12, d4[2] @ + (U-128) * (-100)
+ vmlal.s16 Q10, d12, d4[3] @ B : Q10 = Q10(Y-16) + (U-128) * 516
+
+ @ Pixel(4-7)
+ vmlal.s16 Q11, d11, d4[1] @ R : Q11 = Q11(Y-16) + (V-128) * 409
+ vmlal.s16 Q12, d11, d5[0] @ G : Q12 = Q12(Y-16) + (V-128) * (-208)
+ vmlal.s16 Q12, d13, d4[2] @ + (U-128) * (-100)
+ vmlal.s16 Q13, d13, d4[3] @ B : Q13 = Q13(Y-16) + (U-128) * 516
+
+ @ Pixel(0-3)
+ vrshrn.i32 d16, Q8, #8 @ d16 : R shifted right by 8 rounded'n narrowed to 16bit
+ vrshrn.i32 d18, Q9, #8 @ d18 : G shifted right by 8 rounded'n narrowed to 16bit
+ vrshrn.i32 d20, Q10, #8 @ d20 : B shifted right by 8 rounded'n narrowed to 16bit
+
+ @ Pixel(4-7)
+ vrshrn.i32 d17, Q11, #8 @ d17 : R shifted right by 8 rounded'n narrowed to 16bit
+ vrshrn.i32 d19, Q12, #8 @ d19 : G shifted right by 8 rounded'n narrowed to 16bit
+ vrshrn.i32 d21, Q13, #8 @ d21 : B shifted right by 8 rounded'n narrowed to 16bit
+
+ vqmovun.s16 d0, Q8 @ r = d0 (saturated, unsigned and narrowed to 8bit)
+ vqmovun.s16 d1, Q9 @ g = d1 (saturated, unsigned and narrowed to 8bit)
+ vqmovun.s16 d2, Q10 @ b = d2 (saturated, unsigned and narrowed to 8bit)
+
+ subs r3, r3, #1 @ Checking length (r3)
+ vst4.8 {d0, d1, d2, d3}, [r0]! @ Writing out 8 RGBA values to dst (r0)
+
+ bne 1b @ if not done with length, loop
+
+ vpop {Q4-Q7} @ Restore Vregisters
+ pop {r4, r5, lr} @ Restore int registers
+ bx lr
+END(rsdIntrinsicYuvR_K)
+
+/*
Function called with the following arguments: dst, Y, v, u, len, YuvCoeff
r0 = dst
r1 = Y
diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp
index 66eddeb..9f217e8 100644
--- a/driver/rsdAllocation.cpp
+++ b/driver/rsdAllocation.cpp
@@ -243,8 +243,9 @@
return;
}
RSD_CALL_GL(glBindBuffer, drv->glTarget, drv->bufferID);
- RSD_CALL_GL(glBufferData, drv->glTarget, alloc->mHal.state.type->getSizeBytes(),
- alloc->mHal.drvState.lod[0].mallocPtr, GL_DYNAMIC_DRAW);
+ RSD_CALL_GL(glBufferData, drv->glTarget,
+ alloc->mHal.state.type->getPackedSizeBytes(),
+ alloc->mHal.drvState.lod[0].mallocPtr, GL_DYNAMIC_DRAW);
RSD_CALL_GL(glBindBuffer, drv->glTarget, 0);
rsdGLCheckError(rsc, "UploadToBufferObject");
#endif
@@ -255,34 +256,42 @@
// YUV only supports basic 2d
// so we can stash the plane pointers in the mipmap levels.
size_t uvSize = 0;
+ state->lod[1].dimX = state->lod[0].dimX / 2;
+ state->lod[1].dimY = state->lod[0].dimY / 2;
+ state->lod[2].dimX = state->lod[0].dimX / 2;
+ state->lod[2].dimY = state->lod[0].dimY / 2;
+ state->yuv.shift = 1;
+ state->yuv.step = 1;
+ state->lodCount = 3;
+
#ifndef RS_SERVER
switch(yuv) {
case HAL_PIXEL_FORMAT_YV12:
- state->lod[2].dimX = state->lod[0].dimX / 2;
- state->lod[2].dimY = state->lod[0].dimY / 2;
state->lod[2].stride = rsRound(state->lod[0].stride >> 1, 16);
state->lod[2].mallocPtr = ((uint8_t *)state->lod[0].mallocPtr) +
(state->lod[0].stride * state->lod[0].dimY);
uvSize += state->lod[2].stride * state->lod[2].dimY;
- state->lod[1].dimX = state->lod[2].dimX;
- state->lod[1].dimY = state->lod[2].dimY;
state->lod[1].stride = state->lod[2].stride;
state->lod[1].mallocPtr = ((uint8_t *)state->lod[2].mallocPtr) +
(state->lod[2].stride * state->lod[2].dimY);
uvSize += state->lod[1].stride * state->lod[2].dimY;
-
- state->lodCount = 3;
break;
case HAL_PIXEL_FORMAT_YCrCb_420_SP: // NV21
- state->lod[1].dimX = state->lod[0].dimX;
- state->lod[1].dimY = state->lod[0].dimY / 2;
+ //state->lod[1].dimX = state->lod[0].dimX;
state->lod[1].stride = state->lod[0].stride;
- state->lod[1].mallocPtr = ((uint8_t *)state->lod[0].mallocPtr) +
+ state->lod[2].stride = state->lod[0].stride;
+ state->lod[2].mallocPtr = ((uint8_t *)state->lod[0].mallocPtr) +
(state->lod[0].stride * state->lod[0].dimY);
+ state->lod[1].mallocPtr = ((uint8_t *)state->lod[2].mallocPtr) + 1;
uvSize += state->lod[1].stride * state->lod[1].dimY;
- state->lodCount = 2;
+ state->yuv.step = 2;
break;
+#ifndef RS_COMPATIBILITY_LIB
+ case HAL_PIXEL_FORMAT_YCbCr_420_888:
+ // This will be filled in by ioReceive()
+ break;
+#endif
default:
rsAssert(0);
}
@@ -766,7 +775,6 @@
const size_t eSize = alloc->mHal.state.type->getElementSizeBytes();
uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X);
size_t size = count * eSize;
-
if (ptr != data) {
// Skip the copy if we are the same allocation. This can arise from
// our Bitmap optimization, where we share the same storage.
@@ -811,13 +819,20 @@
}
if (alloc->mHal.state.yuv) {
int lod = 1;
- while (alloc->mHal.drvState.lod[lod].mallocPtr) {
- size_t lineSize = alloc->mHal.drvState.lod[lod].dimX;
+ int maxLod = 2;
+ if (alloc->mHal.state.yuv == HAL_PIXEL_FORMAT_YV12) {
+ maxLod = 3;
+ } else if (alloc->mHal.state.yuv == HAL_PIXEL_FORMAT_YCrCb_420_SP) {
+ lod = 2;
+ maxLod = 3;
+ }
+
+ while (lod < maxLod) {
uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, 0, lod, face);
for (uint32_t line=(yoff >> 1); line < ((yoff+h)>>1); line++) {
memcpy(dst, src, lineSize);
- src += lineSize;
+ src += alloc->mHal.drvState.lod[lod].stride;
dst += alloc->mHal.drvState.lod[lod].stride;
}
lod++;
diff --git a/driver/runtime/rs_allocation.c b/driver/runtime/rs_allocation.c
index 198e9af..964853b 100644
--- a/driver/runtime/rs_allocation.c
+++ b/driver/runtime/rs_allocation.c
@@ -266,53 +266,27 @@
rsGetElementAtYuv_uchar_U(rs_allocation a, uint32_t x, uint32_t y) {
Allocation_t *alloc = (Allocation_t *)a.p;
- const uint32_t yuvID = alloc->mHal.state.yuv;
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[1].mallocPtr;
- const uint32_t stride = alloc->mHal.drvState.lod[1].stride;
- switch(yuvID) {
- case 0x32315659: //HAL_PIXEL_FORMAT_YV12:
- x >>= 1;
- y >>= 1;
- return p[x + (y * stride)];
- case 11: //HAL_PIXEL_FORMAT_YCrCb_420_SP: // NV21
- x >>= 1;
- y >>= 1;
- return p[(x<<1) + (y * stride)];
- default:
- break;
- }
+ // U samples are addressed through lod[1], the same plane the YuvToRGB
+ // intrinsic kernel reads as U; lod[2] is the V plane.
+ const size_t cstep = alloc->mHal.drvState.yuv.step;
+ const size_t shift = alloc->mHal.drvState.yuv.shift;
+ const size_t stride = alloc->mHal.drvState.lod[1].stride;
- return 0;
+ const uchar *pin = (const uchar *)alloc->mHal.drvState.lod[1].mallocPtr;
+
+ // x and y are scaled down by `shift` bits; x is then scaled by the chroma byte step.
+ return pin[((x >> shift) * cstep) + ((y >> shift) * stride)];
}
extern const uchar __attribute__((overloadable))
rsGetElementAtYuv_uchar_V(rs_allocation a, uint32_t x, uint32_t y) {
Allocation_t *alloc = (Allocation_t *)a.p;
- const uint32_t yuvID = alloc->mHal.state.yuv;
- switch(yuvID) {
- case 0x32315659: //HAL_PIXEL_FORMAT_YV12:
- {
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[2].mallocPtr;
- const uint32_t stride = alloc->mHal.drvState.lod[2].stride;
- x >>= 1;
- y >>= 1;
- return p[x + (y * stride)];
- }
- case 11: //HAL_PIXEL_FORMAT_YCrCb_420_SP: // NV21
- {
- const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[1].mallocPtr;
- const uint32_t stride = alloc->mHal.drvState.lod[1].stride;
- x >>= 1;
- y >>= 1;
- return p[(x<<1) + (y * stride) + 1];
- }
- default:
- break;
- }
+ // V samples are addressed through lod[2], the same plane the YuvToRGB
+ // intrinsic kernel reads as V; lod[1] is the U plane.
+ const size_t cstep = alloc->mHal.drvState.yuv.step;
+ const size_t shift = alloc->mHal.drvState.yuv.shift;
+ const size_t stride = alloc->mHal.drvState.lod[2].stride;
- return 0;
+ const uchar *pin = (const uchar *)alloc->mHal.drvState.lod[2].mallocPtr;
+
+ // x and y are scaled down by `shift` bits; x is then scaled by the chroma byte step.
+ return pin[((x >> shift) * cstep) + ((y >> shift) * stride)];
}
diff --git a/driver/runtime/rs_structs.h b/driver/runtime/rs_structs.h
index 204717c..26a41ee 100644
--- a/driver/runtime/rs_structs.h
+++ b/driver/runtime/rs_structs.h
@@ -54,6 +54,11 @@
size_t faceOffset;
uint32_t lodCount;
uint32_t faceCount;
+
+ struct YuvState {
+ uint32_t shift;
+ uint32_t step;
+ } yuv;
} drvState;
} mHal;
} Allocation_t;
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index 9bf8709..f6f4ac9 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -256,7 +256,7 @@
}
uint32_t Allocation::getPackedSize() const {
- uint32_t numItems = mHal.state.type->getSizeBytes() / mHal.state.type->getElementSizeBytes();
+ uint32_t numItems = mHal.state.type->getCellCount();
return numItems * mHal.state.type->getElement()->getSizeBytesUnpadded();
}
@@ -265,7 +265,7 @@
const Element *elem = type->getElement();
uint32_t unpaddedBytes = elem->getSizeBytesUnpadded();
uint32_t paddedBytes = elem->getSizeBytes();
- uint32_t numItems = type->getSizeBytes() / paddedBytes;
+ uint32_t numItems = type->getPackedSizeBytes() / paddedBytes;
uint32_t srcInc = !dstPadded ? paddedBytes : unpaddedBytes;
uint32_t dstInc = dstPadded ? paddedBytes : unpaddedBytes;
@@ -320,7 +320,7 @@
void Allocation::packVec3Allocation(Context *rsc, OStream *stream) const {
uint32_t paddedBytes = getType()->getElement()->getSizeBytes();
uint32_t unpaddedBytes = getType()->getElement()->getSizeBytesUnpadded();
- uint32_t numItems = mHal.state.type->getSizeBytes() / paddedBytes;
+ uint32_t numItems = mHal.state.type->getCellCount();
const uint8_t *src = (const uint8_t*)rsc->mHal.funcs.allocation.lock1D(rsc, this);
uint8_t *dst = new uint8_t[numItems * unpaddedBytes];
@@ -341,7 +341,7 @@
// to initialize the class
mHal.state.type->serialize(rsc, stream);
- uint32_t dataSize = mHal.state.type->getSizeBytes();
+ uint32_t dataSize = mHal.state.type->getPackedSizeBytes();
// 3 element vectors are padded to 4 in memory, but padding isn't serialized
uint32_t packedSize = getPackedSize();
// Write how much data we are storing
@@ -379,7 +379,7 @@
uint32_t dataSize = stream->loadU32();
// 3 element vectors are padded to 4 in memory, but padding isn't serialized
uint32_t packedSize = alloc->getPackedSize();
- if (dataSize != type->getSizeBytes() &&
+ if (dataSize != type->getPackedSizeBytes() &&
dataSize != packedSize) {
ALOGE("failed to read allocation because numbytes written is not the same loaded type wants\n");
ObjectBase::checkDelete(alloc);
@@ -388,7 +388,7 @@
}
alloc->assignName(name);
- if (dataSize == type->getSizeBytes()) {
+ if (dataSize == type->getPackedSizeBytes()) {
uint32_t count = dataSize / type->getElementSizeBytes();
// Read in all of our allocation data
alloc->data(rsc, 0, 0, count, stream->getPtr() + stream->getPos(), dataSize);
@@ -422,7 +422,7 @@
void Allocation::freeChildrenUnlocked () {
void *ptr = mRSC->mHal.funcs.allocation.lock1D(mRSC, this);
- decRefs(ptr, mHal.state.type->getSizeBytes() / mHal.state.type->getElementSizeBytes(), 0);
+ decRefs(ptr, mHal.state.type->getCellCount(), 0);
mRSC->mHal.funcs.allocation.unlock1D(mRSC, this);
}
diff --git a/rsAllocation.h b/rsAllocation.h
index f98fa83..b0f2f9e 100644
--- a/rsAllocation.h
+++ b/rsAllocation.h
@@ -83,6 +83,11 @@
size_t faceOffset;
uint32_t lodCount;
uint32_t faceCount;
+
+ struct YuvState {
+ uint32_t shift;
+ uint32_t step;
+ } yuv;
};
mutable DrvState drvState;
diff --git a/rsGrallocConsumer.cpp b/rsGrallocConsumer.cpp
index e3bd9d4..c5d37b2 100644
--- a/rsGrallocConsumer.cpp
+++ b/rsGrallocConsumer.cpp
@@ -41,7 +41,9 @@
if (y < 1) y = 1;
mConsumer->setDefaultBufferSize(a->mHal.drvState.lod[0].dimX, y);
- //mBufferQueue->setDefaultBufferFormat(defaultFormat);
+ if (a->mHal.state.yuv) {
+ bq->setDefaultBufferFormat(a->mHal.state.yuv);
+ }
//mBufferQueue->setConsumerName(name);
}
@@ -146,6 +148,9 @@
mAlloc->mHal.drvState.lod[0].stride = ycbcr.ystride;
mAlloc->mHal.drvState.lod[1].stride = ycbcr.cstride;
mAlloc->mHal.drvState.lod[2].stride = ycbcr.cstride;
+
+ mAlloc->mHal.drvState.yuv.shift = 1;
+ mAlloc->mHal.drvState.yuv.step = ycbcr.chroma_step;
}
return OK;
diff --git a/rsScriptC_LibGL.cpp b/rsScriptC_LibGL.cpp
index 279ddb2..dbf2336 100644
--- a/rsScriptC_LibGL.cpp
+++ b/rsScriptC_LibGL.cpp
@@ -220,7 +220,7 @@
void rsrDrawTextAlloc(Context *rsc, Allocation *a, int x, int y) {
const char *text = (const char *)rsc->mHal.funcs.allocation.lock1D(rsc, a);
- size_t allocSize = a->getType()->getSizeBytes();
+ size_t allocSize = a->getType()->getPackedSizeBytes();
rsc->mStateFont.renderText(text, allocSize, x, y);
rsc->mHal.funcs.allocation.unlock1D(rsc, a);
}
@@ -250,7 +250,7 @@
int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
CHECK_OBJ(a);
const char *text = (const char *)rsc->mHal.funcs.allocation.lock1D(rsc, a);
- size_t textLen = a->getType()->getSizeBytes();
+ size_t textLen = a->getType()->getPackedSizeBytes();
Font::Rect metrics;
rsc->mStateFont.measureText(text, textLen, &metrics);
SetMetrics(&metrics, left, right, top, bottom);
diff --git a/rsType.cpp b/rsType.cpp
index 33e2ca7..48a18cd 100644
--- a/rsType.cpp
+++ b/rsType.cpp
@@ -50,7 +50,6 @@
delete [] mHal.state.lodDimX;
delete [] mHal.state.lodDimY;
delete [] mHal.state.lodDimZ;
- delete [] mHal.state.lodOffset;
}
mElement.clear();
memset(&mHal, 0, sizeof(mHal));
@@ -63,11 +62,6 @@
rsAssert(!mTypes.size());
}
-size_t Type::getOffsetForFace(uint32_t face) const {
- rsAssert(mHal.state.faces);
- return 0;
-}
-
void Type::compute() {
uint32_t oldLODCount = mHal.state.lodCount;
if (mDimLOD) {
@@ -85,97 +79,58 @@
delete [] mHal.state.lodDimX;
delete [] mHal.state.lodDimY;
delete [] mHal.state.lodDimZ;
- delete [] mHal.state.lodOffset;
}
mHal.state.lodDimX = new uint32_t[mHal.state.lodCount];
mHal.state.lodDimY = new uint32_t[mHal.state.lodCount];
mHal.state.lodDimZ = new uint32_t[mHal.state.lodCount];
- mHal.state.lodOffset = new uint32_t[mHal.state.lodCount];
}
uint32_t tx = mHal.state.dimX;
uint32_t ty = mHal.state.dimY;
uint32_t tz = mHal.state.dimZ;
- size_t offset = 0;
+ mCellCount = 0;
for (uint32_t lod=0; lod < mHal.state.lodCount; lod++) {
mHal.state.lodDimX[lod] = tx;
mHal.state.lodDimY[lod] = ty;
mHal.state.lodDimZ[lod] = tz;
- mHal.state.lodOffset[lod] = offset;
- offset += tx * rsMax(ty, 1u) * rsMax(tz, 1u) * mElement->getSizeBytes();
+ mCellCount += tx * rsMax(ty, 1u) * rsMax(tz, 1u);
if (tx > 1) tx >>= 1;
if (ty > 1) ty >>= 1;
if (tz > 1) tz >>= 1;
}
- // At this point the offset is the size of a mipmap chain;
- mMipChainSizeBytes = offset;
-
if (mHal.state.faces) {
- offset *= 6;
+ mCellCount *= 6;
}
#ifndef RS_SERVER
// YUV only supports basic 2d
// so we can stash the plane pointers in the mipmap levels.
if (mHal.state.dimYuv) {
+ mHal.state.lodDimX[1] = mHal.state.lodDimX[0] / 2;
+ mHal.state.lodDimY[1] = mHal.state.lodDimY[0] / 2;
+ mHal.state.lodDimX[2] = mHal.state.lodDimX[0] / 2;
+ mHal.state.lodDimY[2] = mHal.state.lodDimY[0] / 2;
+ mCellCount += mHal.state.lodDimX[1] * mHal.state.lodDimY[1];
+ mCellCount += mHal.state.lodDimX[2] * mHal.state.lodDimY[2];
+
switch(mHal.state.dimYuv) {
case HAL_PIXEL_FORMAT_YV12:
- mHal.state.lodOffset[1] = offset;
- mHal.state.lodDimX[1] = mHal.state.lodDimX[0] / 2;
- mHal.state.lodDimY[1] = mHal.state.lodDimY[0] / 2;
- offset += offset / 4;
- mHal.state.lodOffset[2] = offset;
- mHal.state.lodDimX[2] = mHal.state.lodDimX[0] / 2;
- mHal.state.lodDimY[2] = mHal.state.lodDimY[0] / 2;
- offset += offset / 4;
break;
case HAL_PIXEL_FORMAT_YCrCb_420_SP: // NV21
- mHal.state.lodOffset[1] = offset;
mHal.state.lodDimX[1] = mHal.state.lodDimX[0];
- mHal.state.lodDimY[1] = mHal.state.lodDimY[0] / 2;
- offset += offset / 2;
break;
+#ifndef RS_COMPATIBILITY_LIB
+ case HAL_PIXEL_FORMAT_YCbCr_420_888:
+ break;
+#endif
default:
rsAssert(0);
}
}
#endif
- mTotalSizeBytes = offset;
mHal.state.element = mElement.get();
}
-uint32_t Type::getLODOffset(uint32_t lod, uint32_t x) const {
- uint32_t offset = mHal.state.lodOffset[lod];
- offset += x * mElement->getSizeBytes();
- return offset;
-}
-
-uint32_t Type::getLODOffset(uint32_t lod, uint32_t x, uint32_t y) const {
- uint32_t offset = mHal.state.lodOffset[lod];
- offset += (x + y * mHal.state.lodDimX[lod]) * mElement->getSizeBytes();
- return offset;
-}
-
-uint32_t Type::getLODOffset(uint32_t lod, uint32_t x, uint32_t y, uint32_t z) const {
- uint32_t offset = mHal.state.lodOffset[lod];
- offset += (x +
- y * mHal.state.lodDimX[lod] +
- z * mHal.state.lodDimX[lod] * mHal.state.lodDimY[lod]) * mElement->getSizeBytes();
- return offset;
-}
-
-uint32_t Type::getLODFaceOffset(uint32_t lod, RsAllocationCubemapFace face,
- uint32_t x, uint32_t y) const {
- uint32_t offset = mHal.state.lodOffset[lod];
- offset += (x + y * mHal.state.lodDimX[lod]) * mElement->getSizeBytes();
-
- if (face != 0) {
- uint32_t faceOffset = getSizeBytes() / 6;
- offset += faceOffset * face;
- }
- return offset;
-}
-
void Type::dumpLOGV(const char *prefix) const {
char buf[1024];
ObjectBase::dumpLOGV(prefix);
diff --git a/rsType.h b/rsType.h
index d2bc96b..26dacfc 100644
--- a/rsType.h
+++ b/rsType.h
@@ -47,7 +47,7 @@
uint32_t *lodDimX;
uint32_t *lodDimY;
uint32_t *lodDimZ;
- uint32_t *lodOffset;
+ uint32_t *_unused;
uint32_t lodCount;
uint32_t dimYuv;
bool faces;
@@ -58,10 +58,9 @@
Type * createTex2D(const Element *, size_t w, size_t h, bool mip);
- size_t getOffsetForFace(uint32_t face) const;
-
- size_t getSizeBytes() const {return mTotalSizeBytes;}
+ size_t getCellCount() const {return mCellCount;}
size_t getElementSizeBytes() const {return mElement->getSizeBytes();}
+ size_t getPackedSizeBytes() const {return mCellCount * mElement->getSizeBytes();}
const Element * getElement() const {return mElement.get();}
uint32_t getDimX() const {return mHal.state.dimX;}
@@ -83,16 +82,6 @@
rsAssert(lod < mHal.state.lodCount);
return mHal.state.lodDimZ[lod];
}
- uint32_t getLODOffset(uint32_t lod) const {
- rsAssert(lod < mHal.state.lodCount);
- return mHal.state.lodOffset[lod];
- }
- uint32_t getLODOffset(uint32_t lod, uint32_t x) const;
- uint32_t getLODOffset(uint32_t lod, uint32_t x, uint32_t y) const;
- uint32_t getLODOffset(uint32_t lod, uint32_t x, uint32_t y, uint32_t z) const;
-
- uint32_t getLODFaceOffset(uint32_t lod, RsAllocationCubemapFace face,
- uint32_t x, uint32_t y) const;
uint32_t getLODCount() const {return mHal.state.lodCount;}
bool getIsNp2() const;
@@ -137,8 +126,7 @@
// count of mipmap levels, 0 indicates no mipmapping
- size_t mMipChainSizeBytes;
- size_t mTotalSizeBytes;
+ size_t mCellCount;
protected:
virtual void preDestroy() const;
virtual ~Type();