Merge "Fixing some bugs and inefficiencies"
diff --git a/lib/ScriptCRT/rs_sample.c b/lib/ScriptCRT/rs_sample.c
index bf192c5..e3fd29d 100644
--- a/lib/ScriptCRT/rs_sample.c
+++ b/lib/ScriptCRT/rs_sample.c
@@ -6,7 +6,7 @@
* Allocation sampling
*/
static const void * __attribute__((overloadable))
- getElementAt1D(rs_allocation a, uint32_t x, uint32_t lod) {
+ getElementAt(rs_allocation a, uint32_t x, uint32_t lod) {
Allocation_t *alloc = (Allocation_t *)a.p;
const Type_t *type = (const Type_t*)alloc->mHal.state.type;
const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.mallocPtr;
@@ -31,18 +31,18 @@
}
static const void * __attribute__((overloadable))
- getElementAt(rs_allocation a, int2 uv, uint32_t lod) {
+ getElementAt(rs_allocation a, uint2 uv, uint32_t lod) {
return getElementAt(a, uv.x, uv.y, lod);
}
-static int32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
+static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
if (wrap == RS_SAMPLER_WRAP) {
coord = coord % size;
if (coord < 0) {
coord += size;
}
}
- return max(0, min(coord, size - 1));
+ return (uint32_t)max(0, min(coord, size - 1));
}
// 565 Conversion bits taken from SkBitmap
@@ -89,9 +89,9 @@
#define SAMPLE_1D_FUNC(vecsize, intype, outtype, convert) \
static outtype __attribute__((overloadable)) \
getSample##vecsize(rs_allocation a, float2 weights, \
- int iPixel, int next, uint32_t lod) { \
- intype *p0c = (intype*)getElementAt1D(a, iPixel, lod); \
- intype *p1c = (intype*)getElementAt1D(a, next, lod); \
+ uint32_t iPixel, uint32_t next, uint32_t lod) { \
+ intype *p0c = (intype*)getElementAt(a, iPixel, lod); \
+ intype *p1c = (intype*)getElementAt(a, next, lod); \
outtype p0 = convert(*p0c); \
outtype p1 = convert(*p1c); \
return p0 * weights.x + p1 * weights.y; \
@@ -99,7 +99,7 @@
#define SAMPLE_2D_FUNC(vecsize, intype, outtype, convert) \
static outtype __attribute__((overloadable)) \
getSample##vecsize(rs_allocation a, float4 weights, \
- int2 iPixel, int2 next, uint32_t lod) { \
+ uint2 iPixel, uint2 next, uint32_t lod) { \
intype *p0c = (intype*)getElementAt(a, iPixel.x, iPixel.y, lod); \
intype *p1c = (intype*)getElementAt(a, next.x, iPixel.y, lod); \
intype *p2c = (intype*)getElementAt(a, iPixel.x, next.y, lod); \
@@ -150,15 +150,18 @@
} \
\
if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) { \
- lod = clamp(lod, 0.0f, (float)(type->mHal.state.lodCount - 1)); \
+ uint32_t maxLOD = type->mHal.state.lodCount - 1; \
+ lod = min(lod, (float)maxLOD); \
uint32_t nearestLOD = (uint32_t)round(lod); \
return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, nearestLOD); \
} \
\
if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) { \
- lod = clamp(lod, 0.0f, (float)(type->mHal.state.lodCount - 1)); \
uint32_t lod0 = (uint32_t)floor(lod); \
uint32_t lod1 = (uint32_t)ceil(lod); \
+ uint32_t maxLOD = type->mHal.state.lodCount - 1; \
+ lod0 = min(lod0, maxLOD); \
+ lod1 = min(lod1, maxLOD); \
float4 sample0 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod0); \
float4 sample1 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod1); \
float frac = lod - (float)lod0; \
@@ -224,26 +227,26 @@
static float4 __attribute__((overloadable))
getBilinearSample(rs_allocation a, float2 weights,
- int32_t iPixel, int32_t next,
+ uint32_t iPixel, uint32_t next,
uint32_t vecSize, rs_data_type dt, uint32_t lod) {
BILINEAR_SAMPLE_BODY()
}
static float4 __attribute__((overloadable))
getBilinearSample(rs_allocation a, float4 weights,
- int2 iPixel, int2 next,
+ uint2 iPixel, uint2 next,
uint32_t vecSize, rs_data_type dt, uint32_t lod) {
BILINEAR_SAMPLE_BODY()
}
static float4 __attribute__((overloadable))
- getNearestSample(rs_allocation a, int32_t iPixel, uint32_t vecSize,
+ getNearestSample(rs_allocation a, uint32_t iPixel, uint32_t vecSize,
rs_data_type dt, uint32_t lod) {
NEAREST_SAMPLE_BODY()
}
static float4 __attribute__((overloadable))
- getNearestSample(rs_allocation a, int2 iPixel, uint32_t vecSize,
+ getNearestSample(rs_allocation a, uint2 iPixel, uint32_t vecSize,
rs_data_type dt, uint32_t lod) {
NEAREST_SAMPLE_BODY()
}
@@ -272,10 +275,10 @@
weights.x = oneMinusFrac;
weights.y = frac;
- int32_t next = wrapI(wrapS, iPixel + 1, sourceW);
- iPixel = wrapI(wrapS, iPixel, sourceW);
+ uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
+ uint32_t location = wrapI(wrapS, iPixel, sourceW);
- return getBilinearSample(a, weights, iPixel, next, vecSize, dt, lod);
+ return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
}
static float4 __attribute__((overloadable))
@@ -286,9 +289,9 @@
rs_sampler_value wrapS = rsgSamplerGetWrapS(s);
int32_t sourceW = type->mHal.state.lodDimX[lod];
int32_t iPixel = (int32_t)(uv * (float)(sourceW));
- iPixel = wrapI(wrapS, iPixel, sourceW);
+ uint32_t location = wrapI(wrapS, iPixel, sourceW);
- return getNearestSample(a, iPixel, vecSize, dt, lod);
+ return getNearestSample(a, location, vecSize, dt, lod);
}
static float4 __attribute__((overloadable))
@@ -330,13 +333,14 @@
weights.z = oneMinusFrac.x * frac.y;
weights.w = frac.x * frac.y;
- int2 next;
+ uint2 next;
next.x = wrapI(wrapS, iPixel.x + 1, sourceW);
next.y = wrapI(wrapT, iPixel.y + 1, sourceH);
- iPixel.x = wrapI(wrapS, iPixel.x, sourceW);
- iPixel.y = wrapI(wrapT, iPixel.y, sourceH);
+ uint2 location;
+ location.x = wrapI(wrapS, iPixel.x, sourceW);
+ location.y = wrapI(wrapT, iPixel.y, sourceH);
- return getBilinearSample(a, weights, iPixel, next, vecSize, dt, lod);
+ return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
}
static float4 __attribute__((overloadable))
@@ -355,9 +359,10 @@
dimF.y = (float)(sourceH);
int2 iPixel = convert_int2(uv * dimF);
- iPixel.x = wrapI(wrapS, iPixel.x, sourceW);
- iPixel.y = wrapI(wrapT, iPixel.y, sourceH);
- return getNearestSample(a, iPixel, vecSize, dt, lod);
+ uint2 location;
+ location.x = wrapI(wrapS, iPixel.x, sourceW);
+ location.y = wrapI(wrapT, iPixel.y, sourceH);
+ return getNearestSample(a, location, vecSize, dt, lod);
}
extern const float4 __attribute__((overloadable))
@@ -379,16 +384,3 @@
rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
SAMPLE_FUNC_BODY()
}
-
-// TODO: implement cubemap lookups
-extern const float4 __attribute__((overloadable))
- rsSample(rs_allocation a, rs_sampler s, float3 location) {
- return rsSample(a, s, location, 0.0f);
-}
-
-// TODO: implement cubemap lookups
-extern const float4 __attribute__((overloadable))
- rsSample(rs_allocation a, rs_sampler s, float3 location, float lod) {
- float4 result;
- return result;
-}