use SkClampRange to speedup clamp iterators
speeds up linear gradients 4x
git-svn-id: http://skia.googlecode.com/svn/trunk@1139 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/effects/SkGradientShader.cpp b/src/effects/SkGradientShader.cpp
index b212dfe..ea90e26 100644
--- a/src/effects/SkGradientShader.cpp
+++ b/src/effects/SkGradientShader.cpp
@@ -27,6 +27,24 @@
#define USE_DITHER_32BIT_GRADIENT
#endif
+static void sk_memset32_dither(uint32_t dst[], uint32_t v0, uint32_t v1,
+ int count) {
+ if (count > 0) {
+ if (v0 == v1) {
+ sk_memset32(dst, v0, count);
+ } else {
+ int pairs = count >> 1;
+ for (int i = 0; i < pairs; i++) {
+ *dst++ = v0;
+ *dst++ = v1;
+ }
+ if (count & 1) {
+ *dst = v0;
+ }
+ }
+ }
+}
+
///////////////////////////////////////////////////////////////////////////////
typedef SkFixed (*TileProc)(SkFixed);
@@ -794,6 +812,16 @@
return (unsigned)((fx | (fx + (count - 1) * dx)) >> 8) <= 0xFF;
}
+#include "SkClampRange.h"
+
+#define NO_CHECK_ITER \
+ fi = fx >> 8; \
+ SkASSERT(fi <= 0xFF); \
+ fx += dx; \
+ *dstC++ = cache[toggle + fi]; \
+ toggle ^= TOGGLE_MASK
+
+
void Linear_Gradient::shadeSpan(int x, int y, SkPMColor dstC[], int count) {
SkASSERT(count > 0);
@@ -830,6 +858,7 @@
// TODO: dither version
sk_memset32(dstC, cache[fi >> (16 - kCache32Bits)], count);
} else if (proc == clamp_tileproc) {
+#if 0
do {
unsigned fi = SkClampMax(fx >> 8, 0xFF);
SkASSERT(fi <= 0xFF);
@@ -837,6 +866,39 @@
*dstC++ = cache[toggle + fi];
toggle ^= TOGGLE_MASK;
} while (--count != 0);
+#else
+ SkClampRange range;
+ range.init(fx, dx, count, 0, 0xFF);
+
+ if ((count = range.fCount0) > 0) {
+ sk_memset32_dither(dstC,
+ cache[toggle + range.fV0],
+ cache[(toggle ^ TOGGLE_MASK) + range.fV0],
+ count);
+ dstC += count;
+ }
+ if ((count = range.fCount1) > 0) {
+ unsigned fi;
+ int i, unroll = count >> 3;
+ for (i = 0; i < unroll; i++) {
+ NO_CHECK_ITER; NO_CHECK_ITER;
+ NO_CHECK_ITER; NO_CHECK_ITER;
+ NO_CHECK_ITER; NO_CHECK_ITER;
+ NO_CHECK_ITER; NO_CHECK_ITER;
+ }
+ if ((count &= 7) > 0) {
+ do {
+ NO_CHECK_ITER;
+ } while (--count != 0);
+ }
+ }
+ if ((count = range.fCount2) > 0) {
+ sk_memset32_dither(dstC,
+ cache[toggle + range.fV1],
+ cache[(toggle ^ TOGGLE_MASK) + range.fV1],
+ count);
+ }
+#endif
} else if (proc == mirror_tileproc) {
do {
unsigned fi = mirror_8bits(fx >> 8);
@@ -911,6 +973,14 @@
}
}
+#define NO_CHECK_ITER_16 \
+ fi = fx >> kCache16Shift; \
+ SkASSERT(fi <= kCache16Mask); \
+ fx += dx; \
+ *dstC++ = cache[toggle + fi]; \
+ toggle ^= TOGGLE_MASK
+
+
void Linear_Gradient::shadeSpan16(int x, int y, uint16_t dstC[], int count) {
SkASSERT(count > 0);
@@ -919,6 +989,7 @@
TileProc proc = fTileProc;
const uint16_t* cache = this->getCache16();
int toggle = ((x ^ y) & 1) << kCache16Bits;
+ const int TOGGLE_MASK = (1 << kCache32Bits);
if (fDstToIndexClass != kPerspective_MatrixClass) {
dstProc(fDstToIndex, SkIntToScalar(x) + SK_ScalarHalf,
@@ -938,22 +1009,57 @@
// we're a vertical gradient, so no change in a span
unsigned fi = proc(fx) >> kCache16Shift;
SkASSERT(fi <= kCache16Mask);
- dither_memset16(dstC, cache[toggle + fi], cache[(toggle ^ (1 << kCache16Bits)) + fi], count);
+ dither_memset16(dstC, cache[toggle + fi],
+ cache[(toggle ^ TOGGLE_MASK) + fi], count);
} else if (proc == clamp_tileproc) {
+#if 0
do {
unsigned fi = SkClampMax(fx >> kCache16Shift, kCache16Mask);
SkASSERT(fi <= kCache16Mask);
fx += dx;
*dstC++ = cache[toggle + fi];
- toggle ^= (1 << kCache16Bits);
+ toggle ^= TOGGLE_MASK;
} while (--count != 0);
+#else
+ SkClampRange range;
+ range.init(fx, dx, count, 0, kCache16Mask);
+
+ if ((count = range.fCount0) > 0) {
+ dither_memset16(dstC,
+ cache[toggle + range.fV0],
+ cache[(toggle ^ TOGGLE_MASK) + range.fV0],
+ count);
+ dstC += count;
+ }
+ if ((count = range.fCount1) > 0) {
+ unsigned fi;
+ int i, unroll = count >> 3;
+ for (i = 0; i < unroll; i++) {
+ NO_CHECK_ITER_16; NO_CHECK_ITER_16;
+ NO_CHECK_ITER_16; NO_CHECK_ITER_16;
+ NO_CHECK_ITER_16; NO_CHECK_ITER_16;
+ NO_CHECK_ITER_16; NO_CHECK_ITER_16;
+ }
+ if ((count &= 7) > 0) {
+ do {
+ NO_CHECK_ITER_16;
+ } while (--count != 0);
+ }
+ }
+ if ((count = range.fCount2) > 0) {
+ dither_memset16(dstC,
+ cache[toggle + range.fV1],
+ cache[(toggle ^ TOGGLE_MASK) + range.fV1],
+ count);
+ }
+#endif
} else if (proc == mirror_tileproc) {
do {
unsigned fi = mirror_bits(fx >> kCache16Shift, kCache16Bits);
SkASSERT(fi <= kCache16Mask);
fx += dx;
*dstC++ = cache[toggle + fi];
- toggle ^= (1 << kCache16Bits);
+ toggle ^= TOGGLE_MASK;
} while (--count != 0);
} else {
SkASSERT(proc == repeat_tileproc);
@@ -962,7 +1068,7 @@
SkASSERT(fi <= kCache16Mask);
fx += dx;
*dstC++ = cache[toggle + fi];
- toggle ^= (1 << kCache16Bits);
+ toggle ^= TOGGLE_MASK;
} while (--count != 0);
}
} else {
@@ -975,7 +1081,7 @@
int index = fi >> kCache16Shift;
*dstC++ = cache[toggle + index];
- toggle ^= (1 << kCache16Bits);
+ toggle ^= TOGGLE_MASK;
dstX += SK_Scalar1;
} while (--count != 0);