blob: 4ac93d7b59eddc7553d59ba73cb941a0176a7f55 [file] [log] [blame]
rileya@google.com589708b2012-07-26 20:04:23 +00001
2/*
3 * Copyright 2012 Google Inc.
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
9#include "SkRadialGradient.h"
10#include "SkRadialGradient_Table.h"
11
12#define kSQRT_TABLE_BITS 11
13#define kSQRT_TABLE_SIZE (1 << kSQRT_TABLE_BITS)
14
15#if defined(SK_BUILD_FOR_WIN32) && defined(SK_DEBUG)
16
17#include <stdio.h>
18
19void SkRadialGradient_BuildTable() {
20 // build it 0..127 x 0..127, so we use 2^15 - 1 in the numerator for our "fixed" table
21
22 FILE* file = ::fopen("SkRadialGradient_Table.h", "w");
23 SkASSERT(file);
24 ::fprintf(file, "static const uint8_t gSqrt8Table[] = {\n");
25
26 for (int i = 0; i < kSQRT_TABLE_SIZE; i++) {
27 if ((i & 15) == 0) {
28 ::fprintf(file, "\t");
29 }
30
31 uint8_t value = SkToU8(SkFixedSqrt(i * SK_Fixed1 / kSQRT_TABLE_SIZE) >> 8);
32
33 ::fprintf(file, "0x%02X", value);
34 if (i < kSQRT_TABLE_SIZE-1) {
35 ::fprintf(file, ", ");
36 }
37 if ((i & 15) == 15) {
38 ::fprintf(file, "\n");
39 }
40 }
41 ::fprintf(file, "};\n");
42 ::fclose(file);
43}
44
45#endif
46
47namespace {
48
49void rad_to_unit_matrix(const SkPoint& center, SkScalar radius,
50 SkMatrix* matrix) {
51 SkScalar inv = SkScalarInvert(radius);
52
53 matrix->setTranslate(-center.fX, -center.fY);
54 matrix->postScale(inv, inv);
55}
56
57typedef void (* RadialShade16Proc)(SkScalar sfx, SkScalar sdx,
58 SkScalar sfy, SkScalar sdy,
59 uint16_t* dstC, const uint16_t* cache,
60 int toggle, int count);
61
62void shadeSpan16_radial_clamp(SkScalar sfx, SkScalar sdx,
63 SkScalar sfy, SkScalar sdy,
64 uint16_t* SK_RESTRICT dstC, const uint16_t* SK_RESTRICT cache,
65 int toggle, int count) {
66 const uint8_t* SK_RESTRICT sqrt_table = gSqrt8Table;
67
68 /* knock these down so we can pin against +- 0x7FFF, which is an
69 immediate load, rather than 0xFFFF which is slower. This is a
70 compromise, since it reduces our precision, but that appears
71 to be visually OK. If we decide this is OK for all of our cases,
72 we could (it seems) put this scale-down into fDstToIndex,
73 to avoid having to do these extra shifts each time.
74 */
75 SkFixed fx = SkScalarToFixed(sfx) >> 1;
76 SkFixed dx = SkScalarToFixed(sdx) >> 1;
77 SkFixed fy = SkScalarToFixed(sfy) >> 1;
78 SkFixed dy = SkScalarToFixed(sdy) >> 1;
79 // might perform this check for the other modes,
80 // but the win will be a smaller % of the total
81 if (dy == 0) {
82 fy = SkPin32(fy, -0xFFFF >> 1, 0xFFFF >> 1);
83 fy *= fy;
84 do {
85 unsigned xx = SkPin32(fx, -0xFFFF >> 1, 0xFFFF >> 1);
86 unsigned fi = (xx * xx + fy) >> (14 + 16 - kSQRT_TABLE_BITS);
87 fi = SkFastMin32(fi, 0xFFFF >> (16 - kSQRT_TABLE_BITS));
88 fx += dx;
89 *dstC++ = cache[toggle +
90 (sqrt_table[fi] >> SkGradientShaderBase::kSqrt16Shift)];
91 toggle ^= SkGradientShaderBase::kDitherStride16;
92 } while (--count != 0);
93 } else {
94 do {
95 unsigned xx = SkPin32(fx, -0xFFFF >> 1, 0xFFFF >> 1);
96 unsigned fi = SkPin32(fy, -0xFFFF >> 1, 0xFFFF >> 1);
97 fi = (xx * xx + fi * fi) >> (14 + 16 - kSQRT_TABLE_BITS);
98 fi = SkFastMin32(fi, 0xFFFF >> (16 - kSQRT_TABLE_BITS));
99 fx += dx;
100 fy += dy;
101 *dstC++ = cache[toggle +
102 (sqrt_table[fi] >> SkGradientShaderBase::kSqrt16Shift)];
103 toggle ^= SkGradientShaderBase::kDitherStride16;
104 } while (--count != 0);
105 }
106}
107
108void shadeSpan16_radial_mirror(SkScalar sfx, SkScalar sdx,
109 SkScalar sfy, SkScalar sdy,
110 uint16_t* SK_RESTRICT dstC, const uint16_t* SK_RESTRICT cache,
111 int toggle, int count) {
112 do {
113#ifdef SK_SCALAR_IS_FLOAT
114 float fdist = sk_float_sqrt(sfx*sfx + sfy*sfy);
115 SkFixed dist = SkFloatToFixed(fdist);
116#else
117 SkFixed magnitudeSquared = SkFixedSquare(sfx) +
118 SkFixedSquare(sfy);
119 if (magnitudeSquared < 0) // Overflow.
120 magnitudeSquared = SK_FixedMax;
121 SkFixed dist = SkFixedSqrt(magnitudeSquared);
122#endif
123 unsigned fi = mirror_tileproc(dist);
124 SkASSERT(fi <= 0xFFFF);
125 *dstC++ = cache[toggle + (fi >> SkGradientShaderBase::kCache16Shift)];
126 toggle ^= SkGradientShaderBase::kDitherStride16;
127 sfx += sdx;
128 sfy += sdy;
129 } while (--count != 0);
130}
131
132void shadeSpan16_radial_repeat(SkScalar sfx, SkScalar sdx,
133 SkScalar sfy, SkScalar sdy,
134 uint16_t* SK_RESTRICT dstC, const uint16_t* SK_RESTRICT cache,
135 int toggle, int count) {
136 SkFixed fx = SkScalarToFixed(sfx);
137 SkFixed dx = SkScalarToFixed(sdx);
138 SkFixed fy = SkScalarToFixed(sfy);
139 SkFixed dy = SkScalarToFixed(sdy);
140 do {
141 SkFixed dist = SkFixedSqrt(SkFixedSquare(fx) + SkFixedSquare(fy));
142 unsigned fi = repeat_tileproc(dist);
143 SkASSERT(fi <= 0xFFFF);
144 fx += dx;
145 fy += dy;
146 *dstC++ = cache[toggle + (fi >> SkGradientShaderBase::kCache16Shift)];
147 toggle ^= SkGradientShaderBase::kDitherStride16;
148 } while (--count != 0);
149}
150
151}
152
153SkRadialGradient::SkRadialGradient(const SkPoint& center, SkScalar radius,
154 const SkColor colors[], const SkScalar pos[], int colorCount,
155 SkShader::TileMode mode, SkUnitMapper* mapper)
156 : SkGradientShaderBase(colors, pos, colorCount, mode, mapper),
157 fCenter(center),
158 fRadius(radius)
159{
160 // make sure our table is insync with our current #define for kSQRT_TABLE_SIZE
161 SkASSERT(sizeof(gSqrt8Table) == kSQRT_TABLE_SIZE);
162
163 rad_to_unit_matrix(center, radius, &fPtsToUnit);
164}
165
166void SkRadialGradient::shadeSpan16(int x, int y, uint16_t* dstCParam,
167 int count) {
168 SkASSERT(count > 0);
169
170 uint16_t* SK_RESTRICT dstC = dstCParam;
171
172 SkPoint srcPt;
173 SkMatrix::MapXYProc dstProc = fDstToIndexProc;
174 TileProc proc = fTileProc;
175 const uint16_t* SK_RESTRICT cache = this->getCache16();
176 int toggle = ((x ^ y) & 1) * kDitherStride16;
177
178 if (fDstToIndexClass != kPerspective_MatrixClass) {
179 dstProc(fDstToIndex, SkIntToScalar(x) + SK_ScalarHalf,
180 SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
181
182 SkScalar sdx = fDstToIndex.getScaleX();
183 SkScalar sdy = fDstToIndex.getSkewY();
184
185 if (fDstToIndexClass == kFixedStepInX_MatrixClass) {
186 SkFixed storage[2];
187 (void)fDstToIndex.fixedStepInX(SkIntToScalar(y),
188 &storage[0], &storage[1]);
189 sdx = SkFixedToScalar(storage[0]);
190 sdy = SkFixedToScalar(storage[1]);
191 } else {
192 SkASSERT(fDstToIndexClass == kLinear_MatrixClass);
193 }
194
195 RadialShade16Proc shadeProc = shadeSpan16_radial_repeat;
196 if (SkShader::kClamp_TileMode == fTileMode) {
197 shadeProc = shadeSpan16_radial_clamp;
198 } else if (SkShader::kMirror_TileMode == fTileMode) {
199 shadeProc = shadeSpan16_radial_mirror;
200 } else {
201 SkASSERT(SkShader::kRepeat_TileMode == fTileMode);
202 }
203 (*shadeProc)(srcPt.fX, sdx, srcPt.fY, sdy, dstC,
204 cache, toggle, count);
205 } else { // perspective case
206 SkScalar dstX = SkIntToScalar(x);
207 SkScalar dstY = SkIntToScalar(y);
208 do {
209 dstProc(fDstToIndex, dstX, dstY, &srcPt);
210 unsigned fi = proc(SkScalarToFixed(srcPt.length()));
211 SkASSERT(fi <= 0xFFFF);
212
213 int index = fi >> (16 - kCache16Bits);
214 *dstC++ = cache[toggle + index];
215 toggle ^= kDitherStride16;
216
217 dstX += SK_Scalar1;
218 } while (--count != 0);
219 }
220}
221
222SkShader::BitmapType SkRadialGradient::asABitmap(SkBitmap* bitmap,
223 SkMatrix* matrix, SkShader::TileMode* xy) const {
224 if (bitmap) {
225 this->commonAsABitmap(bitmap);
226 }
227 if (matrix) {
228 matrix->setScale(SkIntToScalar(kGradient32Length),
229 SkIntToScalar(kGradient32Length));
230 matrix->preConcat(fPtsToUnit);
231 }
232 if (xy) {
233 xy[0] = fTileMode;
234 xy[1] = kClamp_TileMode;
235 }
236 return kRadial_BitmapType;
237}
238
239SkShader::GradientType SkRadialGradient::asAGradient(GradientInfo* info) const {
240 if (info) {
241 commonAsAGradient(info);
242 info->fPoint[0] = fCenter;
243 info->fRadius[0] = fRadius;
244 }
245 return kRadial_GradientType;
246}
247
248GrCustomStage* SkRadialGradient::asNewCustomStage(GrContext* context,
249 GrSamplerState* sampler) const {
250 SkASSERT(NULL != context && NULL != sampler);
251 sampler->matrix()->preConcat(fPtsToUnit);
252 sampler->textureParams()->setTileModeX(fTileMode);
253 sampler->textureParams()->setTileModeY(kClamp_TileMode);
254 sampler->textureParams()->setBilerp(true);
255 return SkNEW_ARGS(GrRadialGradient, (context, *this, sampler));
256}
257
258SkRadialGradient::SkRadialGradient(SkFlattenableReadBuffer& buffer)
259 : INHERITED(buffer),
260 fCenter(buffer.readPoint()),
261 fRadius(buffer.readScalar()) {
262}
263
264void SkRadialGradient::flatten(SkFlattenableWriteBuffer& buffer) const {
265 this->INHERITED::flatten(buffer);
266 buffer.writePoint(fCenter);
267 buffer.writeScalar(fRadius);
268}
269
270namespace {
271
272inline bool radial_completely_pinned(int fx, int dx, int fy, int dy) {
273 // fast, overly-conservative test: checks unit square instead
274 // of unit circle
275 bool xClamped = (fx >= SK_FixedHalf && dx >= 0) ||
276 (fx <= -SK_FixedHalf && dx <= 0);
277 bool yClamped = (fy >= SK_FixedHalf && dy >= 0) ||
278 (fy <= -SK_FixedHalf && dy <= 0);
279
280 return xClamped || yClamped;
281}
282
283// Return true if (fx * fy) is always inside the unit circle
284// SkPin32 is expensive, but so are all the SkFixedMul in this test,
285// so it shouldn't be run if count is small.
286inline bool no_need_for_radial_pin(int fx, int dx,
287 int fy, int dy, int count) {
288 SkASSERT(count > 0);
289 if (SkAbs32(fx) > 0x7FFF || SkAbs32(fy) > 0x7FFF) {
290 return false;
291 }
292 if (fx*fx + fy*fy > 0x7FFF*0x7FFF) {
293 return false;
294 }
295 fx += (count - 1) * dx;
296 fy += (count - 1) * dy;
297 if (SkAbs32(fx) > 0x7FFF || SkAbs32(fy) > 0x7FFF) {
298 return false;
299 }
300 return fx*fx + fy*fy <= 0x7FFF*0x7FFF;
301}
302
/* Shades one pixel without pinning and advances to the next one.
   Expects these names in the enclosing scope: fi, fx, fy, dx, dy, dstC,
   cache, toggle and sqrt_table. Wrapped in do { } while (0) so that
   "UNPINNED_RADIAL_STEP;" is a single statement and stays correct even
   under an unbraced if/else.
*/
#define UNPINNED_RADIAL_STEP \
    do { \
        fi = (fx * fx + fy * fy) >> (14 + 16 - kSQRT_TABLE_BITS); \
        *dstC++ = cache[toggle + \
                    (sqrt_table[fi] >> SkGradientShaderBase::kSqrt32Shift)]; \
        toggle ^= SkGradientShaderBase::kDitherStride32; \
        fx += dx; \
        fy += dy; \
    } while (0)
310
311typedef void (* RadialShadeProc)(SkScalar sfx, SkScalar sdx,
312 SkScalar sfy, SkScalar sdy,
313 SkPMColor* dstC, const SkPMColor* cache,
314 int count, int toggle);
315
316// On Linux, this is faster with SkPMColor[] params than SkPMColor* SK_RESTRICT
317void shadeSpan_radial_clamp(SkScalar sfx, SkScalar sdx,
318 SkScalar sfy, SkScalar sdy,
319 SkPMColor* SK_RESTRICT dstC, const SkPMColor* SK_RESTRICT cache,
320 int count, int toggle) {
321 // Floating point seems to be slower than fixed point,
322 // even when we have float hardware.
323 const uint8_t* SK_RESTRICT sqrt_table = gSqrt8Table;
324 SkFixed fx = SkScalarToFixed(sfx) >> 1;
325 SkFixed dx = SkScalarToFixed(sdx) >> 1;
326 SkFixed fy = SkScalarToFixed(sfy) >> 1;
327 SkFixed dy = SkScalarToFixed(sdy) >> 1;
328 if ((count > 4) && radial_completely_pinned(fx, dx, fy, dy)) {
329 unsigned fi = SkGradientShaderBase::kGradient32Length;
330 sk_memset32_dither(dstC,
331 cache[toggle + fi],
332 cache[(toggle ^ SkGradientShaderBase::kDitherStride32) + fi],
333 count);
334 } else if ((count > 4) &&
335 no_need_for_radial_pin(fx, dx, fy, dy, count)) {
336 unsigned fi;
337 // 4x unroll appears to be no faster than 2x unroll on Linux
338 while (count > 1) {
339 UNPINNED_RADIAL_STEP;
340 UNPINNED_RADIAL_STEP;
341 count -= 2;
342 }
343 if (count) {
344 UNPINNED_RADIAL_STEP;
345 }
346 }
347 else {
348 // Specializing for dy == 0 gains us 25% on Skia benchmarks
349 if (dy == 0) {
350 unsigned yy = SkPin32(fy, -0xFFFF >> 1, 0xFFFF >> 1);
351 yy *= yy;
352 do {
353 unsigned xx = SkPin32(fx, -0xFFFF >> 1, 0xFFFF >> 1);
354 unsigned fi = (xx * xx + yy) >> (14 + 16 - kSQRT_TABLE_BITS);
355 fi = SkFastMin32(fi, 0xFFFF >> (16 - kSQRT_TABLE_BITS));
356 *dstC++ = cache[toggle + (sqrt_table[fi] >>
357 SkGradientShaderBase::kSqrt32Shift)];
358 toggle ^= SkGradientShaderBase::kDitherStride32;
359 fx += dx;
360 } while (--count != 0);
361 } else {
362 do {
363 unsigned xx = SkPin32(fx, -0xFFFF >> 1, 0xFFFF >> 1);
364 unsigned fi = SkPin32(fy, -0xFFFF >> 1, 0xFFFF >> 1);
365 fi = (xx * xx + fi * fi) >> (14 + 16 - kSQRT_TABLE_BITS);
366 fi = SkFastMin32(fi, 0xFFFF >> (16 - kSQRT_TABLE_BITS));
367 *dstC++ = cache[toggle + (sqrt_table[fi] >>
368 SkGradientShaderBase::kSqrt32Shift)];
369 toggle ^= SkGradientShaderBase::kDitherStride32;
370 fx += dx;
371 fy += dy;
372 } while (--count != 0);
373 }
374 }
375}
376
377// Unrolling this loop doesn't seem to help (when float); we're stalling to
378// get the results of the sqrt (?), and don't have enough extra registers to
379// have many in flight.
380void shadeSpan_radial_mirror(SkScalar sfx, SkScalar sdx,
381 SkScalar sfy, SkScalar sdy,
382 SkPMColor* SK_RESTRICT dstC, const SkPMColor* SK_RESTRICT cache,
383 int count, int toggle) {
384 do {
385#ifdef SK_SCALAR_IS_FLOAT
386 float fdist = sk_float_sqrt(sfx*sfx + sfy*sfy);
387 SkFixed dist = SkFloatToFixed(fdist);
388#else
389 SkFixed magnitudeSquared = SkFixedSquare(sfx) +
390 SkFixedSquare(sfy);
391 if (magnitudeSquared < 0) // Overflow.
392 magnitudeSquared = SK_FixedMax;
393 SkFixed dist = SkFixedSqrt(magnitudeSquared);
394#endif
395 unsigned fi = mirror_tileproc(dist);
396 SkASSERT(fi <= 0xFFFF);
397 *dstC++ = cache[toggle + (fi >> SkGradientShaderBase::kCache32Shift)];
398 toggle ^= SkGradientShaderBase::kDitherStride32;
399 sfx += sdx;
400 sfy += sdy;
401 } while (--count != 0);
402}
403
404void shadeSpan_radial_repeat(SkScalar sfx, SkScalar sdx,
405 SkScalar sfy, SkScalar sdy,
406 SkPMColor* SK_RESTRICT dstC, const SkPMColor* SK_RESTRICT cache,
407 int count, int toggle) {
408 SkFixed fx = SkScalarToFixed(sfx);
409 SkFixed dx = SkScalarToFixed(sdx);
410 SkFixed fy = SkScalarToFixed(sfy);
411 SkFixed dy = SkScalarToFixed(sdy);
412 do {
413 SkFixed magnitudeSquared = SkFixedSquare(fx) +
414 SkFixedSquare(fy);
415 if (magnitudeSquared < 0) // Overflow.
416 magnitudeSquared = SK_FixedMax;
417 SkFixed dist = SkFixedSqrt(magnitudeSquared);
418 unsigned fi = repeat_tileproc(dist);
419 SkASSERT(fi <= 0xFFFF);
420 *dstC++ = cache[toggle + (fi >> SkGradientShaderBase::kCache32Shift)];
421 toggle ^= SkGradientShaderBase::kDitherStride32;
422 fx += dx;
423 fy += dy;
424 } while (--count != 0);
425}
426}
427
428void SkRadialGradient::shadeSpan(int x, int y,
429 SkPMColor* SK_RESTRICT dstC, int count) {
430 SkASSERT(count > 0);
431
432 SkPoint srcPt;
433 SkMatrix::MapXYProc dstProc = fDstToIndexProc;
434 TileProc proc = fTileProc;
435 const SkPMColor* SK_RESTRICT cache = this->getCache32();
436#ifdef USE_DITHER_32BIT_GRADIENT
437 int toggle = ((x ^ y) & 1) * SkGradientShaderBase::kDitherStride32;
438#else
439 int toggle = 0;
440#endif
441
442 if (fDstToIndexClass != kPerspective_MatrixClass) {
443 dstProc(fDstToIndex, SkIntToScalar(x) + SK_ScalarHalf,
444 SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
445 SkScalar sdx = fDstToIndex.getScaleX();
446 SkScalar sdy = fDstToIndex.getSkewY();
447
448 if (fDstToIndexClass == kFixedStepInX_MatrixClass) {
449 SkFixed storage[2];
450 (void)fDstToIndex.fixedStepInX(SkIntToScalar(y),
451 &storage[0], &storage[1]);
452 sdx = SkFixedToScalar(storage[0]);
453 sdy = SkFixedToScalar(storage[1]);
454 } else {
455 SkASSERT(fDstToIndexClass == kLinear_MatrixClass);
456 }
457
458 RadialShadeProc shadeProc = shadeSpan_radial_repeat;
459 if (SkShader::kClamp_TileMode == fTileMode) {
460 shadeProc = shadeSpan_radial_clamp;
461 } else if (SkShader::kMirror_TileMode == fTileMode) {
462 shadeProc = shadeSpan_radial_mirror;
463 } else {
464 SkASSERT(SkShader::kRepeat_TileMode == fTileMode);
465 }
466 (*shadeProc)(srcPt.fX, sdx, srcPt.fY, sdy, dstC, cache, count, toggle);
467 } else { // perspective case
468 SkScalar dstX = SkIntToScalar(x);
469 SkScalar dstY = SkIntToScalar(y);
470 do {
471 dstProc(fDstToIndex, dstX, dstY, &srcPt);
472 unsigned fi = proc(SkScalarToFixed(srcPt.length()));
473 SkASSERT(fi <= 0xFFFF);
474 *dstC++ = cache[fi >> SkGradientShaderBase::kCache32Shift];
475 dstX += SK_Scalar1;
476 } while (--count != 0);
477 }
478}
479