blob: a44f439e6f016952e2514de96c6c9d33bbf82991 [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
// Scale factor applied to the blur radius so that the result matches the
// behavior of all existing blur code (both on the CPU and the GPU). This magic
// constant is 1/sqrt(3).
17
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +000018// TODO: get rid of this fudge factor and move any required fudging up into
humper@google.com7c7292c2013-01-04 20:29:03 +000019// the calling library
20
21#define kBlurRadiusFudgeFactor SkFloatToScalar( .57735f )
22
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000023#define UNROLL_SEPARABLE_LOOPS
24
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000025/**
26 * This function performs a box blur in X, of the given radius. If the
skia.committer@gmail.com884e60b2012-11-16 02:01:17 +000027 * "transpose" parameter is true, it will transpose the pixels on write,
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000028 * such that X and Y are swapped. Reads are always performed from contiguous
29 * memory in X, for speed. The destination buffer (dst) must be at least
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000030 * (width + leftRadius + rightRadius) * height bytes in size.
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000031 */
32static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000033 int leftRadius, int rightRadius, int width, int height,
34 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000035{
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000036 int diameter = leftRadius + rightRadius;
37 int kernelSize = diameter + 1;
38 int border = SkMin32(width, diameter);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000039 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000040 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000041 int dst_x_stride = transpose ? height : 1;
42 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000043 for (int y = 0; y < height; ++y) {
44 int sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000045 uint8_t* dptr = dst + y * dst_y_stride;
46 const uint8_t* right = src + y * src_y_stride;
47 const uint8_t* left = right;
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000048 for (int x = 0; x < rightRadius - leftRadius; x++) {
49 *dptr = 0;
50 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000051 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000052#define LEFT_BORDER_ITER \
53 sum += *right++; \
54 *dptr = (sum * scale) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000055 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000056
57 int x = 0;
58#ifdef UNROLL_SEPARABLE_LOOPS
59 for (; x < border - 16; x += 16) {
60 LEFT_BORDER_ITER
61 LEFT_BORDER_ITER
62 LEFT_BORDER_ITER
63 LEFT_BORDER_ITER
64 LEFT_BORDER_ITER
65 LEFT_BORDER_ITER
66 LEFT_BORDER_ITER
67 LEFT_BORDER_ITER
68 LEFT_BORDER_ITER
69 LEFT_BORDER_ITER
70 LEFT_BORDER_ITER
71 LEFT_BORDER_ITER
72 LEFT_BORDER_ITER
73 LEFT_BORDER_ITER
74 LEFT_BORDER_ITER
75 LEFT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000076 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000077#endif
78 for (; x < border; ++x) {
79 LEFT_BORDER_ITER
80 }
81#undef LEFT_BORDER_ITER
82#define TRIVIAL_ITER \
83 *dptr = (sum * scale) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000084 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000085 x = width;
86#ifdef UNROLL_SEPARABLE_LOOPS
87 for (; x < diameter - 16; x += 16) {
88 TRIVIAL_ITER
89 TRIVIAL_ITER
90 TRIVIAL_ITER
91 TRIVIAL_ITER
92 TRIVIAL_ITER
93 TRIVIAL_ITER
94 TRIVIAL_ITER
95 TRIVIAL_ITER
96 TRIVIAL_ITER
97 TRIVIAL_ITER
98 TRIVIAL_ITER
99 TRIVIAL_ITER
100 TRIVIAL_ITER
101 TRIVIAL_ITER
102 TRIVIAL_ITER
103 TRIVIAL_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000104 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000105#endif
106 for (; x < diameter; ++x) {
107 TRIVIAL_ITER
108 }
109#undef TRIVIAL_ITER
110#define CENTER_ITER \
111 sum += *right++; \
112 *dptr = (sum * scale) >> 24; \
113 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000114 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000115
116 x = diameter;
117#ifdef UNROLL_SEPARABLE_LOOPS
118 for (; x < width - 16; x += 16) {
119 CENTER_ITER
120 CENTER_ITER
121 CENTER_ITER
122 CENTER_ITER
123 CENTER_ITER
124 CENTER_ITER
125 CENTER_ITER
126 CENTER_ITER
127 CENTER_ITER
128 CENTER_ITER
129 CENTER_ITER
130 CENTER_ITER
131 CENTER_ITER
132 CENTER_ITER
133 CENTER_ITER
134 CENTER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000135 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000136#endif
137 for (; x < width; ++x) {
138 CENTER_ITER
139 }
140#undef CENTER_ITER
141#define RIGHT_BORDER_ITER \
142 *dptr = (sum * scale) >> 24; \
143 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000144 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000145
146 x = 0;
147#ifdef UNROLL_SEPARABLE_LOOPS
148 for (; x < border - 16; x += 16) {
149 RIGHT_BORDER_ITER
150 RIGHT_BORDER_ITER
151 RIGHT_BORDER_ITER
152 RIGHT_BORDER_ITER
153 RIGHT_BORDER_ITER
154 RIGHT_BORDER_ITER
155 RIGHT_BORDER_ITER
156 RIGHT_BORDER_ITER
157 RIGHT_BORDER_ITER
158 RIGHT_BORDER_ITER
159 RIGHT_BORDER_ITER
160 RIGHT_BORDER_ITER
161 RIGHT_BORDER_ITER
162 RIGHT_BORDER_ITER
163 RIGHT_BORDER_ITER
164 RIGHT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000165 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000166#endif
167 for (; x < border; ++x) {
168 RIGHT_BORDER_ITER
169 }
170#undef RIGHT_BORDER_ITER
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +0000171 for (int x = 0; x < leftRadius - rightRadius; x++) {
172 *dptr = 0;
173 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000174 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000175 SkASSERT(sum == 0);
176 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000177 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000178}
179
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000180/**
181 * This variant of the box blur handles blurring of non-integer radii. It
182 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
183 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly
184 * interpolates between them. In float this would be:
185 * outer_weight * outer_sum / kernelSize +
186 * (1.0 - outer_weight) * innerSum / (kernelSize - 2)
187 */
188static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
189 int radius, int width, int height,
190 bool transpose, uint8_t outer_weight)
191{
192 int diameter = radius * 2;
193 int kernelSize = diameter + 1;
194 int border = SkMin32(width, diameter);
195 int inner_weight = 255 - outer_weight;
196 outer_weight += outer_weight >> 7;
197 inner_weight += inner_weight >> 7;
198 uint32_t outer_scale = (outer_weight << 16) / kernelSize;
199 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
200 int new_width = width + diameter;
201 int dst_x_stride = transpose ? height : 1;
202 int dst_y_stride = transpose ? 1 : new_width;
203 for (int y = 0; y < height; ++y) {
204 int outer_sum = 0, inner_sum = 0;
205 uint8_t* dptr = dst + y * dst_y_stride;
206 const uint8_t* right = src + y * src_y_stride;
207 const uint8_t* left = right;
208 int x = 0;
209
210#define LEFT_BORDER_ITER \
211 inner_sum = outer_sum; \
212 outer_sum += *right++; \
213 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
214 dptr += dst_x_stride;
215
216#ifdef UNROLL_SEPARABLE_LOOPS
217 for (;x < border - 16; x += 16) {
218 LEFT_BORDER_ITER
219 LEFT_BORDER_ITER
220 LEFT_BORDER_ITER
221 LEFT_BORDER_ITER
222 LEFT_BORDER_ITER
223 LEFT_BORDER_ITER
224 LEFT_BORDER_ITER
225 LEFT_BORDER_ITER
226 LEFT_BORDER_ITER
227 LEFT_BORDER_ITER
228 LEFT_BORDER_ITER
229 LEFT_BORDER_ITER
230 LEFT_BORDER_ITER
231 LEFT_BORDER_ITER
232 LEFT_BORDER_ITER
233 LEFT_BORDER_ITER
234 }
235#endif
236
237 for (;x < border; x++) {
238 LEFT_BORDER_ITER
239 }
240#undef LEFT_BORDER_ITER
241 for (int x = width; x < diameter; ++x) {
242 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24;
243 dptr += dst_x_stride;
244 }
245 x = diameter;
246
247#define CENTER_ITER \
248 inner_sum = outer_sum - *left; \
249 outer_sum += *right++; \
250 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
251 dptr += dst_x_stride; \
252 outer_sum -= *left++;
253
254#ifdef UNROLL_SEPARABLE_LOOPS
255 for (; x < width - 16; x += 16) {
256 CENTER_ITER
257 CENTER_ITER
258 CENTER_ITER
259 CENTER_ITER
260 CENTER_ITER
261 CENTER_ITER
262 CENTER_ITER
263 CENTER_ITER
264 CENTER_ITER
265 CENTER_ITER
266 CENTER_ITER
267 CENTER_ITER
268 CENTER_ITER
269 CENTER_ITER
270 CENTER_ITER
271 CENTER_ITER
272 }
273#endif
274 for (; x < width; ++x) {
275 CENTER_ITER
276 }
277#undef CENTER_ITER
278
279 #define RIGHT_BORDER_ITER \
280 inner_sum = outer_sum - *left++; \
281 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
282 dptr += dst_x_stride; \
283 outer_sum = inner_sum;
284
285 x = 0;
286#ifdef UNROLL_SEPARABLE_LOOPS
287 for (; x < border - 16; x += 16) {
288 RIGHT_BORDER_ITER
289 RIGHT_BORDER_ITER
290 RIGHT_BORDER_ITER
291 RIGHT_BORDER_ITER
292 RIGHT_BORDER_ITER
293 RIGHT_BORDER_ITER
294 RIGHT_BORDER_ITER
295 RIGHT_BORDER_ITER
296 RIGHT_BORDER_ITER
297 RIGHT_BORDER_ITER
298 RIGHT_BORDER_ITER
299 RIGHT_BORDER_ITER
300 RIGHT_BORDER_ITER
301 RIGHT_BORDER_ITER
302 RIGHT_BORDER_ITER
303 RIGHT_BORDER_ITER
304 }
305#endif
306 for (; x < border; x++) {
307 RIGHT_BORDER_ITER
308 }
309#undef RIGHT_BORDER_ITER
310 SkASSERT(outer_sum == 0 && inner_sum == 0);
311 }
312 return new_width;
313}
314
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000315static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
316{
317 *loRadius = *hiRadius = SkScalarCeil(passRadius);
318 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
319 *loRadius = *hiRadius - 1;
320 }
321}
322
tomhudson@google.com01224d52011-11-28 18:22:01 +0000323// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
324// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
tomhudson@google.com054ff1e2012-01-11 19:29:08 +0000327#if defined(SK_BUILD_FOR_WIN32)
tomhudson@google.com01224d52011-11-28 18:22:01 +0000328#define UNROLL_KERNEL_LOOP 1
329#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +0000330
reed@android.com45607672009-09-21 00:27:08 +0000331/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
332 src values at their position, plus all values above and to the left.
333 When we sample into this buffer, we need an initial row and column of 0s,
334 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000335
reed@android.com45607672009-09-21 00:27:08 +0000336 src[i, j] == sum[i+1, j+1]
337 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000338
reed@android.com45607672009-09-21 00:27:08 +0000339 We assume that the sum buffer's stride == its width
340 */
reed@google.com03016a32011-08-12 14:59:59 +0000341static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
342 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +0000343 int sumW = srcW + 1;
344
345 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000346 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000347 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000348
349 int x, y;
350
reed@android.com45607672009-09-21 00:27:08 +0000351 // zero out the top row and column
352 memset(sum, 0, sumW * sizeof(sum[0]));
353 sum += sumW;
354
reed@android.com8a1c16f2008-12-17 15:59:43 +0000355 // special case first row
356 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000357 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000358 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000359 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000360 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000361 }
362 src += srcRB;
363
364 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000365 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000366 uint32_t L = 0;
367 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000368 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000369
370 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
371 uint32_t T = sum[-sumW];
372 X = *src++ + L + T - C;
373 *sum++ = X;
374 L = X;
375 C = T;
376 }
377
378 for (; x >= 4; x-=4) {
379 uint32_t T = sum[-sumW];
380 X = *src++ + L + T - C;
381 *sum++ = X;
382 L = X;
383 C = T;
384 T = sum[-sumW];
385 X = *src++ + L + T - C;
386 *sum++ = X;
387 L = X;
388 C = T;
389 T = sum[-sumW];
390 X = *src++ + L + T - C;
391 *sum++ = X;
392 L = X;
393 C = T;
394 T = sum[-sumW];
395 X = *src++ + L + T - C;
396 *sum++ = X;
397 L = X;
398 C = T;
399 }
400
401 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000402 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000403 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000404 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000405 L = X;
406 C = T;
407 }
408 src += srcRB;
409 }
410}
411
reed@google.com03016a32011-08-12 14:59:59 +0000412/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000413 * This is the path for apply_kernel() to be taken when the kernel
414 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000415 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000416static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
417 int sw, int sh) {
418 SkASSERT(2*rx > sw);
419
reed@android.com8a1c16f2008-12-17 15:59:43 +0000420 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
421
reed@android.com45607672009-09-21 00:27:08 +0000422 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000423
424 int dw = sw + 2*rx;
425 int dh = sh + 2*ry;
426
reed@android.com45607672009-09-21 00:27:08 +0000427 int prev_y = -2*ry;
428 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000429
reed@android.com45607672009-09-21 00:27:08 +0000430 for (int y = 0; y < dh; y++) {
431 int py = SkClampPos(prev_y) * sumStride;
432 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000433
reed@android.com45607672009-09-21 00:27:08 +0000434 int prev_x = -2*rx;
435 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000436
reed@android.com45607672009-09-21 00:27:08 +0000437 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000438 int px = SkClampPos(prev_x);
439 int nx = SkFastMin32(next_x, sw);
440
reed@android.com45607672009-09-21 00:27:08 +0000441 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
442 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000443
444 prev_x += 1;
445 next_x += 1;
446 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000447
448 prev_y += 1;
449 next_y += 1;
450 }
451}
452/**
453 * sw and sh are the width and height of the src. Since the sum buffer
454 * matches that, but has an extra row and col at the beginning (with zeros),
455 * we can just use sw and sh as our "max" values for pinning coordinates
456 * when sampling into sum[][]
457 *
458 * The inner loop is conceptually simple; we break it into several sections
459 * to improve performance. Here's the original version:
460 for (int x = 0; x < dw; x++) {
461 int px = SkClampPos(prev_x);
462 int nx = SkFastMin32(next_x, sw);
463
464 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
465 *dst++ = SkToU8(tmp * scale >> 24);
466
467 prev_x += 1;
468 next_x += 1;
469 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000470 * The sections are:
471 * left-hand section, where prev_x is clamped to 0
472 * center section, where neither prev_x nor next_x is clamped
473 * right-hand section, where next_x is clamped to sw
474 * On some operating systems, the center section is unrolled for additional
475 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000476*/
477static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
478 int sw, int sh) {
479 if (2*rx > sw) {
480 kernel_clamped(dst, rx, ry, sum, sw, sh);
481 return;
482 }
483
484 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
485
486 int sumStride = sw + 1;
487
488 int dw = sw + 2*rx;
489 int dh = sh + 2*ry;
490
491 int prev_y = -2*ry;
492 int next_y = 1;
493
494 SkASSERT(2*rx <= dw - 2*rx);
495
496 for (int y = 0; y < dh; y++) {
497 int py = SkClampPos(prev_y) * sumStride;
498 int ny = SkFastMin32(next_y, sh) * sumStride;
499
500 int prev_x = -2*rx;
501 int next_x = 1;
502 int x = 0;
503
504 for (; x < 2*rx; x++) {
505 SkASSERT(prev_x <= 0);
506 SkASSERT(next_x <= sw);
507
508 int px = 0;
509 int nx = next_x;
510
511 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
512 *dst++ = SkToU8(tmp * scale >> 24);
513
514 prev_x += 1;
515 next_x += 1;
516 }
517
tomhudson@google.com01224d52011-11-28 18:22:01 +0000518 int i0 = prev_x + py;
519 int i1 = next_x + ny;
520 int i2 = next_x + py;
521 int i3 = prev_x + ny;
522
523#if UNROLL_KERNEL_LOOP
524 for (; x < dw - 2*rx - 4; x += 4) {
525 SkASSERT(prev_x >= 0);
526 SkASSERT(next_x <= sw);
527
528 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
529 *dst++ = SkToU8(tmp * scale >> 24);
530 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
531 *dst++ = SkToU8(tmp * scale >> 24);
532 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
533 *dst++ = SkToU8(tmp * scale >> 24);
534 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
535 *dst++ = SkToU8(tmp * scale >> 24);
536
537 prev_x += 4;
538 next_x += 4;
539 }
540#endif
541
tomhudson@google.com8caac642011-11-22 15:58:06 +0000542 for (; x < dw - 2*rx; x++) {
543 SkASSERT(prev_x >= 0);
544 SkASSERT(next_x <= sw);
545
tomhudson@google.com01224d52011-11-28 18:22:01 +0000546 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000547 *dst++ = SkToU8(tmp * scale >> 24);
548
549 prev_x += 1;
550 next_x += 1;
551 }
552
553 for (; x < dw; x++) {
554 SkASSERT(prev_x >= 0);
555 SkASSERT(next_x > sw);
556
557 int px = prev_x;
558 int nx = sw;
559
560 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
561 *dst++ = SkToU8(tmp * scale >> 24);
562
563 prev_x += 1;
564 next_x += 1;
565 }
566
reed@android.com8a1c16f2008-12-17 15:59:43 +0000567 prev_y += 1;
568 next_y += 1;
569 }
570}
571
reed@google.com03016a32011-08-12 14:59:59 +0000572/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000573 * This is the path for apply_kernel_interp() to be taken when the kernel
574 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000575 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000576static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000577 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000578 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000579
580 int inner_weight = 255 - outer_weight;
581
582 // round these guys up if they're bigger than 127
583 outer_weight += outer_weight >> 7;
584 inner_weight += inner_weight >> 7;
585
586 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
587 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
588
reed@android.com45607672009-09-21 00:27:08 +0000589 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000590
591 int dw = sw + 2*rx;
592 int dh = sh + 2*ry;
593
reed@android.com45607672009-09-21 00:27:08 +0000594 int prev_y = -2*ry;
595 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000596
reed@android.com45607672009-09-21 00:27:08 +0000597 for (int y = 0; y < dh; y++) {
598 int py = SkClampPos(prev_y) * sumStride;
599 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000600
reed@android.com45607672009-09-21 00:27:08 +0000601 int ipy = SkClampPos(prev_y + 1) * sumStride;
602 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000603
reed@android.com45607672009-09-21 00:27:08 +0000604 int prev_x = -2*rx;
605 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000606
reed@android.com45607672009-09-21 00:27:08 +0000607 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000608 int px = SkClampPos(prev_x);
609 int nx = SkFastMin32(next_x, sw);
610
611 int ipx = SkClampPos(prev_x + 1);
612 int inx = SkClampMax(next_x - 1, sw);
613
tomhudson@google.com8caac642011-11-22 15:58:06 +0000614 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
615 - sum[nx+py] - sum[px+ny];
616 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
617 - sum[inx+ipy] - sum[ipx+iny];
618 *dst++ = SkToU8((outer_sum * outer_scale
619 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000620
621 prev_x += 1;
622 next_x += 1;
623 }
624 prev_y += 1;
625 next_y += 1;
626 }
627}
628
tomhudson@google.com8caac642011-11-22 15:58:06 +0000629/**
630 * sw and sh are the width and height of the src. Since the sum buffer
631 * matches that, but has an extra row and col at the beginning (with zeros),
632 * we can just use sw and sh as our "max" values for pinning coordinates
633 * when sampling into sum[][]
634 *
635 * The inner loop is conceptually simple; we break it into several variants
636 * to improve performance. Here's the original version:
637 for (int x = 0; x < dw; x++) {
638 int px = SkClampPos(prev_x);
639 int nx = SkFastMin32(next_x, sw);
640
641 int ipx = SkClampPos(prev_x + 1);
642 int inx = SkClampMax(next_x - 1, sw);
643
644 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
645 - sum[nx+py] - sum[px+ny];
646 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
647 - sum[inx+ipy] - sum[ipx+iny];
648 *dst++ = SkToU8((outer_sum * outer_scale
649 + inner_sum * inner_scale) >> 24);
650
651 prev_x += 1;
652 next_x += 1;
653 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000654 * The sections are:
655 * left-hand section, where prev_x is clamped to 0
656 * center section, where neither prev_x nor next_x is clamped
657 * right-hand section, where next_x is clamped to sw
658 * On some operating systems, the center section is unrolled for additional
659 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000660*/
661static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
662 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
663 SkASSERT(rx > 0 && ry > 0);
664 SkASSERT(outer_weight <= 255);
665
666 if (2*rx > sw) {
667 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
668 return;
669 }
670
671 int inner_weight = 255 - outer_weight;
672
673 // round these guys up if they're bigger than 127
674 outer_weight += outer_weight >> 7;
675 inner_weight += inner_weight >> 7;
676
677 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
678 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
679
680 int sumStride = sw + 1;
681
682 int dw = sw + 2*rx;
683 int dh = sh + 2*ry;
684
685 int prev_y = -2*ry;
686 int next_y = 1;
687
688 SkASSERT(2*rx <= dw - 2*rx);
689
690 for (int y = 0; y < dh; y++) {
691 int py = SkClampPos(prev_y) * sumStride;
692 int ny = SkFastMin32(next_y, sh) * sumStride;
693
694 int ipy = SkClampPos(prev_y + 1) * sumStride;
695 int iny = SkClampMax(next_y - 1, sh) * sumStride;
696
697 int prev_x = -2*rx;
698 int next_x = 1;
699 int x = 0;
700
701 for (; x < 2*rx; x++) {
702 SkASSERT(prev_x < 0);
703 SkASSERT(next_x <= sw);
704
705 int px = 0;
706 int nx = next_x;
707
708 int ipx = 0;
709 int inx = next_x - 1;
710
711 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
712 - sum[nx+py] - sum[px+ny];
713 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
714 - sum[inx+ipy] - sum[ipx+iny];
715 *dst++ = SkToU8((outer_sum * outer_scale
716 + inner_sum * inner_scale) >> 24);
717
718 prev_x += 1;
719 next_x += 1;
720 }
721
tomhudson@google.com01224d52011-11-28 18:22:01 +0000722 int i0 = prev_x + py;
723 int i1 = next_x + ny;
724 int i2 = next_x + py;
725 int i3 = prev_x + ny;
726 int i4 = prev_x + 1 + ipy;
727 int i5 = next_x - 1 + iny;
728 int i6 = next_x - 1 + ipy;
729 int i7 = prev_x + 1 + iny;
730
731#if UNROLL_KERNEL_LOOP
732 for (; x < dw - 2*rx - 4; x += 4) {
733 SkASSERT(prev_x >= 0);
734 SkASSERT(next_x <= sw);
735
736 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
737 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
738 *dst++ = SkToU8((outer_sum * outer_scale
739 + inner_sum * inner_scale) >> 24);
740 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
741 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
742 *dst++ = SkToU8((outer_sum * outer_scale
743 + inner_sum * inner_scale) >> 24);
744 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
745 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
746 *dst++ = SkToU8((outer_sum * outer_scale
747 + inner_sum * inner_scale) >> 24);
748 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
749 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
750 *dst++ = SkToU8((outer_sum * outer_scale
751 + inner_sum * inner_scale) >> 24);
752
753 prev_x += 4;
754 next_x += 4;
755 }
756#endif
757
tomhudson@google.com8caac642011-11-22 15:58:06 +0000758 for (; x < dw - 2*rx; x++) {
759 SkASSERT(prev_x >= 0);
760 SkASSERT(next_x <= sw);
761
tomhudson@google.com01224d52011-11-28 18:22:01 +0000762 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
763 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000764 *dst++ = SkToU8((outer_sum * outer_scale
765 + inner_sum * inner_scale) >> 24);
766
767 prev_x += 1;
768 next_x += 1;
769 }
770
771 for (; x < dw; x++) {
772 SkASSERT(prev_x >= 0);
773 SkASSERT(next_x > sw);
774
775 int px = prev_x;
776 int nx = sw;
777
778 int ipx = prev_x + 1;
779 int inx = sw;
780
781 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
782 - sum[nx+py] - sum[px+ny];
783 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
784 - sum[inx+ipy] - sum[ipx+iny];
785 *dst++ = SkToU8((outer_sum * outer_scale
786 + inner_sum * inner_scale) >> 24);
787
788 prev_x += 1;
789 next_x += 1;
790 }
791
792 prev_y += 1;
793 next_y += 1;
794 }
795}
796
reed@android.com8a1c16f2008-12-17 15:59:43 +0000797#include "SkColorPriv.h"
798
reed@android.com0e3c6642009-09-18 13:41:56 +0000799static void merge_src_with_blur(uint8_t dst[], int dstRB,
800 const uint8_t src[], int srcRB,
801 const uint8_t blur[], int blurRB,
802 int sw, int sh) {
803 dstRB -= sw;
804 srcRB -= sw;
805 blurRB -= sw;
806 while (--sh >= 0) {
807 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000808 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
809 dst += 1;
810 src += 1;
811 blur += 1;
812 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000813 dst += dstRB;
814 src += srcRB;
815 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000816 }
817}
818
819static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000820 const uint8_t src[], int srcRowBytes,
821 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000822 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000823 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000824 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000825 switch (style) {
826 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000827 for (x = sw - 1; x >= 0; --x) {
828 int s = *src;
829 int d = *dst;
830 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000831 dst += 1;
832 src += 1;
833 }
834 break;
835 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000836 for (x = sw - 1; x >= 0; --x) {
837 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000838 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000839 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000840 dst += 1;
841 src += 1;
842 }
843 break;
844 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000845 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000846 break;
847 }
848 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000849 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000850 }
851}
852
reed@google.com03016a32011-08-12 14:59:59 +0000853///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000854
855// we use a local funciton to wrap the class static method to work around
856// a bug in gcc98
857void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000858void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000859 SkMask::FreeImage(image);
860}
861
862bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000863 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000864 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000865{
reed@google.com03016a32011-08-12 14:59:59 +0000866 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000867 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000868 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000869
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000870 // Force high quality off for small radii (performance)
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000871 if (radius < SkIntToScalar(3)) {
872 quality = kLow_Quality;
873 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000874
875 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000876 int passCount = (kHigh_Quality == quality) ? 3 : 1;
humper@google.com7c7292c2013-01-04 20:29:03 +0000877 SkScalar passRadius = (kHigh_Quality == quality) ? SkScalarMul( radius, kBlurRadiusFudgeFactor): radius;
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000878
879 int rx = SkScalarCeil(passRadius);
880 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000881
882 SkASSERT(rx >= 0);
883 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000884 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000885 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000886 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000887
888 int ry = rx; // only do square blur for now
889
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000890 int padx = passCount * rx;
891 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000892 if (margin) {
893 margin->set(padx, pady);
894 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000895 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
896 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000897 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000898 dst->fFormat = SkMask::kA8_Format;
899 dst->fImage = NULL;
900
reed@android.com0e3c6642009-09-18 13:41:56 +0000901 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000902 size_t dstSize = dst->computeImageSize();
903 if (0 == dstSize) {
904 return false; // too big to allocate, abort
905 }
906
reed@android.com8a1c16f2008-12-17 15:59:43 +0000907 int sw = src.fBounds.width();
908 int sh = src.fBounds.height();
909 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +0000910 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000911
912 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
913
914 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000915 if (separable) {
916 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
917 uint8_t* tp = tmpBuffer.get();
918 int w = sw, h = sh;
919
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000920 if (outer_weight == 255) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000921 int loRadius, hiRadius;
922 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000923 if (kHigh_Quality == quality) {
924 // Do three X blurs, with a transpose on the final one.
925 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
926 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
927 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
928 // Do three Y blurs, with a transpose on the final one.
929 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
930 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
931 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
932 } else {
933 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
934 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
935 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000936 } else {
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000937 if (kHigh_Quality == quality) {
938 // Do three X blurs, with a transpose on the final one.
939 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outer_weight);
940 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outer_weight);
941 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outer_weight);
942 // Do three Y blurs, with a transpose on the final one.
943 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outer_weight);
944 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outer_weight);
945 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);
946 } else {
947 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outer_weight);
948 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);
949 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000950 }
951 } else {
reed@google.com03016a32011-08-12 14:59:59 +0000952 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
953 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
954 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000955 uint32_t* sumBuffer = storage.get();
956
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000957 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +0000958 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +0000959 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000960 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +0000961 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000962 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +0000963 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000964
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000965 if (kHigh_Quality == quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000966 //pass2: dp is source, tmpBuffer is destination
967 int tmp_sw = sw + 2 * rx;
968 int tmp_sh = sh + 2 * ry;
969 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
970 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
971 if (outer_weight == 255)
972 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
973 else
reed@google.com03016a32011-08-12 14:59:59 +0000974 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
975 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000976
977 //pass3: tmpBuffer is source, dp is destination
978 tmp_sw += 2 * rx;
979 tmp_sh += 2 * ry;
980 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
981 if (outer_weight == 255)
982 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
983 else
reed@google.com03016a32011-08-12 14:59:59 +0000984 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
985 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000986 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000987 }
988
989 dst->fImage = dp;
990 // if need be, alloc the "real" dst (same size as src) and copy/merge
991 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +0000992 if (style == kInner_Style) {
993 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +0000994 size_t srcSize = src.computeImageSize();
995 if (0 == srcSize) {
996 return false; // too big to allocate, abort
997 }
998 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +0000999 merge_src_with_blur(dst->fImage, src.fRowBytes,
1000 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +00001001 dp + passCount * (rx + ry * dst->fRowBytes),
1002 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001003 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +00001004 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +00001005 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
1006 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001007 }
1008 (void)autoCall.detach();
1009 }
1010
reed@android.com0e3c6642009-09-18 13:41:56 +00001011 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001012 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +00001013 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +00001014 }
1015
reed@android.com8a1c16f2008-12-17 15:59:43 +00001016 return true;
1017}
1018
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001019bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
1020 SkScalar radius, Style style, Quality quality,
1021 SkIPoint* margin)
1022{
1023 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
1024}
1025
1026bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
1027 SkScalar radius, Style style, Quality quality,
1028 SkIPoint* margin)
1029{
1030 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
1031}
humper@google.com7c7292c2013-01-04 20:29:03 +00001032
/* Convolving a box with itself three times results in a piecewise
   quadratic function:

   0                              x <= -1.5
   9/8 + 3/2 x + 1/2 x^2   -1.5 < x <= -.5
   3/4 - x^2                -.5 < x <= .5
   9/8 - 3/2 x + 1/2 x^2    0.5 < x <= 1.5
   0                        1.5 < x

   To get the profile curve of the blurred step function at the rectangle
   edge, we evaluate the indefinite integral, which is piecewise cubic:

   0                                        x <= -1.5
   9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3   -1.5 < x <= -0.5
   1/2 + 3/4 x - 1/3 x^3              -.5 < x <= .5
   7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3     .5 < x <= 1.5
   1                                  1.5 < x
*/
1051
// Evaluates the complement (1 - F(x)) of the integral F of the triple-box
// kernel, whose support is [-1.5, 1.5]: the result is 1 for x < -1.5, 0 for
// x > 1.5, and a piecewise cubic that falls from 1 to 0 in between.
static float gaussian_integral( float x ) {
    // Outside the kernel's support the value is constant.
    if ( x > 1.5f ) {
        return 0.0f;
    }
    if ( x < -1.5f ) {
        return 1.0f;
    }

    const float sq = x*x;
    const float cube = sq*x;

    float result;
    if ( x > 0.5f ) {
        // right-hand cubic piece
        result = 0.5625f - ( cube / 6.0f - 3.0f * sq * 0.25f + 1.125f * x);
    } else if ( x > -0.5f ) {
        // central cubic piece
        result = 0.5f - (0.75f * x - cube / 3.0f);
    } else {
        // left-hand cubic piece
        result = 0.4375f + (-cube / 6.0f - 3.0f * sq * 0.25f - 1.125f * x);
    }
    return result;
}
1071
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001072/*
humper@google.com7c7292c2013-01-04 20:29:03 +00001073 compute_profile allocates and fills in an array of floating
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001074 point values between 0 and 255 for the profile signature of
humper@google.com7c7292c2013-01-04 20:29:03 +00001075 a blurred half-plane with the given blur radius. Since we're
1076 going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
1077 all the time, we actually fill in the profile pre-inverted
1078 (already done 255-x).
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001079
humper@google.com7c7292c2013-01-04 20:29:03 +00001080 The function returns the size of the array allocated for the
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001081 profile. It's the responsibility of the caller to delete the
humper@google.com7c7292c2013-01-04 20:29:03 +00001082 memory returned in profile_out.
1083*/
1084
1085static int compute_profile( SkScalar radius, unsigned int **profile_out ) {
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001086 int size = SkScalarFloorToInt(radius * 3 + 1);
humper@google.com7c7292c2013-01-04 20:29:03 +00001087 int center = size >> 1;
1088
1089 unsigned int *profile = new unsigned int [size];
1090
1091 float invr = 1.0f/radius;
1092
1093 profile[0] = 255;
1094 for (int x = 1 ; x < size ; x++) {
1095 float scaled_x = ( center - x ) * invr;
1096 float gi = gaussian_integral( scaled_x );
1097 profile[x] = 255 - (uint8_t) ( 255.f * gi );
1098 }
1099
1100 *profile_out = profile;
1101 return size;
1102}
1103
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001104// TODO MAYBE: Maintain a profile cache to avoid recomputing this for
humper@google.com7c7292c2013-01-04 20:29:03 +00001105// commonly used radii. Consider baking some of the most common blur radii
1106// directly in as static data?
1107
1108// Implementation adapted from Michael Herf's approach:
1109// http://stereopsis.com/shadowrect/
1110
1111bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,
1112 SkScalar provided_radius, Style style, Quality quality,
1113 SkIPoint *margin) {
1114 int profile_size;
1115 unsigned int *profile;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001116
1117
humper@google.com7c7292c2013-01-04 20:29:03 +00001118 float radius = SkScalarToFloat( SkScalarMul( provided_radius, kBlurRadiusFudgeFactor ) );
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001119
humper@google.com7c7292c2013-01-04 20:29:03 +00001120 profile_size = compute_profile( radius, &profile );
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001121
humper@google.com7c7292c2013-01-04 20:29:03 +00001122 int pad = (int) (radius * 1.5f + 1);
1123 if (margin) {
1124 margin->set( pad, pad );
1125 }
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001126 dst->fBounds = SkIRect::MakeWH(SkScalarFloorToInt(src.width()), SkScalarFloorToInt(src.height()));
humper@google.com7c7292c2013-01-04 20:29:03 +00001127 dst->fBounds.outset(pad, pad);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001128
humper@google.com7c7292c2013-01-04 20:29:03 +00001129 dst->fRowBytes = dst->fBounds.width();
1130 dst->fFormat = SkMask::kA8_Format;
1131 dst->fImage = NULL;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001132
humper@google.com7c7292c2013-01-04 20:29:03 +00001133 size_t dstSize = dst->computeImageSize();
1134 if (0 == dstSize) {
1135 return false; // too big to allocate, abort
1136 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001137
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001138 int sw = SkScalarFloorToInt(src.width());
1139 int sh = SkScalarFloorToInt(src.height());
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001140
humper@google.com7c7292c2013-01-04 20:29:03 +00001141 uint8_t* dp = SkMask::AllocImage(dstSize);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001142
humper@google.com7c7292c2013-01-04 20:29:03 +00001143 dst->fImage = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001144
humper@google.com7c7292c2013-01-04 20:29:03 +00001145 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001146
humper@google.com7c7292c2013-01-04 20:29:03 +00001147 int dst_height = dst->fBounds.height();
1148 int dst_width = dst->fBounds.width();
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001149
humper@google.com7c7292c2013-01-04 20:29:03 +00001150 // nearest odd number less than the profile size represents the center
1151 // of the (2x scaled) profile
1152 int center = ( profile_size & ~1 ) - 1;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001153
humper@google.com7c7292c2013-01-04 20:29:03 +00001154 int w = sw - center;
1155 int h = sh - center;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001156
humper@google.com7c7292c2013-01-04 20:29:03 +00001157 uint8_t *outptr = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001158
humper@google.com7c7292c2013-01-04 20:29:03 +00001159 for (int y = 0 ; y < dst_height ; y++)
1160 {
1161 // time to fill in a scanline of the blurry rectangle.
1162 // to avoid floating point math, everything is multiplied by
1163 // 2 where needed. This keeps things nice and integer-oriented.
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001164
humper@google.com7c7292c2013-01-04 20:29:03 +00001165 int dy = abs((y << 1) - dst_height) - h; // how far are we from the original edge?
1166 int oy = dy >> 1;
1167 if (oy < 0) oy = 0;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001168
humper@google.com7c7292c2013-01-04 20:29:03 +00001169 unsigned int profile_y = profile[oy];
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001170
humper@google.com7c7292c2013-01-04 20:29:03 +00001171 for (int x = 0 ; x < (dst_width << 1) ; x += 2) {
1172 int dx = abs( x - dst_width ) - w;
1173 int ox = dx >> 1;
1174 if (ox < 0) ox = 0;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001175
humper@google.com7c7292c2013-01-04 20:29:03 +00001176 unsigned int maskval = SkMulDiv255Round(profile[ox], profile_y);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001177
humper@google.com7c7292c2013-01-04 20:29:03 +00001178 *(outptr++) = maskval;
1179 }
1180 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001181
humper@google.com7c7292c2013-01-04 20:29:03 +00001182 return true;
1183}