blob: 658b0fd47ec9402b2073771e63d77ff4dcac358a [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000015#define UNROLL_SEPARABLE_LOOPS
16
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000017/**
18 * This function performs a box blur in X, of the given radius. If the
skia.committer@gmail.com884e60b2012-11-16 02:01:17 +000019 * "transpose" parameter is true, it will transpose the pixels on write,
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000020 * such that X and Y are swapped. Reads are always performed from contiguous
21 * memory in X, for speed. The destination buffer (dst) must be at least
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000022 * (width + leftRadius + rightRadius) * height bytes in size.
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000023 */
24static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000025 int leftRadius, int rightRadius, int width, int height,
26 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000027{
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000028 int diameter = leftRadius + rightRadius;
29 int kernelSize = diameter + 1;
30 int border = SkMin32(width, diameter);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000031 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000032 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000033 int dst_x_stride = transpose ? height : 1;
34 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000035 for (int y = 0; y < height; ++y) {
36 int sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000037 uint8_t* dptr = dst + y * dst_y_stride;
38 const uint8_t* right = src + y * src_y_stride;
39 const uint8_t* left = right;
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000040 for (int x = 0; x < rightRadius - leftRadius; x++) {
41 *dptr = 0;
42 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000043 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000044#define LEFT_BORDER_ITER \
45 sum += *right++; \
46 *dptr = (sum * scale) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000047 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000048
49 int x = 0;
50#ifdef UNROLL_SEPARABLE_LOOPS
51 for (; x < border - 16; x += 16) {
52 LEFT_BORDER_ITER
53 LEFT_BORDER_ITER
54 LEFT_BORDER_ITER
55 LEFT_BORDER_ITER
56 LEFT_BORDER_ITER
57 LEFT_BORDER_ITER
58 LEFT_BORDER_ITER
59 LEFT_BORDER_ITER
60 LEFT_BORDER_ITER
61 LEFT_BORDER_ITER
62 LEFT_BORDER_ITER
63 LEFT_BORDER_ITER
64 LEFT_BORDER_ITER
65 LEFT_BORDER_ITER
66 LEFT_BORDER_ITER
67 LEFT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000068 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000069#endif
70 for (; x < border; ++x) {
71 LEFT_BORDER_ITER
72 }
73#undef LEFT_BORDER_ITER
74#define TRIVIAL_ITER \
75 *dptr = (sum * scale) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000076 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000077 x = width;
78#ifdef UNROLL_SEPARABLE_LOOPS
79 for (; x < diameter - 16; x += 16) {
80 TRIVIAL_ITER
81 TRIVIAL_ITER
82 TRIVIAL_ITER
83 TRIVIAL_ITER
84 TRIVIAL_ITER
85 TRIVIAL_ITER
86 TRIVIAL_ITER
87 TRIVIAL_ITER
88 TRIVIAL_ITER
89 TRIVIAL_ITER
90 TRIVIAL_ITER
91 TRIVIAL_ITER
92 TRIVIAL_ITER
93 TRIVIAL_ITER
94 TRIVIAL_ITER
95 TRIVIAL_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000096 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000097#endif
98 for (; x < diameter; ++x) {
99 TRIVIAL_ITER
100 }
101#undef TRIVIAL_ITER
102#define CENTER_ITER \
103 sum += *right++; \
104 *dptr = (sum * scale) >> 24; \
105 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000106 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000107
108 x = diameter;
109#ifdef UNROLL_SEPARABLE_LOOPS
110 for (; x < width - 16; x += 16) {
111 CENTER_ITER
112 CENTER_ITER
113 CENTER_ITER
114 CENTER_ITER
115 CENTER_ITER
116 CENTER_ITER
117 CENTER_ITER
118 CENTER_ITER
119 CENTER_ITER
120 CENTER_ITER
121 CENTER_ITER
122 CENTER_ITER
123 CENTER_ITER
124 CENTER_ITER
125 CENTER_ITER
126 CENTER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000127 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000128#endif
129 for (; x < width; ++x) {
130 CENTER_ITER
131 }
132#undef CENTER_ITER
133#define RIGHT_BORDER_ITER \
134 *dptr = (sum * scale) >> 24; \
135 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000136 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000137
138 x = 0;
139#ifdef UNROLL_SEPARABLE_LOOPS
140 for (; x < border - 16; x += 16) {
141 RIGHT_BORDER_ITER
142 RIGHT_BORDER_ITER
143 RIGHT_BORDER_ITER
144 RIGHT_BORDER_ITER
145 RIGHT_BORDER_ITER
146 RIGHT_BORDER_ITER
147 RIGHT_BORDER_ITER
148 RIGHT_BORDER_ITER
149 RIGHT_BORDER_ITER
150 RIGHT_BORDER_ITER
151 RIGHT_BORDER_ITER
152 RIGHT_BORDER_ITER
153 RIGHT_BORDER_ITER
154 RIGHT_BORDER_ITER
155 RIGHT_BORDER_ITER
156 RIGHT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000157 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000158#endif
159 for (; x < border; ++x) {
160 RIGHT_BORDER_ITER
161 }
162#undef RIGHT_BORDER_ITER
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +0000163 for (int x = 0; x < leftRadius - rightRadius; x++) {
164 *dptr = 0;
165 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000166 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000167 SkASSERT(sum == 0);
168 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000169 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000170}
171
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000172/**
173 * This variant of the box blur handles blurring of non-integer radii. It
174 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
175 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly
176 * interpolates between them. In float this would be:
177 * outer_weight * outer_sum / kernelSize +
178 * (1.0 - outer_weight) * innerSum / (kernelSize - 2)
179 */
180static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
181 int radius, int width, int height,
182 bool transpose, uint8_t outer_weight)
183{
184 int diameter = radius * 2;
185 int kernelSize = diameter + 1;
186 int border = SkMin32(width, diameter);
187 int inner_weight = 255 - outer_weight;
188 outer_weight += outer_weight >> 7;
189 inner_weight += inner_weight >> 7;
190 uint32_t outer_scale = (outer_weight << 16) / kernelSize;
191 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
192 int new_width = width + diameter;
193 int dst_x_stride = transpose ? height : 1;
194 int dst_y_stride = transpose ? 1 : new_width;
195 for (int y = 0; y < height; ++y) {
196 int outer_sum = 0, inner_sum = 0;
197 uint8_t* dptr = dst + y * dst_y_stride;
198 const uint8_t* right = src + y * src_y_stride;
199 const uint8_t* left = right;
200 int x = 0;
201
202#define LEFT_BORDER_ITER \
203 inner_sum = outer_sum; \
204 outer_sum += *right++; \
205 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
206 dptr += dst_x_stride;
207
208#ifdef UNROLL_SEPARABLE_LOOPS
209 for (;x < border - 16; x += 16) {
210 LEFT_BORDER_ITER
211 LEFT_BORDER_ITER
212 LEFT_BORDER_ITER
213 LEFT_BORDER_ITER
214 LEFT_BORDER_ITER
215 LEFT_BORDER_ITER
216 LEFT_BORDER_ITER
217 LEFT_BORDER_ITER
218 LEFT_BORDER_ITER
219 LEFT_BORDER_ITER
220 LEFT_BORDER_ITER
221 LEFT_BORDER_ITER
222 LEFT_BORDER_ITER
223 LEFT_BORDER_ITER
224 LEFT_BORDER_ITER
225 LEFT_BORDER_ITER
226 }
227#endif
228
229 for (;x < border; x++) {
230 LEFT_BORDER_ITER
231 }
232#undef LEFT_BORDER_ITER
233 for (int x = width; x < diameter; ++x) {
234 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24;
235 dptr += dst_x_stride;
236 }
237 x = diameter;
238
239#define CENTER_ITER \
240 inner_sum = outer_sum - *left; \
241 outer_sum += *right++; \
242 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
243 dptr += dst_x_stride; \
244 outer_sum -= *left++;
245
246#ifdef UNROLL_SEPARABLE_LOOPS
247 for (; x < width - 16; x += 16) {
248 CENTER_ITER
249 CENTER_ITER
250 CENTER_ITER
251 CENTER_ITER
252 CENTER_ITER
253 CENTER_ITER
254 CENTER_ITER
255 CENTER_ITER
256 CENTER_ITER
257 CENTER_ITER
258 CENTER_ITER
259 CENTER_ITER
260 CENTER_ITER
261 CENTER_ITER
262 CENTER_ITER
263 CENTER_ITER
264 }
265#endif
266 for (; x < width; ++x) {
267 CENTER_ITER
268 }
269#undef CENTER_ITER
270
271 #define RIGHT_BORDER_ITER \
272 inner_sum = outer_sum - *left++; \
273 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale) >> 24; \
274 dptr += dst_x_stride; \
275 outer_sum = inner_sum;
276
277 x = 0;
278#ifdef UNROLL_SEPARABLE_LOOPS
279 for (; x < border - 16; x += 16) {
280 RIGHT_BORDER_ITER
281 RIGHT_BORDER_ITER
282 RIGHT_BORDER_ITER
283 RIGHT_BORDER_ITER
284 RIGHT_BORDER_ITER
285 RIGHT_BORDER_ITER
286 RIGHT_BORDER_ITER
287 RIGHT_BORDER_ITER
288 RIGHT_BORDER_ITER
289 RIGHT_BORDER_ITER
290 RIGHT_BORDER_ITER
291 RIGHT_BORDER_ITER
292 RIGHT_BORDER_ITER
293 RIGHT_BORDER_ITER
294 RIGHT_BORDER_ITER
295 RIGHT_BORDER_ITER
296 }
297#endif
298 for (; x < border; x++) {
299 RIGHT_BORDER_ITER
300 }
301#undef RIGHT_BORDER_ITER
302 SkASSERT(outer_sum == 0 && inner_sum == 0);
303 }
304 return new_width;
305}
306
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000307static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
308{
309 *loRadius = *hiRadius = SkScalarCeil(passRadius);
310 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
311 *loRadius = *hiRadius - 1;
312 }
313}
314
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
tomhudson@google.com054ff1e2012-01-11 19:29:08 +0000319#if defined(SK_BUILD_FOR_WIN32)
tomhudson@google.com01224d52011-11-28 18:22:01 +0000320#define UNROLL_KERNEL_LOOP 1
321#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +0000322
reed@android.com45607672009-09-21 00:27:08 +0000323/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
324 src values at their position, plus all values above and to the left.
325 When we sample into this buffer, we need an initial row and column of 0s,
326 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000327
reed@android.com45607672009-09-21 00:27:08 +0000328 src[i, j] == sum[i+1, j+1]
329 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000330
reed@android.com45607672009-09-21 00:27:08 +0000331 We assume that the sum buffer's stride == its width
332 */
reed@google.com03016a32011-08-12 14:59:59 +0000333static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
334 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +0000335 int sumW = srcW + 1;
336
337 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000338 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000339 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000340
341 int x, y;
342
reed@android.com45607672009-09-21 00:27:08 +0000343 // zero out the top row and column
344 memset(sum, 0, sumW * sizeof(sum[0]));
345 sum += sumW;
346
reed@android.com8a1c16f2008-12-17 15:59:43 +0000347 // special case first row
348 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000349 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000350 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000351 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000352 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000353 }
354 src += srcRB;
355
356 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000357 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000358 uint32_t L = 0;
359 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000360 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000361
362 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
363 uint32_t T = sum[-sumW];
364 X = *src++ + L + T - C;
365 *sum++ = X;
366 L = X;
367 C = T;
368 }
369
370 for (; x >= 4; x-=4) {
371 uint32_t T = sum[-sumW];
372 X = *src++ + L + T - C;
373 *sum++ = X;
374 L = X;
375 C = T;
376 T = sum[-sumW];
377 X = *src++ + L + T - C;
378 *sum++ = X;
379 L = X;
380 C = T;
381 T = sum[-sumW];
382 X = *src++ + L + T - C;
383 *sum++ = X;
384 L = X;
385 C = T;
386 T = sum[-sumW];
387 X = *src++ + L + T - C;
388 *sum++ = X;
389 L = X;
390 C = T;
391 }
392
393 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000394 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000395 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000396 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000397 L = X;
398 C = T;
399 }
400 src += srcRB;
401 }
402}
403
reed@google.com03016a32011-08-12 14:59:59 +0000404/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000405 * This is the path for apply_kernel() to be taken when the kernel
406 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000407 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000408static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
409 int sw, int sh) {
410 SkASSERT(2*rx > sw);
411
reed@android.com8a1c16f2008-12-17 15:59:43 +0000412 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
413
reed@android.com45607672009-09-21 00:27:08 +0000414 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000415
416 int dw = sw + 2*rx;
417 int dh = sh + 2*ry;
418
reed@android.com45607672009-09-21 00:27:08 +0000419 int prev_y = -2*ry;
420 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000421
reed@android.com45607672009-09-21 00:27:08 +0000422 for (int y = 0; y < dh; y++) {
423 int py = SkClampPos(prev_y) * sumStride;
424 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000425
reed@android.com45607672009-09-21 00:27:08 +0000426 int prev_x = -2*rx;
427 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000428
reed@android.com45607672009-09-21 00:27:08 +0000429 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000430 int px = SkClampPos(prev_x);
431 int nx = SkFastMin32(next_x, sw);
432
reed@android.com45607672009-09-21 00:27:08 +0000433 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
434 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000435
436 prev_x += 1;
437 next_x += 1;
438 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000439
440 prev_y += 1;
441 next_y += 1;
442 }
443}
444/**
445 * sw and sh are the width and height of the src. Since the sum buffer
446 * matches that, but has an extra row and col at the beginning (with zeros),
447 * we can just use sw and sh as our "max" values for pinning coordinates
448 * when sampling into sum[][]
449 *
450 * The inner loop is conceptually simple; we break it into several sections
451 * to improve performance. Here's the original version:
452 for (int x = 0; x < dw; x++) {
453 int px = SkClampPos(prev_x);
454 int nx = SkFastMin32(next_x, sw);
455
456 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
457 *dst++ = SkToU8(tmp * scale >> 24);
458
459 prev_x += 1;
460 next_x += 1;
461 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000462 * The sections are:
463 * left-hand section, where prev_x is clamped to 0
464 * center section, where neither prev_x nor next_x is clamped
465 * right-hand section, where next_x is clamped to sw
466 * On some operating systems, the center section is unrolled for additional
467 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000468*/
469static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
470 int sw, int sh) {
471 if (2*rx > sw) {
472 kernel_clamped(dst, rx, ry, sum, sw, sh);
473 return;
474 }
475
476 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
477
478 int sumStride = sw + 1;
479
480 int dw = sw + 2*rx;
481 int dh = sh + 2*ry;
482
483 int prev_y = -2*ry;
484 int next_y = 1;
485
486 SkASSERT(2*rx <= dw - 2*rx);
487
488 for (int y = 0; y < dh; y++) {
489 int py = SkClampPos(prev_y) * sumStride;
490 int ny = SkFastMin32(next_y, sh) * sumStride;
491
492 int prev_x = -2*rx;
493 int next_x = 1;
494 int x = 0;
495
496 for (; x < 2*rx; x++) {
497 SkASSERT(prev_x <= 0);
498 SkASSERT(next_x <= sw);
499
500 int px = 0;
501 int nx = next_x;
502
503 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
504 *dst++ = SkToU8(tmp * scale >> 24);
505
506 prev_x += 1;
507 next_x += 1;
508 }
509
tomhudson@google.com01224d52011-11-28 18:22:01 +0000510 int i0 = prev_x + py;
511 int i1 = next_x + ny;
512 int i2 = next_x + py;
513 int i3 = prev_x + ny;
514
515#if UNROLL_KERNEL_LOOP
516 for (; x < dw - 2*rx - 4; x += 4) {
517 SkASSERT(prev_x >= 0);
518 SkASSERT(next_x <= sw);
519
520 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
521 *dst++ = SkToU8(tmp * scale >> 24);
522 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
523 *dst++ = SkToU8(tmp * scale >> 24);
524 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
525 *dst++ = SkToU8(tmp * scale >> 24);
526 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
527 *dst++ = SkToU8(tmp * scale >> 24);
528
529 prev_x += 4;
530 next_x += 4;
531 }
532#endif
533
tomhudson@google.com8caac642011-11-22 15:58:06 +0000534 for (; x < dw - 2*rx; x++) {
535 SkASSERT(prev_x >= 0);
536 SkASSERT(next_x <= sw);
537
tomhudson@google.com01224d52011-11-28 18:22:01 +0000538 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000539 *dst++ = SkToU8(tmp * scale >> 24);
540
541 prev_x += 1;
542 next_x += 1;
543 }
544
545 for (; x < dw; x++) {
546 SkASSERT(prev_x >= 0);
547 SkASSERT(next_x > sw);
548
549 int px = prev_x;
550 int nx = sw;
551
552 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
553 *dst++ = SkToU8(tmp * scale >> 24);
554
555 prev_x += 1;
556 next_x += 1;
557 }
558
reed@android.com8a1c16f2008-12-17 15:59:43 +0000559 prev_y += 1;
560 next_y += 1;
561 }
562}
563
reed@google.com03016a32011-08-12 14:59:59 +0000564/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000565 * This is the path for apply_kernel_interp() to be taken when the kernel
566 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000567 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000568static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000569 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000570 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000571
572 int inner_weight = 255 - outer_weight;
573
574 // round these guys up if they're bigger than 127
575 outer_weight += outer_weight >> 7;
576 inner_weight += inner_weight >> 7;
577
578 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
579 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
580
reed@android.com45607672009-09-21 00:27:08 +0000581 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000582
583 int dw = sw + 2*rx;
584 int dh = sh + 2*ry;
585
reed@android.com45607672009-09-21 00:27:08 +0000586 int prev_y = -2*ry;
587 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000588
reed@android.com45607672009-09-21 00:27:08 +0000589 for (int y = 0; y < dh; y++) {
590 int py = SkClampPos(prev_y) * sumStride;
591 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000592
reed@android.com45607672009-09-21 00:27:08 +0000593 int ipy = SkClampPos(prev_y + 1) * sumStride;
594 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000595
reed@android.com45607672009-09-21 00:27:08 +0000596 int prev_x = -2*rx;
597 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000598
reed@android.com45607672009-09-21 00:27:08 +0000599 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000600 int px = SkClampPos(prev_x);
601 int nx = SkFastMin32(next_x, sw);
602
603 int ipx = SkClampPos(prev_x + 1);
604 int inx = SkClampMax(next_x - 1, sw);
605
tomhudson@google.com8caac642011-11-22 15:58:06 +0000606 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
607 - sum[nx+py] - sum[px+ny];
608 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
609 - sum[inx+ipy] - sum[ipx+iny];
610 *dst++ = SkToU8((outer_sum * outer_scale
611 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000612
613 prev_x += 1;
614 next_x += 1;
615 }
616 prev_y += 1;
617 next_y += 1;
618 }
619}
620
tomhudson@google.com8caac642011-11-22 15:58:06 +0000621/**
622 * sw and sh are the width and height of the src. Since the sum buffer
623 * matches that, but has an extra row and col at the beginning (with zeros),
624 * we can just use sw and sh as our "max" values for pinning coordinates
625 * when sampling into sum[][]
626 *
627 * The inner loop is conceptually simple; we break it into several variants
628 * to improve performance. Here's the original version:
629 for (int x = 0; x < dw; x++) {
630 int px = SkClampPos(prev_x);
631 int nx = SkFastMin32(next_x, sw);
632
633 int ipx = SkClampPos(prev_x + 1);
634 int inx = SkClampMax(next_x - 1, sw);
635
636 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
637 - sum[nx+py] - sum[px+ny];
638 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
639 - sum[inx+ipy] - sum[ipx+iny];
640 *dst++ = SkToU8((outer_sum * outer_scale
641 + inner_sum * inner_scale) >> 24);
642
643 prev_x += 1;
644 next_x += 1;
645 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000646 * The sections are:
647 * left-hand section, where prev_x is clamped to 0
648 * center section, where neither prev_x nor next_x is clamped
649 * right-hand section, where next_x is clamped to sw
650 * On some operating systems, the center section is unrolled for additional
651 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000652*/
653static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
654 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
655 SkASSERT(rx > 0 && ry > 0);
656 SkASSERT(outer_weight <= 255);
657
658 if (2*rx > sw) {
659 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
660 return;
661 }
662
663 int inner_weight = 255 - outer_weight;
664
665 // round these guys up if they're bigger than 127
666 outer_weight += outer_weight >> 7;
667 inner_weight += inner_weight >> 7;
668
669 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
670 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
671
672 int sumStride = sw + 1;
673
674 int dw = sw + 2*rx;
675 int dh = sh + 2*ry;
676
677 int prev_y = -2*ry;
678 int next_y = 1;
679
680 SkASSERT(2*rx <= dw - 2*rx);
681
682 for (int y = 0; y < dh; y++) {
683 int py = SkClampPos(prev_y) * sumStride;
684 int ny = SkFastMin32(next_y, sh) * sumStride;
685
686 int ipy = SkClampPos(prev_y + 1) * sumStride;
687 int iny = SkClampMax(next_y - 1, sh) * sumStride;
688
689 int prev_x = -2*rx;
690 int next_x = 1;
691 int x = 0;
692
693 for (; x < 2*rx; x++) {
694 SkASSERT(prev_x < 0);
695 SkASSERT(next_x <= sw);
696
697 int px = 0;
698 int nx = next_x;
699
700 int ipx = 0;
701 int inx = next_x - 1;
702
703 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
704 - sum[nx+py] - sum[px+ny];
705 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
706 - sum[inx+ipy] - sum[ipx+iny];
707 *dst++ = SkToU8((outer_sum * outer_scale
708 + inner_sum * inner_scale) >> 24);
709
710 prev_x += 1;
711 next_x += 1;
712 }
713
tomhudson@google.com01224d52011-11-28 18:22:01 +0000714 int i0 = prev_x + py;
715 int i1 = next_x + ny;
716 int i2 = next_x + py;
717 int i3 = prev_x + ny;
718 int i4 = prev_x + 1 + ipy;
719 int i5 = next_x - 1 + iny;
720 int i6 = next_x - 1 + ipy;
721 int i7 = prev_x + 1 + iny;
722
723#if UNROLL_KERNEL_LOOP
724 for (; x < dw - 2*rx - 4; x += 4) {
725 SkASSERT(prev_x >= 0);
726 SkASSERT(next_x <= sw);
727
728 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
729 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
730 *dst++ = SkToU8((outer_sum * outer_scale
731 + inner_sum * inner_scale) >> 24);
732 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
733 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
734 *dst++ = SkToU8((outer_sum * outer_scale
735 + inner_sum * inner_scale) >> 24);
736 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
737 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
738 *dst++ = SkToU8((outer_sum * outer_scale
739 + inner_sum * inner_scale) >> 24);
740 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
741 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
742 *dst++ = SkToU8((outer_sum * outer_scale
743 + inner_sum * inner_scale) >> 24);
744
745 prev_x += 4;
746 next_x += 4;
747 }
748#endif
749
tomhudson@google.com8caac642011-11-22 15:58:06 +0000750 for (; x < dw - 2*rx; x++) {
751 SkASSERT(prev_x >= 0);
752 SkASSERT(next_x <= sw);
753
tomhudson@google.com01224d52011-11-28 18:22:01 +0000754 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
755 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000756 *dst++ = SkToU8((outer_sum * outer_scale
757 + inner_sum * inner_scale) >> 24);
758
759 prev_x += 1;
760 next_x += 1;
761 }
762
763 for (; x < dw; x++) {
764 SkASSERT(prev_x >= 0);
765 SkASSERT(next_x > sw);
766
767 int px = prev_x;
768 int nx = sw;
769
770 int ipx = prev_x + 1;
771 int inx = sw;
772
773 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
774 - sum[nx+py] - sum[px+ny];
775 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
776 - sum[inx+ipy] - sum[ipx+iny];
777 *dst++ = SkToU8((outer_sum * outer_scale
778 + inner_sum * inner_scale) >> 24);
779
780 prev_x += 1;
781 next_x += 1;
782 }
783
784 prev_y += 1;
785 next_y += 1;
786 }
787}
788
reed@android.com8a1c16f2008-12-17 15:59:43 +0000789#include "SkColorPriv.h"
790
reed@android.com0e3c6642009-09-18 13:41:56 +0000791static void merge_src_with_blur(uint8_t dst[], int dstRB,
792 const uint8_t src[], int srcRB,
793 const uint8_t blur[], int blurRB,
794 int sw, int sh) {
795 dstRB -= sw;
796 srcRB -= sw;
797 blurRB -= sw;
798 while (--sh >= 0) {
799 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000800 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
801 dst += 1;
802 src += 1;
803 blur += 1;
804 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000805 dst += dstRB;
806 src += srcRB;
807 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000808 }
809}
810
811static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000812 const uint8_t src[], int srcRowBytes,
813 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000814 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000815 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000816 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000817 switch (style) {
818 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000819 for (x = sw - 1; x >= 0; --x) {
820 int s = *src;
821 int d = *dst;
822 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000823 dst += 1;
824 src += 1;
825 }
826 break;
827 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000828 for (x = sw - 1; x >= 0; --x) {
829 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000830 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000831 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000832 dst += 1;
833 src += 1;
834 }
835 break;
836 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000837 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000838 break;
839 }
840 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000841 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000842 }
843}
844
reed@google.com03016a32011-08-12 14:59:59 +0000845///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000846
847// we use a local funciton to wrap the class static method to work around
848// a bug in gcc98
849void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000850void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000851 SkMask::FreeImage(image);
852}
853
854bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000855 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000856 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000857{
reed@google.com03016a32011-08-12 14:59:59 +0000858 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000859 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000860 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000861
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000862 // Force high quality off for small radii (performance)
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000863 if (radius < SkIntToScalar(3)) {
864 quality = kLow_Quality;
865 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000866
867 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000868 int passCount = (kHigh_Quality == quality) ? 3 : 1;
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000869 SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
870
871 int rx = SkScalarCeil(passRadius);
872 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000873
874 SkASSERT(rx >= 0);
875 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000876 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000877 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000878 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000879
880 int ry = rx; // only do square blur for now
881
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000882 int padx = passCount * rx;
883 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000884 if (margin) {
885 margin->set(padx, pady);
886 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000887 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
888 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000889 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000890 dst->fFormat = SkMask::kA8_Format;
891 dst->fImage = NULL;
892
reed@android.com0e3c6642009-09-18 13:41:56 +0000893 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000894 size_t dstSize = dst->computeImageSize();
895 if (0 == dstSize) {
896 return false; // too big to allocate, abort
897 }
898
reed@android.com8a1c16f2008-12-17 15:59:43 +0000899 int sw = src.fBounds.width();
900 int sh = src.fBounds.height();
901 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +0000902 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000903
904 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
905
906 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000907 if (separable) {
908 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
909 uint8_t* tp = tmpBuffer.get();
910 int w = sw, h = sh;
911
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000912 if (outer_weight == 255) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000913 int loRadius, hiRadius;
914 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000915 if (kHigh_Quality == quality) {
916 // Do three X blurs, with a transpose on the final one.
917 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
918 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
919 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
920 // Do three Y blurs, with a transpose on the final one.
921 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
922 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
923 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
924 } else {
925 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
926 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
927 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000928 } else {
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000929 if (kHigh_Quality == quality) {
930 // Do three X blurs, with a transpose on the final one.
931 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outer_weight);
932 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outer_weight);
933 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outer_weight);
934 // Do three Y blurs, with a transpose on the final one.
935 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outer_weight);
936 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outer_weight);
937 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);
938 } else {
939 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outer_weight);
940 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);
941 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000942 }
943 } else {
reed@google.com03016a32011-08-12 14:59:59 +0000944 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
945 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
946 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000947 uint32_t* sumBuffer = storage.get();
948
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000949 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +0000950 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +0000951 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000952 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +0000953 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000954 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +0000955 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000956
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000957 if (kHigh_Quality == quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000958 //pass2: dp is source, tmpBuffer is destination
959 int tmp_sw = sw + 2 * rx;
960 int tmp_sh = sh + 2 * ry;
961 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
962 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
963 if (outer_weight == 255)
964 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
965 else
reed@google.com03016a32011-08-12 14:59:59 +0000966 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
967 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000968
969 //pass3: tmpBuffer is source, dp is destination
970 tmp_sw += 2 * rx;
971 tmp_sh += 2 * ry;
972 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
973 if (outer_weight == 255)
974 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
975 else
reed@google.com03016a32011-08-12 14:59:59 +0000976 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
977 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000978 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000979 }
980
981 dst->fImage = dp;
982 // if need be, alloc the "real" dst (same size as src) and copy/merge
983 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +0000984 if (style == kInner_Style) {
985 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +0000986 size_t srcSize = src.computeImageSize();
987 if (0 == srcSize) {
988 return false; // too big to allocate, abort
989 }
990 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +0000991 merge_src_with_blur(dst->fImage, src.fRowBytes,
992 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +0000993 dp + passCount * (rx + ry * dst->fRowBytes),
994 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000995 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +0000996 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +0000997 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
998 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000999 }
1000 (void)autoCall.detach();
1001 }
1002
reed@android.com0e3c6642009-09-18 13:41:56 +00001003 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001004 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +00001005 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +00001006 }
1007
reed@android.com8a1c16f2008-12-17 15:59:43 +00001008 return true;
1009}
1010
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001011bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
1012 SkScalar radius, Style style, Quality quality,
1013 SkIPoint* margin)
1014{
1015 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
1016}
1017
1018bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
1019 SkScalar radius, Style style, Quality quality,
1020 SkIPoint* margin)
1021{
1022 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
1023}