blob: c97916bbb0ff323ac0fbc75153d7c8121fed325c [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
// Scale factor for the blur radius to match the behavior of all the existing blur
// code (both on the CPU and the GPU). This magic constant is 1/sqrt(3).
17
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +000018// TODO: get rid of this fudge factor and move any required fudging up into
humper@google.com7c7292c2013-01-04 20:29:03 +000019// the calling library
20
21#define kBlurRadiusFudgeFactor SkFloatToScalar( .57735f )
22
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000023#define UNROLL_SEPARABLE_LOOPS
24
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000025#define SK_DISABLE_BLUR_ROUNDING
26
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000027/**
28 * This function performs a box blur in X, of the given radius. If the
skia.committer@gmail.com884e60b2012-11-16 02:01:17 +000029 * "transpose" parameter is true, it will transpose the pixels on write,
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000030 * such that X and Y are swapped. Reads are always performed from contiguous
31 * memory in X, for speed. The destination buffer (dst) must be at least
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000032 * (width + leftRadius + rightRadius) * height bytes in size.
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000033 *
34 * This is what the inner loop looks like before unrolling, and with the two
35 * cases broken out separately (width < diameter, width >= diameter):
36 *
37 * if (width < diameter) {
38 * for (int x = 0; x < width; ++x) {
39 * sum += *right++;
40 * *dptr = (sum * scale + half) >> 24;
41 * dptr += dst_x_stride;
42 * }
43 * for (int x = width; x < diameter; ++x) {
44 * *dptr = (sum * scale + half) >> 24;
45 * dptr += dst_x_stride;
46 * }
47 * for (int x = 0; x < width; ++x) {
48 * *dptr = (sum * scale + half) >> 24;
49 * sum -= *left++;
50 * dptr += dst_x_stride;
51 * }
52 * } else {
53 * for (int x = 0; x < diameter; ++x) {
54 * sum += *right++;
55 * *dptr = (sum * scale + half) >> 24;
56 * dptr += dst_x_stride;
57 * }
58 * for (int x = diameter; x < width; ++x) {
59 * sum += *right++;
60 * *dptr = (sum * scale + half) >> 24;
61 * sum -= *left++;
62 * dptr += dst_x_stride;
63 * }
64 * for (int x = 0; x < diameter; ++x) {
65 * *dptr = (sum * scale + half) >> 24;
66 * sum -= *left++;
67 * dptr += dst_x_stride;
68 * }
69 * }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000070 */
71static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000072 int leftRadius, int rightRadius, int width, int height,
73 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000074{
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000075 int diameter = leftRadius + rightRadius;
76 int kernelSize = diameter + 1;
77 int border = SkMin32(width, diameter);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000078 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000079 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000080 int dst_x_stride = transpose ? height : 1;
81 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000082#ifndef SK_DISABLE_BLUR_ROUNDING
83 uint32_t half = 1 << 23;
84#else
85 uint32_t half = 0;
86#endif
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000087 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000088 uint32_t sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000089 uint8_t* dptr = dst + y * dst_y_stride;
90 const uint8_t* right = src + y * src_y_stride;
91 const uint8_t* left = right;
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000092 for (int x = 0; x < rightRadius - leftRadius; x++) {
93 *dptr = 0;
94 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000095 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000096#define LEFT_BORDER_ITER \
97 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000098 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000099 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000100
101 int x = 0;
102#ifdef UNROLL_SEPARABLE_LOOPS
103 for (; x < border - 16; x += 16) {
104 LEFT_BORDER_ITER
105 LEFT_BORDER_ITER
106 LEFT_BORDER_ITER
107 LEFT_BORDER_ITER
108 LEFT_BORDER_ITER
109 LEFT_BORDER_ITER
110 LEFT_BORDER_ITER
111 LEFT_BORDER_ITER
112 LEFT_BORDER_ITER
113 LEFT_BORDER_ITER
114 LEFT_BORDER_ITER
115 LEFT_BORDER_ITER
116 LEFT_BORDER_ITER
117 LEFT_BORDER_ITER
118 LEFT_BORDER_ITER
119 LEFT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000120 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000121#endif
122 for (; x < border; ++x) {
123 LEFT_BORDER_ITER
124 }
125#undef LEFT_BORDER_ITER
126#define TRIVIAL_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000127 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000128 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000129 x = width;
130#ifdef UNROLL_SEPARABLE_LOOPS
131 for (; x < diameter - 16; x += 16) {
132 TRIVIAL_ITER
133 TRIVIAL_ITER
134 TRIVIAL_ITER
135 TRIVIAL_ITER
136 TRIVIAL_ITER
137 TRIVIAL_ITER
138 TRIVIAL_ITER
139 TRIVIAL_ITER
140 TRIVIAL_ITER
141 TRIVIAL_ITER
142 TRIVIAL_ITER
143 TRIVIAL_ITER
144 TRIVIAL_ITER
145 TRIVIAL_ITER
146 TRIVIAL_ITER
147 TRIVIAL_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000148 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000149#endif
150 for (; x < diameter; ++x) {
151 TRIVIAL_ITER
152 }
153#undef TRIVIAL_ITER
154#define CENTER_ITER \
155 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000156 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000157 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000158 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000159
160 x = diameter;
161#ifdef UNROLL_SEPARABLE_LOOPS
162 for (; x < width - 16; x += 16) {
163 CENTER_ITER
164 CENTER_ITER
165 CENTER_ITER
166 CENTER_ITER
167 CENTER_ITER
168 CENTER_ITER
169 CENTER_ITER
170 CENTER_ITER
171 CENTER_ITER
172 CENTER_ITER
173 CENTER_ITER
174 CENTER_ITER
175 CENTER_ITER
176 CENTER_ITER
177 CENTER_ITER
178 CENTER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000179 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000180#endif
181 for (; x < width; ++x) {
182 CENTER_ITER
183 }
184#undef CENTER_ITER
185#define RIGHT_BORDER_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000186 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000187 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000188 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000189
190 x = 0;
191#ifdef UNROLL_SEPARABLE_LOOPS
192 for (; x < border - 16; x += 16) {
193 RIGHT_BORDER_ITER
194 RIGHT_BORDER_ITER
195 RIGHT_BORDER_ITER
196 RIGHT_BORDER_ITER
197 RIGHT_BORDER_ITER
198 RIGHT_BORDER_ITER
199 RIGHT_BORDER_ITER
200 RIGHT_BORDER_ITER
201 RIGHT_BORDER_ITER
202 RIGHT_BORDER_ITER
203 RIGHT_BORDER_ITER
204 RIGHT_BORDER_ITER
205 RIGHT_BORDER_ITER
206 RIGHT_BORDER_ITER
207 RIGHT_BORDER_ITER
208 RIGHT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000209 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000210#endif
211 for (; x < border; ++x) {
212 RIGHT_BORDER_ITER
213 }
214#undef RIGHT_BORDER_ITER
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +0000215 for (int x = 0; x < leftRadius - rightRadius; x++) {
216 *dptr = 0;
217 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000218 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000219 SkASSERT(sum == 0);
220 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000221 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000222}
223
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000224/**
225 * This variant of the box blur handles blurring of non-integer radii. It
226 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
227 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly
228 * interpolates between them. In float this would be:
229 * outer_weight * outer_sum / kernelSize +
230 * (1.0 - outer_weight) * innerSum / (kernelSize - 2)
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000231 *
232 * This is what the inner loop looks like before unrolling, and with the two
233 * cases broken out separately (width < diameter, width >= diameter):
234 *
235 * if (width < diameter) {
236 * for (int x = 0; x < width; x++) {
237 * inner_sum = outer_sum;
238 * outer_sum += *right++;
239 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
240 * dptr += dst_x_stride;
241 * }
242 * for (int x = width; x < diameter; ++x) {
243 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
244 * dptr += dst_x_stride;
245 * }
246 * for (int x = 0; x < width; x++) {
247 * inner_sum = outer_sum - *left++;
248 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
249 * dptr += dst_x_stride;
250 * outer_sum = inner_sum;
251 * }
252 * } else {
253 * for (int x = 0; x < diameter; x++) {
254 * inner_sum = outer_sum;
255 * outer_sum += *right++;
256 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
257 * dptr += dst_x_stride;
258 * }
259 * for (int x = diameter; x < width; ++x) {
260 * inner_sum = outer_sum - *left;
261 * outer_sum += *right++;
262 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
263 * dptr += dst_x_stride;
264 * outer_sum -= *left++;
265 * }
266 * for (int x = 0; x < diameter; x++) {
267 * inner_sum = outer_sum - *left++;
268 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
269 * dptr += dst_x_stride;
270 * outer_sum = inner_sum;
271 * }
272 * }
273 * }
274 * return new_width;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000275 */
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000276
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000277static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
278 int radius, int width, int height,
279 bool transpose, uint8_t outer_weight)
280{
281 int diameter = radius * 2;
282 int kernelSize = diameter + 1;
283 int border = SkMin32(width, diameter);
284 int inner_weight = 255 - outer_weight;
285 outer_weight += outer_weight >> 7;
286 inner_weight += inner_weight >> 7;
287 uint32_t outer_scale = (outer_weight << 16) / kernelSize;
288 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000289#ifndef SK_DISABLE_BLUR_ROUNDING
290 uint32_t half = 1 << 23;
291#else
292 uint32_t half = 0;
293#endif
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000294 int new_width = width + diameter;
295 int dst_x_stride = transpose ? height : 1;
296 int dst_y_stride = transpose ? 1 : new_width;
297 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000298 uint32_t outer_sum = 0, inner_sum = 0;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000299 uint8_t* dptr = dst + y * dst_y_stride;
300 const uint8_t* right = src + y * src_y_stride;
301 const uint8_t* left = right;
302 int x = 0;
303
304#define LEFT_BORDER_ITER \
305 inner_sum = outer_sum; \
306 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000307 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000308 dptr += dst_x_stride;
309
310#ifdef UNROLL_SEPARABLE_LOOPS
311 for (;x < border - 16; x += 16) {
312 LEFT_BORDER_ITER
313 LEFT_BORDER_ITER
314 LEFT_BORDER_ITER
315 LEFT_BORDER_ITER
316 LEFT_BORDER_ITER
317 LEFT_BORDER_ITER
318 LEFT_BORDER_ITER
319 LEFT_BORDER_ITER
320 LEFT_BORDER_ITER
321 LEFT_BORDER_ITER
322 LEFT_BORDER_ITER
323 LEFT_BORDER_ITER
324 LEFT_BORDER_ITER
325 LEFT_BORDER_ITER
326 LEFT_BORDER_ITER
327 LEFT_BORDER_ITER
328 }
329#endif
330
331 for (;x < border; x++) {
332 LEFT_BORDER_ITER
333 }
334#undef LEFT_BORDER_ITER
335 for (int x = width; x < diameter; ++x) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000336 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000337 dptr += dst_x_stride;
338 }
339 x = diameter;
340
341#define CENTER_ITER \
342 inner_sum = outer_sum - *left; \
343 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000344 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000345 dptr += dst_x_stride; \
346 outer_sum -= *left++;
347
348#ifdef UNROLL_SEPARABLE_LOOPS
349 for (; x < width - 16; x += 16) {
350 CENTER_ITER
351 CENTER_ITER
352 CENTER_ITER
353 CENTER_ITER
354 CENTER_ITER
355 CENTER_ITER
356 CENTER_ITER
357 CENTER_ITER
358 CENTER_ITER
359 CENTER_ITER
360 CENTER_ITER
361 CENTER_ITER
362 CENTER_ITER
363 CENTER_ITER
364 CENTER_ITER
365 CENTER_ITER
366 }
367#endif
368 for (; x < width; ++x) {
369 CENTER_ITER
370 }
371#undef CENTER_ITER
372
373 #define RIGHT_BORDER_ITER \
374 inner_sum = outer_sum - *left++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000375 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000376 dptr += dst_x_stride; \
377 outer_sum = inner_sum;
378
379 x = 0;
380#ifdef UNROLL_SEPARABLE_LOOPS
381 for (; x < border - 16; x += 16) {
382 RIGHT_BORDER_ITER
383 RIGHT_BORDER_ITER
384 RIGHT_BORDER_ITER
385 RIGHT_BORDER_ITER
386 RIGHT_BORDER_ITER
387 RIGHT_BORDER_ITER
388 RIGHT_BORDER_ITER
389 RIGHT_BORDER_ITER
390 RIGHT_BORDER_ITER
391 RIGHT_BORDER_ITER
392 RIGHT_BORDER_ITER
393 RIGHT_BORDER_ITER
394 RIGHT_BORDER_ITER
395 RIGHT_BORDER_ITER
396 RIGHT_BORDER_ITER
397 RIGHT_BORDER_ITER
398 }
399#endif
400 for (; x < border; x++) {
401 RIGHT_BORDER_ITER
402 }
403#undef RIGHT_BORDER_ITER
404 SkASSERT(outer_sum == 0 && inner_sum == 0);
405 }
406 return new_width;
407}
408
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000409static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
410{
411 *loRadius = *hiRadius = SkScalarCeil(passRadius);
412 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
413 *loRadius = *hiRadius - 1;
414 }
415}
416
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
tomhudson@google.com054ff1e2012-01-11 19:29:08 +0000421#if defined(SK_BUILD_FOR_WIN32)
tomhudson@google.com01224d52011-11-28 18:22:01 +0000422#define UNROLL_KERNEL_LOOP 1
423#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +0000424
reed@android.com45607672009-09-21 00:27:08 +0000425/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
426 src values at their position, plus all values above and to the left.
427 When we sample into this buffer, we need an initial row and column of 0s,
428 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000429
reed@android.com45607672009-09-21 00:27:08 +0000430 src[i, j] == sum[i+1, j+1]
431 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000432
reed@android.com45607672009-09-21 00:27:08 +0000433 We assume that the sum buffer's stride == its width
434 */
reed@google.com03016a32011-08-12 14:59:59 +0000435static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
436 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +0000437 int sumW = srcW + 1;
438
439 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000440 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000441 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000442
443 int x, y;
444
reed@android.com45607672009-09-21 00:27:08 +0000445 // zero out the top row and column
446 memset(sum, 0, sumW * sizeof(sum[0]));
447 sum += sumW;
448
reed@android.com8a1c16f2008-12-17 15:59:43 +0000449 // special case first row
450 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000451 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000452 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000453 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000454 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000455 }
456 src += srcRB;
457
458 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000459 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000460 uint32_t L = 0;
461 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000462 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000463
464 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
465 uint32_t T = sum[-sumW];
466 X = *src++ + L + T - C;
467 *sum++ = X;
468 L = X;
469 C = T;
470 }
471
472 for (; x >= 4; x-=4) {
473 uint32_t T = sum[-sumW];
474 X = *src++ + L + T - C;
475 *sum++ = X;
476 L = X;
477 C = T;
478 T = sum[-sumW];
479 X = *src++ + L + T - C;
480 *sum++ = X;
481 L = X;
482 C = T;
483 T = sum[-sumW];
484 X = *src++ + L + T - C;
485 *sum++ = X;
486 L = X;
487 C = T;
488 T = sum[-sumW];
489 X = *src++ + L + T - C;
490 *sum++ = X;
491 L = X;
492 C = T;
493 }
494
495 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000496 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000497 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000498 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000499 L = X;
500 C = T;
501 }
502 src += srcRB;
503 }
504}
505
reed@google.com03016a32011-08-12 14:59:59 +0000506/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000507 * This is the path for apply_kernel() to be taken when the kernel
508 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000509 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000510static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
511 int sw, int sh) {
512 SkASSERT(2*rx > sw);
513
reed@android.com8a1c16f2008-12-17 15:59:43 +0000514 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
515
reed@android.com45607672009-09-21 00:27:08 +0000516 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000517
518 int dw = sw + 2*rx;
519 int dh = sh + 2*ry;
520
reed@android.com45607672009-09-21 00:27:08 +0000521 int prev_y = -2*ry;
522 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000523
reed@android.com45607672009-09-21 00:27:08 +0000524 for (int y = 0; y < dh; y++) {
525 int py = SkClampPos(prev_y) * sumStride;
526 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000527
reed@android.com45607672009-09-21 00:27:08 +0000528 int prev_x = -2*rx;
529 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000530
reed@android.com45607672009-09-21 00:27:08 +0000531 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000532 int px = SkClampPos(prev_x);
533 int nx = SkFastMin32(next_x, sw);
534
reed@android.com45607672009-09-21 00:27:08 +0000535 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
536 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000537
538 prev_x += 1;
539 next_x += 1;
540 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000541
542 prev_y += 1;
543 next_y += 1;
544 }
545}
546/**
547 * sw and sh are the width and height of the src. Since the sum buffer
548 * matches that, but has an extra row and col at the beginning (with zeros),
549 * we can just use sw and sh as our "max" values for pinning coordinates
550 * when sampling into sum[][]
551 *
552 * The inner loop is conceptually simple; we break it into several sections
553 * to improve performance. Here's the original version:
554 for (int x = 0; x < dw; x++) {
555 int px = SkClampPos(prev_x);
556 int nx = SkFastMin32(next_x, sw);
557
558 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
559 *dst++ = SkToU8(tmp * scale >> 24);
560
561 prev_x += 1;
562 next_x += 1;
563 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000564 * The sections are:
565 * left-hand section, where prev_x is clamped to 0
566 * center section, where neither prev_x nor next_x is clamped
567 * right-hand section, where next_x is clamped to sw
568 * On some operating systems, the center section is unrolled for additional
569 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000570*/
571static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
572 int sw, int sh) {
573 if (2*rx > sw) {
574 kernel_clamped(dst, rx, ry, sum, sw, sh);
575 return;
576 }
577
578 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
579
580 int sumStride = sw + 1;
581
582 int dw = sw + 2*rx;
583 int dh = sh + 2*ry;
584
585 int prev_y = -2*ry;
586 int next_y = 1;
587
588 SkASSERT(2*rx <= dw - 2*rx);
589
590 for (int y = 0; y < dh; y++) {
591 int py = SkClampPos(prev_y) * sumStride;
592 int ny = SkFastMin32(next_y, sh) * sumStride;
593
594 int prev_x = -2*rx;
595 int next_x = 1;
596 int x = 0;
597
598 for (; x < 2*rx; x++) {
599 SkASSERT(prev_x <= 0);
600 SkASSERT(next_x <= sw);
601
602 int px = 0;
603 int nx = next_x;
604
605 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
606 *dst++ = SkToU8(tmp * scale >> 24);
607
608 prev_x += 1;
609 next_x += 1;
610 }
611
tomhudson@google.com01224d52011-11-28 18:22:01 +0000612 int i0 = prev_x + py;
613 int i1 = next_x + ny;
614 int i2 = next_x + py;
615 int i3 = prev_x + ny;
616
617#if UNROLL_KERNEL_LOOP
618 for (; x < dw - 2*rx - 4; x += 4) {
619 SkASSERT(prev_x >= 0);
620 SkASSERT(next_x <= sw);
621
622 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
623 *dst++ = SkToU8(tmp * scale >> 24);
624 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
625 *dst++ = SkToU8(tmp * scale >> 24);
626 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
627 *dst++ = SkToU8(tmp * scale >> 24);
628 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
629 *dst++ = SkToU8(tmp * scale >> 24);
630
631 prev_x += 4;
632 next_x += 4;
633 }
634#endif
635
tomhudson@google.com8caac642011-11-22 15:58:06 +0000636 for (; x < dw - 2*rx; x++) {
637 SkASSERT(prev_x >= 0);
638 SkASSERT(next_x <= sw);
639
tomhudson@google.com01224d52011-11-28 18:22:01 +0000640 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000641 *dst++ = SkToU8(tmp * scale >> 24);
642
643 prev_x += 1;
644 next_x += 1;
645 }
646
647 for (; x < dw; x++) {
648 SkASSERT(prev_x >= 0);
649 SkASSERT(next_x > sw);
650
651 int px = prev_x;
652 int nx = sw;
653
654 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
655 *dst++ = SkToU8(tmp * scale >> 24);
656
657 prev_x += 1;
658 next_x += 1;
659 }
660
reed@android.com8a1c16f2008-12-17 15:59:43 +0000661 prev_y += 1;
662 next_y += 1;
663 }
664}
665
reed@google.com03016a32011-08-12 14:59:59 +0000666/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000667 * This is the path for apply_kernel_interp() to be taken when the kernel
668 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000669 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000670static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000671 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000672 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000673
674 int inner_weight = 255 - outer_weight;
675
676 // round these guys up if they're bigger than 127
677 outer_weight += outer_weight >> 7;
678 inner_weight += inner_weight >> 7;
679
680 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
681 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
682
reed@android.com45607672009-09-21 00:27:08 +0000683 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000684
685 int dw = sw + 2*rx;
686 int dh = sh + 2*ry;
687
reed@android.com45607672009-09-21 00:27:08 +0000688 int prev_y = -2*ry;
689 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000690
reed@android.com45607672009-09-21 00:27:08 +0000691 for (int y = 0; y < dh; y++) {
692 int py = SkClampPos(prev_y) * sumStride;
693 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000694
reed@android.com45607672009-09-21 00:27:08 +0000695 int ipy = SkClampPos(prev_y + 1) * sumStride;
696 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000697
reed@android.com45607672009-09-21 00:27:08 +0000698 int prev_x = -2*rx;
699 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000700
reed@android.com45607672009-09-21 00:27:08 +0000701 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000702 int px = SkClampPos(prev_x);
703 int nx = SkFastMin32(next_x, sw);
704
705 int ipx = SkClampPos(prev_x + 1);
706 int inx = SkClampMax(next_x - 1, sw);
707
tomhudson@google.com8caac642011-11-22 15:58:06 +0000708 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
709 - sum[nx+py] - sum[px+ny];
710 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
711 - sum[inx+ipy] - sum[ipx+iny];
712 *dst++ = SkToU8((outer_sum * outer_scale
713 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000714
715 prev_x += 1;
716 next_x += 1;
717 }
718 prev_y += 1;
719 next_y += 1;
720 }
721}
722
tomhudson@google.com8caac642011-11-22 15:58:06 +0000723/**
724 * sw and sh are the width and height of the src. Since the sum buffer
725 * matches that, but has an extra row and col at the beginning (with zeros),
726 * we can just use sw and sh as our "max" values for pinning coordinates
727 * when sampling into sum[][]
728 *
729 * The inner loop is conceptually simple; we break it into several variants
730 * to improve performance. Here's the original version:
731 for (int x = 0; x < dw; x++) {
732 int px = SkClampPos(prev_x);
733 int nx = SkFastMin32(next_x, sw);
734
735 int ipx = SkClampPos(prev_x + 1);
736 int inx = SkClampMax(next_x - 1, sw);
737
738 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
739 - sum[nx+py] - sum[px+ny];
740 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
741 - sum[inx+ipy] - sum[ipx+iny];
742 *dst++ = SkToU8((outer_sum * outer_scale
743 + inner_sum * inner_scale) >> 24);
744
745 prev_x += 1;
746 next_x += 1;
747 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000748 * The sections are:
749 * left-hand section, where prev_x is clamped to 0
750 * center section, where neither prev_x nor next_x is clamped
751 * right-hand section, where next_x is clamped to sw
752 * On some operating systems, the center section is unrolled for additional
753 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000754*/
755static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
756 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
757 SkASSERT(rx > 0 && ry > 0);
758 SkASSERT(outer_weight <= 255);
759
760 if (2*rx > sw) {
761 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
762 return;
763 }
764
765 int inner_weight = 255 - outer_weight;
766
767 // round these guys up if they're bigger than 127
768 outer_weight += outer_weight >> 7;
769 inner_weight += inner_weight >> 7;
770
771 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
772 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
773
774 int sumStride = sw + 1;
775
776 int dw = sw + 2*rx;
777 int dh = sh + 2*ry;
778
779 int prev_y = -2*ry;
780 int next_y = 1;
781
782 SkASSERT(2*rx <= dw - 2*rx);
783
784 for (int y = 0; y < dh; y++) {
785 int py = SkClampPos(prev_y) * sumStride;
786 int ny = SkFastMin32(next_y, sh) * sumStride;
787
788 int ipy = SkClampPos(prev_y + 1) * sumStride;
789 int iny = SkClampMax(next_y - 1, sh) * sumStride;
790
791 int prev_x = -2*rx;
792 int next_x = 1;
793 int x = 0;
794
795 for (; x < 2*rx; x++) {
796 SkASSERT(prev_x < 0);
797 SkASSERT(next_x <= sw);
798
799 int px = 0;
800 int nx = next_x;
801
802 int ipx = 0;
803 int inx = next_x - 1;
804
805 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
806 - sum[nx+py] - sum[px+ny];
807 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
808 - sum[inx+ipy] - sum[ipx+iny];
809 *dst++ = SkToU8((outer_sum * outer_scale
810 + inner_sum * inner_scale) >> 24);
811
812 prev_x += 1;
813 next_x += 1;
814 }
815
tomhudson@google.com01224d52011-11-28 18:22:01 +0000816 int i0 = prev_x + py;
817 int i1 = next_x + ny;
818 int i2 = next_x + py;
819 int i3 = prev_x + ny;
820 int i4 = prev_x + 1 + ipy;
821 int i5 = next_x - 1 + iny;
822 int i6 = next_x - 1 + ipy;
823 int i7 = prev_x + 1 + iny;
824
825#if UNROLL_KERNEL_LOOP
826 for (; x < dw - 2*rx - 4; x += 4) {
827 SkASSERT(prev_x >= 0);
828 SkASSERT(next_x <= sw);
829
830 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
831 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
832 *dst++ = SkToU8((outer_sum * outer_scale
833 + inner_sum * inner_scale) >> 24);
834 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
835 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
836 *dst++ = SkToU8((outer_sum * outer_scale
837 + inner_sum * inner_scale) >> 24);
838 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
839 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
840 *dst++ = SkToU8((outer_sum * outer_scale
841 + inner_sum * inner_scale) >> 24);
842 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
843 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
844 *dst++ = SkToU8((outer_sum * outer_scale
845 + inner_sum * inner_scale) >> 24);
846
847 prev_x += 4;
848 next_x += 4;
849 }
850#endif
851
tomhudson@google.com8caac642011-11-22 15:58:06 +0000852 for (; x < dw - 2*rx; x++) {
853 SkASSERT(prev_x >= 0);
854 SkASSERT(next_x <= sw);
855
tomhudson@google.com01224d52011-11-28 18:22:01 +0000856 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
857 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000858 *dst++ = SkToU8((outer_sum * outer_scale
859 + inner_sum * inner_scale) >> 24);
860
861 prev_x += 1;
862 next_x += 1;
863 }
864
865 for (; x < dw; x++) {
866 SkASSERT(prev_x >= 0);
867 SkASSERT(next_x > sw);
868
869 int px = prev_x;
870 int nx = sw;
871
872 int ipx = prev_x + 1;
873 int inx = sw;
874
875 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
876 - sum[nx+py] - sum[px+ny];
877 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
878 - sum[inx+ipy] - sum[ipx+iny];
879 *dst++ = SkToU8((outer_sum * outer_scale
880 + inner_sum * inner_scale) >> 24);
881
882 prev_x += 1;
883 next_x += 1;
884 }
885
886 prev_y += 1;
887 next_y += 1;
888 }
889}
890
reed@android.com8a1c16f2008-12-17 15:59:43 +0000891#include "SkColorPriv.h"
892
reed@android.com0e3c6642009-09-18 13:41:56 +0000893static void merge_src_with_blur(uint8_t dst[], int dstRB,
894 const uint8_t src[], int srcRB,
895 const uint8_t blur[], int blurRB,
896 int sw, int sh) {
897 dstRB -= sw;
898 srcRB -= sw;
899 blurRB -= sw;
900 while (--sh >= 0) {
901 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000902 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
903 dst += 1;
904 src += 1;
905 blur += 1;
906 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000907 dst += dstRB;
908 src += srcRB;
909 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000910 }
911}
912
913static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000914 const uint8_t src[], int srcRowBytes,
915 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000916 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000917 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000918 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000919 switch (style) {
920 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000921 for (x = sw - 1; x >= 0; --x) {
922 int s = *src;
923 int d = *dst;
924 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000925 dst += 1;
926 src += 1;
927 }
928 break;
929 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000930 for (x = sw - 1; x >= 0; --x) {
931 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000932 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000933 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000934 dst += 1;
935 src += 1;
936 }
937 break;
938 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000939 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000940 break;
941 }
942 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000943 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000944 }
945}
946
reed@google.com03016a32011-08-12 14:59:59 +0000947///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000948
bsalomon@google.com33cdbde2013-01-11 20:54:44 +0000949// we use a local function to wrap the class static method to work around
reed@android.com8a1c16f2008-12-17 15:59:43 +0000950// a bug in gcc98
951void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000952void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000953 SkMask::FreeImage(image);
954}
955
956bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000957 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000958 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000959{
reed@google.com03016a32011-08-12 14:59:59 +0000960 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000961 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000962 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000963
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000964 // Force high quality off for small radii (performance)
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000965 if (radius < SkIntToScalar(3)) {
966 quality = kLow_Quality;
967 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000968
969 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000970 int passCount = (kHigh_Quality == quality) ? 3 : 1;
humper@google.com7c7292c2013-01-04 20:29:03 +0000971 SkScalar passRadius = (kHigh_Quality == quality) ? SkScalarMul( radius, kBlurRadiusFudgeFactor): radius;
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000972
973 int rx = SkScalarCeil(passRadius);
974 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000975
976 SkASSERT(rx >= 0);
977 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000978 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000979 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000980 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000981
982 int ry = rx; // only do square blur for now
983
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000984 int padx = passCount * rx;
985 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000986 if (margin) {
987 margin->set(padx, pady);
988 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000989 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
990 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000991 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000992 dst->fFormat = SkMask::kA8_Format;
993 dst->fImage = NULL;
994
reed@android.com0e3c6642009-09-18 13:41:56 +0000995 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000996 size_t dstSize = dst->computeImageSize();
997 if (0 == dstSize) {
998 return false; // too big to allocate, abort
999 }
1000
reed@android.com8a1c16f2008-12-17 15:59:43 +00001001 int sw = src.fBounds.width();
1002 int sh = src.fBounds.height();
1003 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +00001004 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001005
1006 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
1007
1008 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001009 if (separable) {
1010 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1011 uint8_t* tp = tmpBuffer.get();
1012 int w = sw, h = sh;
1013
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001014 if (outer_weight == 255) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +00001015 int loRadius, hiRadius;
1016 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001017 if (kHigh_Quality == quality) {
1018 // Do three X blurs, with a transpose on the final one.
1019 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
1020 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
1021 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
1022 // Do three Y blurs, with a transpose on the final one.
1023 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
1024 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
1025 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
1026 } else {
1027 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
1028 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
1029 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +00001030 } else {
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001031 if (kHigh_Quality == quality) {
1032 // Do three X blurs, with a transpose on the final one.
1033 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outer_weight);
1034 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outer_weight);
1035 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outer_weight);
1036 // Do three Y blurs, with a transpose on the final one.
1037 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outer_weight);
1038 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outer_weight);
1039 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);
1040 } else {
1041 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outer_weight);
1042 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);
1043 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001044 }
1045 } else {
reed@google.com03016a32011-08-12 14:59:59 +00001046 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
1047 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
1048 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001049 uint32_t* sumBuffer = storage.get();
1050
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001051 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +00001052 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +00001053 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001054 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +00001055 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001056 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +00001057 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001058
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001059 if (kHigh_Quality == quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001060 //pass2: dp is source, tmpBuffer is destination
1061 int tmp_sw = sw + 2 * rx;
1062 int tmp_sh = sh + 2 * ry;
1063 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1064 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
1065 if (outer_weight == 255)
1066 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
1067 else
reed@google.com03016a32011-08-12 14:59:59 +00001068 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
1069 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001070
1071 //pass3: tmpBuffer is source, dp is destination
1072 tmp_sw += 2 * rx;
1073 tmp_sh += 2 * ry;
1074 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
1075 if (outer_weight == 255)
1076 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
1077 else
reed@google.com03016a32011-08-12 14:59:59 +00001078 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
1079 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001080 }
reed@android.com8a1c16f2008-12-17 15:59:43 +00001081 }
1082
1083 dst->fImage = dp;
1084 // if need be, alloc the "real" dst (same size as src) and copy/merge
1085 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +00001086 if (style == kInner_Style) {
1087 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +00001088 size_t srcSize = src.computeImageSize();
1089 if (0 == srcSize) {
1090 return false; // too big to allocate, abort
1091 }
1092 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +00001093 merge_src_with_blur(dst->fImage, src.fRowBytes,
1094 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +00001095 dp + passCount * (rx + ry * dst->fRowBytes),
1096 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001097 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +00001098 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +00001099 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
1100 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001101 }
1102 (void)autoCall.detach();
1103 }
1104
reed@android.com0e3c6642009-09-18 13:41:56 +00001105 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001106 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +00001107 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +00001108 }
1109
reed@android.com8a1c16f2008-12-17 15:59:43 +00001110 return true;
1111}
1112
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001113bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
1114 SkScalar radius, Style style, Quality quality,
1115 SkIPoint* margin)
1116{
1117 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
1118}
1119
1120bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
1121 SkScalar radius, Style style, Quality quality,
1122 SkIPoint* margin)
1123{
1124 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
1125}
humper@google.com7c7292c2013-01-04 20:29:03 +00001126
/* Convolving a box with itself three times results in a piecewise
   quadratic function:

   0                              x <= -1.5
   9/8 + 3/2 x + 1/2 x^2   -1.5 < x <= -0.5
   3/4 - x^2               -0.5 < x <=  0.5
   9/8 - 3/2 x + 1/2 x^2    0.5 < x <=  1.5
   0                        1.5 < x

   To get the profile curve of the blurred step function at the rectangle
   edge, we evaluate the indefinite integral, which is piecewise cubic:

   0                                        x <= -1.5
   9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3   -1.5 < x <= -0.5
   1/2 + 3/4 x - 1/3 x^3              -0.5 < x <=  0.5
   7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3    0.5 < x <=  1.5
   1                                   1.5 < x
*/
1145
/*
    Evaluates the complement of the integrated triple-box kernel at x:
    the result is 1 for x < -1.5, falls monotonically through 0.5 at
    x == 0, and is 0 for x > 1.5. Between those limits one of three
    cubic pieces is selected by the interval that x falls in.
*/
static float gaussian_integral( float x ) {
    float result;

    if ( x > 1.5f ) {
        result = 0.0f;
    } else if ( x < -1.5f ) {
        result = 1.0f;
    } else {
        const float sq   = x*x;
        const float cube = sq*x;

        if ( x > 0.5f ) {
            // upper shoulder: 0.5 < x <= 1.5
            result = 0.5625f - ( cube / 6.0f - 3.0f * sq * 0.25f + 1.125f * x);
        } else if ( x > -0.5f ) {
            // central piece: -0.5 < x <= 0.5
            result = 0.5f - (0.75f * x - cube / 3.0f);
        } else {
            // lower shoulder: -1.5 <= x <= -0.5
            result = 0.4375f + (-cube / 6.0f - 3.0f * sq * 0.25f - 1.125f * x);
        }
    }
    return result;
}
1165
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001166/*
humper@google.com7c7292c2013-01-04 20:29:03 +00001167 compute_profile allocates and fills in an array of floating
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001168 point values between 0 and 255 for the profile signature of
humper@google.com7c7292c2013-01-04 20:29:03 +00001169 a blurred half-plane with the given blur radius. Since we're
1170 going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
1171 all the time, we actually fill in the profile pre-inverted
1172 (already done 255-x).
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001173
humper@google.com7c7292c2013-01-04 20:29:03 +00001174 The function returns the size of the array allocated for the
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001175 profile. It's the responsibility of the caller to delete the
humper@google.com7c7292c2013-01-04 20:29:03 +00001176 memory returned in profile_out.
1177*/
1178
1179static int compute_profile( SkScalar radius, unsigned int **profile_out ) {
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001180 int size = SkScalarFloorToInt(radius * 3 + 1);
humper@google.com7c7292c2013-01-04 20:29:03 +00001181 int center = size >> 1;
1182
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001183 unsigned int *profile = SkNEW_ARRAY(unsigned int, size);
humper@google.com7c7292c2013-01-04 20:29:03 +00001184
1185 float invr = 1.0f/radius;
1186
1187 profile[0] = 255;
1188 for (int x = 1 ; x < size ; x++) {
1189 float scaled_x = ( center - x ) * invr;
1190 float gi = gaussian_integral( scaled_x );
1191 profile[x] = 255 - (uint8_t) ( 255.f * gi );
1192 }
1193
1194 *profile_out = profile;
1195 return size;
1196}
1197
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001198// TODO MAYBE: Maintain a profile cache to avoid recomputing this for
humper@google.com7c7292c2013-01-04 20:29:03 +00001199// commonly used radii. Consider baking some of the most common blur radii
1200// directly in as static data?
1201
1202// Implementation adapted from Michael Herf's approach:
1203// http://stereopsis.com/shadowrect/
1204
1205bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,
1206 SkScalar provided_radius, Style style, Quality quality,
1207 SkIPoint *margin) {
1208 int profile_size;
1209 unsigned int *profile;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001210
1211
humper@google.com7c7292c2013-01-04 20:29:03 +00001212 float radius = SkScalarToFloat( SkScalarMul( provided_radius, kBlurRadiusFudgeFactor ) );
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001213
humper@google.com7c7292c2013-01-04 20:29:03 +00001214 profile_size = compute_profile( radius, &profile );
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001215 SkAutoTDeleteArray<unsigned int> ada(profile);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001216
humper@google.com7c7292c2013-01-04 20:29:03 +00001217 int pad = (int) (radius * 1.5f + 1);
1218 if (margin) {
1219 margin->set( pad, pad );
1220 }
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001221 dst->fBounds = SkIRect::MakeWH(SkScalarFloorToInt(src.width()), SkScalarFloorToInt(src.height()));
humper@google.com7c7292c2013-01-04 20:29:03 +00001222 dst->fBounds.outset(pad, pad);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001223
humper@google.com7c7292c2013-01-04 20:29:03 +00001224 dst->fRowBytes = dst->fBounds.width();
1225 dst->fFormat = SkMask::kA8_Format;
1226 dst->fImage = NULL;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001227
humper@google.com7c7292c2013-01-04 20:29:03 +00001228 size_t dstSize = dst->computeImageSize();
1229 if (0 == dstSize) {
1230 return false; // too big to allocate, abort
1231 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001232
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001233 int sw = SkScalarFloorToInt(src.width());
1234 int sh = SkScalarFloorToInt(src.height());
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001235
humper@google.com7c7292c2013-01-04 20:29:03 +00001236 uint8_t* dp = SkMask::AllocImage(dstSize);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001237
humper@google.com7c7292c2013-01-04 20:29:03 +00001238 dst->fImage = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001239
humper@google.com7c7292c2013-01-04 20:29:03 +00001240 int dst_height = dst->fBounds.height();
1241 int dst_width = dst->fBounds.width();
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001242
humper@google.com7c7292c2013-01-04 20:29:03 +00001243 // nearest odd number less than the profile size represents the center
1244 // of the (2x scaled) profile
1245 int center = ( profile_size & ~1 ) - 1;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001246
humper@google.com7c7292c2013-01-04 20:29:03 +00001247 int w = sw - center;
1248 int h = sh - center;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001249
humper@google.com7c7292c2013-01-04 20:29:03 +00001250 uint8_t *outptr = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001251
humper@google.com7c7292c2013-01-04 20:29:03 +00001252 for (int y = 0 ; y < dst_height ; y++)
1253 {
1254 // time to fill in a scanline of the blurry rectangle.
1255 // to avoid floating point math, everything is multiplied by
1256 // 2 where needed. This keeps things nice and integer-oriented.
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001257
humper@google.com7c7292c2013-01-04 20:29:03 +00001258 int dy = abs((y << 1) - dst_height) - h; // how far are we from the original edge?
1259 int oy = dy >> 1;
1260 if (oy < 0) oy = 0;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001261
humper@google.com7c7292c2013-01-04 20:29:03 +00001262 unsigned int profile_y = profile[oy];
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001263
humper@google.com7c7292c2013-01-04 20:29:03 +00001264 for (int x = 0 ; x < (dst_width << 1) ; x += 2) {
1265 int dx = abs( x - dst_width ) - w;
1266 int ox = dx >> 1;
1267 if (ox < 0) ox = 0;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001268
humper@google.com7c7292c2013-01-04 20:29:03 +00001269 unsigned int maskval = SkMulDiv255Round(profile[ox], profile_y);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001270
humper@google.com7c7292c2013-01-04 20:29:03 +00001271 *(outptr++) = maskval;
1272 }
1273 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001274
humper@google.com7c7292c2013-01-04 20:29:03 +00001275 return true;
1276}