blob: 70efa0bcba8df5c28b8e5408c049272ec7c156dc [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
// Scale factor for the blur radius, chosen to match the behavior of all of the
// existing blur code (both on the CPU and the GPU). This magic constant is 1/sqrt(3).
17
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +000018// TODO: get rid of this fudge factor and move any required fudging up into
humper@google.com7c7292c2013-01-04 20:29:03 +000019// the calling library
20
21#define kBlurRadiusFudgeFactor SkFloatToScalar( .57735f )
22
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000023#define UNROLL_SEPARABLE_LOOPS
24
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000025/**
26 * This function performs a box blur in X, of the given radius. If the
skia.committer@gmail.com884e60b2012-11-16 02:01:17 +000027 * "transpose" parameter is true, it will transpose the pixels on write,
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000028 * such that X and Y are swapped. Reads are always performed from contiguous
 * memory in X, for speed. The destination buffer (dst) must be at least
 * (width + 2 * max(leftRadius, rightRadius)) * height bytes in size.
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000031 *
32 * This is what the inner loop looks like before unrolling, and with the two
33 * cases broken out separately (width < diameter, width >= diameter):
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +000034 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000035 * if (width < diameter) {
36 * for (int x = 0; x < width; ++x) {
37 * sum += *right++;
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +000038 * *dptr = (sum * scale + half) >> 24;
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000039 * dptr += dst_x_stride;
40 * }
41 * for (int x = width; x < diameter; ++x) {
42 * *dptr = (sum * scale + half) >> 24;
43 * dptr += dst_x_stride;
44 * }
45 * for (int x = 0; x < width; ++x) {
46 * *dptr = (sum * scale + half) >> 24;
47 * sum -= *left++;
48 * dptr += dst_x_stride;
49 * }
50 * } else {
51 * for (int x = 0; x < diameter; ++x) {
52 * sum += *right++;
53 * *dptr = (sum * scale + half) >> 24;
54 * dptr += dst_x_stride;
55 * }
56 * for (int x = diameter; x < width; ++x) {
57 * sum += *right++;
58 * *dptr = (sum * scale + half) >> 24;
59 * sum -= *left++;
60 * dptr += dst_x_stride;
61 * }
62 * for (int x = 0; x < diameter; ++x) {
63 * *dptr = (sum * scale + half) >> 24;
64 * sum -= *left++;
65 * dptr += dst_x_stride;
66 * }
67 * }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000068 */
69static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000070 int leftRadius, int rightRadius, int width, int height,
71 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000072{
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000073 int diameter = leftRadius + rightRadius;
74 int kernelSize = diameter + 1;
75 int border = SkMin32(width, diameter);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000076 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000077 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000078 int dst_x_stride = transpose ? height : 1;
79 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000080#ifndef SK_DISABLE_BLUR_ROUNDING
81 uint32_t half = 1 << 23;
82#else
83 uint32_t half = 0;
84#endif
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000085 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000086 uint32_t sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000087 uint8_t* dptr = dst + y * dst_y_stride;
88 const uint8_t* right = src + y * src_y_stride;
89 const uint8_t* left = right;
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000090 for (int x = 0; x < rightRadius - leftRadius; x++) {
91 *dptr = 0;
92 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000093 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000094#define LEFT_BORDER_ITER \
95 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000096 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000097 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000098
99 int x = 0;
100#ifdef UNROLL_SEPARABLE_LOOPS
101 for (; x < border - 16; x += 16) {
102 LEFT_BORDER_ITER
103 LEFT_BORDER_ITER
104 LEFT_BORDER_ITER
105 LEFT_BORDER_ITER
106 LEFT_BORDER_ITER
107 LEFT_BORDER_ITER
108 LEFT_BORDER_ITER
109 LEFT_BORDER_ITER
110 LEFT_BORDER_ITER
111 LEFT_BORDER_ITER
112 LEFT_BORDER_ITER
113 LEFT_BORDER_ITER
114 LEFT_BORDER_ITER
115 LEFT_BORDER_ITER
116 LEFT_BORDER_ITER
117 LEFT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000118 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000119#endif
120 for (; x < border; ++x) {
121 LEFT_BORDER_ITER
122 }
123#undef LEFT_BORDER_ITER
124#define TRIVIAL_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000125 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000126 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000127 x = width;
128#ifdef UNROLL_SEPARABLE_LOOPS
129 for (; x < diameter - 16; x += 16) {
130 TRIVIAL_ITER
131 TRIVIAL_ITER
132 TRIVIAL_ITER
133 TRIVIAL_ITER
134 TRIVIAL_ITER
135 TRIVIAL_ITER
136 TRIVIAL_ITER
137 TRIVIAL_ITER
138 TRIVIAL_ITER
139 TRIVIAL_ITER
140 TRIVIAL_ITER
141 TRIVIAL_ITER
142 TRIVIAL_ITER
143 TRIVIAL_ITER
144 TRIVIAL_ITER
145 TRIVIAL_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000146 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000147#endif
148 for (; x < diameter; ++x) {
149 TRIVIAL_ITER
150 }
151#undef TRIVIAL_ITER
152#define CENTER_ITER \
153 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000154 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000155 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000156 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000157
158 x = diameter;
159#ifdef UNROLL_SEPARABLE_LOOPS
160 for (; x < width - 16; x += 16) {
161 CENTER_ITER
162 CENTER_ITER
163 CENTER_ITER
164 CENTER_ITER
165 CENTER_ITER
166 CENTER_ITER
167 CENTER_ITER
168 CENTER_ITER
169 CENTER_ITER
170 CENTER_ITER
171 CENTER_ITER
172 CENTER_ITER
173 CENTER_ITER
174 CENTER_ITER
175 CENTER_ITER
176 CENTER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000177 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000178#endif
179 for (; x < width; ++x) {
180 CENTER_ITER
181 }
182#undef CENTER_ITER
183#define RIGHT_BORDER_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000184 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000185 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000186 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000187
188 x = 0;
189#ifdef UNROLL_SEPARABLE_LOOPS
190 for (; x < border - 16; x += 16) {
191 RIGHT_BORDER_ITER
192 RIGHT_BORDER_ITER
193 RIGHT_BORDER_ITER
194 RIGHT_BORDER_ITER
195 RIGHT_BORDER_ITER
196 RIGHT_BORDER_ITER
197 RIGHT_BORDER_ITER
198 RIGHT_BORDER_ITER
199 RIGHT_BORDER_ITER
200 RIGHT_BORDER_ITER
201 RIGHT_BORDER_ITER
202 RIGHT_BORDER_ITER
203 RIGHT_BORDER_ITER
204 RIGHT_BORDER_ITER
205 RIGHT_BORDER_ITER
206 RIGHT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000207 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000208#endif
209 for (; x < border; ++x) {
210 RIGHT_BORDER_ITER
211 }
212#undef RIGHT_BORDER_ITER
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +0000213 for (int x = 0; x < leftRadius - rightRadius; x++) {
214 *dptr = 0;
215 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000216 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000217 SkASSERT(sum == 0);
218 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000219 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000220}
221
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000222/**
223 * This variant of the box blur handles blurring of non-integer radii. It
224 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
225 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly
226 * interpolates between them. In float this would be:
227 * outer_weight * outer_sum / kernelSize +
228 * (1.0 - outer_weight) * innerSum / (kernelSize - 2)
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +0000229 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000230 * This is what the inner loop looks like before unrolling, and with the two
231 * cases broken out separately (width < diameter, width >= diameter):
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +0000232 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000233 * if (width < diameter) {
234 * for (int x = 0; x < width; x++) {
235 * inner_sum = outer_sum;
236 * outer_sum += *right++;
237 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
238 * dptr += dst_x_stride;
239 * }
240 * for (int x = width; x < diameter; ++x) {
241 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
242 * dptr += dst_x_stride;
243 * }
244 * for (int x = 0; x < width; x++) {
245 * inner_sum = outer_sum - *left++;
246 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
247 * dptr += dst_x_stride;
248 * outer_sum = inner_sum;
249 * }
250 * } else {
251 * for (int x = 0; x < diameter; x++) {
252 * inner_sum = outer_sum;
253 * outer_sum += *right++;
254 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
255 * dptr += dst_x_stride;
256 * }
257 * for (int x = diameter; x < width; ++x) {
258 * inner_sum = outer_sum - *left;
259 * outer_sum += *right++;
260 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
261 * dptr += dst_x_stride;
262 * outer_sum -= *left++;
263 * }
264 * for (int x = 0; x < diameter; x++) {
265 * inner_sum = outer_sum - *left++;
266 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
267 * dptr += dst_x_stride;
268 * outer_sum = inner_sum;
269 * }
270 * }
271 * }
272 * return new_width;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000273 */
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000274
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000275static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
276 int radius, int width, int height,
277 bool transpose, uint8_t outer_weight)
278{
279 int diameter = radius * 2;
280 int kernelSize = diameter + 1;
281 int border = SkMin32(width, diameter);
282 int inner_weight = 255 - outer_weight;
283 outer_weight += outer_weight >> 7;
284 inner_weight += inner_weight >> 7;
285 uint32_t outer_scale = (outer_weight << 16) / kernelSize;
286 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000287#ifndef SK_DISABLE_BLUR_ROUNDING
288 uint32_t half = 1 << 23;
289#else
290 uint32_t half = 0;
291#endif
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000292 int new_width = width + diameter;
293 int dst_x_stride = transpose ? height : 1;
294 int dst_y_stride = transpose ? 1 : new_width;
295 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000296 uint32_t outer_sum = 0, inner_sum = 0;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000297 uint8_t* dptr = dst + y * dst_y_stride;
298 const uint8_t* right = src + y * src_y_stride;
299 const uint8_t* left = right;
300 int x = 0;
301
302#define LEFT_BORDER_ITER \
303 inner_sum = outer_sum; \
304 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000305 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000306 dptr += dst_x_stride;
307
308#ifdef UNROLL_SEPARABLE_LOOPS
309 for (;x < border - 16; x += 16) {
310 LEFT_BORDER_ITER
311 LEFT_BORDER_ITER
312 LEFT_BORDER_ITER
313 LEFT_BORDER_ITER
314 LEFT_BORDER_ITER
315 LEFT_BORDER_ITER
316 LEFT_BORDER_ITER
317 LEFT_BORDER_ITER
318 LEFT_BORDER_ITER
319 LEFT_BORDER_ITER
320 LEFT_BORDER_ITER
321 LEFT_BORDER_ITER
322 LEFT_BORDER_ITER
323 LEFT_BORDER_ITER
324 LEFT_BORDER_ITER
325 LEFT_BORDER_ITER
326 }
327#endif
328
329 for (;x < border; x++) {
330 LEFT_BORDER_ITER
331 }
332#undef LEFT_BORDER_ITER
333 for (int x = width; x < diameter; ++x) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000334 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000335 dptr += dst_x_stride;
336 }
337 x = diameter;
338
339#define CENTER_ITER \
340 inner_sum = outer_sum - *left; \
341 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000342 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000343 dptr += dst_x_stride; \
344 outer_sum -= *left++;
345
346#ifdef UNROLL_SEPARABLE_LOOPS
347 for (; x < width - 16; x += 16) {
348 CENTER_ITER
349 CENTER_ITER
350 CENTER_ITER
351 CENTER_ITER
352 CENTER_ITER
353 CENTER_ITER
354 CENTER_ITER
355 CENTER_ITER
356 CENTER_ITER
357 CENTER_ITER
358 CENTER_ITER
359 CENTER_ITER
360 CENTER_ITER
361 CENTER_ITER
362 CENTER_ITER
363 CENTER_ITER
364 }
365#endif
366 for (; x < width; ++x) {
367 CENTER_ITER
368 }
369#undef CENTER_ITER
370
371 #define RIGHT_BORDER_ITER \
372 inner_sum = outer_sum - *left++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000373 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000374 dptr += dst_x_stride; \
375 outer_sum = inner_sum;
376
377 x = 0;
378#ifdef UNROLL_SEPARABLE_LOOPS
379 for (; x < border - 16; x += 16) {
380 RIGHT_BORDER_ITER
381 RIGHT_BORDER_ITER
382 RIGHT_BORDER_ITER
383 RIGHT_BORDER_ITER
384 RIGHT_BORDER_ITER
385 RIGHT_BORDER_ITER
386 RIGHT_BORDER_ITER
387 RIGHT_BORDER_ITER
388 RIGHT_BORDER_ITER
389 RIGHT_BORDER_ITER
390 RIGHT_BORDER_ITER
391 RIGHT_BORDER_ITER
392 RIGHT_BORDER_ITER
393 RIGHT_BORDER_ITER
394 RIGHT_BORDER_ITER
395 RIGHT_BORDER_ITER
396 }
397#endif
398 for (; x < border; x++) {
399 RIGHT_BORDER_ITER
400 }
401#undef RIGHT_BORDER_ITER
402 SkASSERT(outer_sum == 0 && inner_sum == 0);
403 }
404 return new_width;
405}
406
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000407static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
408{
409 *loRadius = *hiRadius = SkScalarCeil(passRadius);
410 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
411 *loRadius = *hiRadius - 1;
412 }
413}
414
tomhudson@google.com01224d52011-11-28 18:22:01 +0000415// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
416// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
418// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
tomhudson@google.com054ff1e2012-01-11 19:29:08 +0000419#if defined(SK_BUILD_FOR_WIN32)
tomhudson@google.com01224d52011-11-28 18:22:01 +0000420#define UNROLL_KERNEL_LOOP 1
421#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +0000422
reed@android.com45607672009-09-21 00:27:08 +0000423/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
424 src values at their position, plus all values above and to the left.
425 When we sample into this buffer, we need an initial row and column of 0s,
426 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000427
reed@android.com45607672009-09-21 00:27:08 +0000428 src[i, j] == sum[i+1, j+1]
429 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000430
reed@android.com45607672009-09-21 00:27:08 +0000431 We assume that the sum buffer's stride == its width
432 */
reed@google.com03016a32011-08-12 14:59:59 +0000433static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
434 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +0000435 int sumW = srcW + 1;
436
437 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000438 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000439 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000440
441 int x, y;
442
reed@android.com45607672009-09-21 00:27:08 +0000443 // zero out the top row and column
444 memset(sum, 0, sumW * sizeof(sum[0]));
445 sum += sumW;
446
reed@android.com8a1c16f2008-12-17 15:59:43 +0000447 // special case first row
448 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000449 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000450 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000451 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000452 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000453 }
454 src += srcRB;
455
456 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000457 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000458 uint32_t L = 0;
459 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000460 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000461
462 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
463 uint32_t T = sum[-sumW];
464 X = *src++ + L + T - C;
465 *sum++ = X;
466 L = X;
467 C = T;
468 }
469
470 for (; x >= 4; x-=4) {
471 uint32_t T = sum[-sumW];
472 X = *src++ + L + T - C;
473 *sum++ = X;
474 L = X;
475 C = T;
476 T = sum[-sumW];
477 X = *src++ + L + T - C;
478 *sum++ = X;
479 L = X;
480 C = T;
481 T = sum[-sumW];
482 X = *src++ + L + T - C;
483 *sum++ = X;
484 L = X;
485 C = T;
486 T = sum[-sumW];
487 X = *src++ + L + T - C;
488 *sum++ = X;
489 L = X;
490 C = T;
491 }
492
493 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000494 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000495 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000496 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000497 L = X;
498 C = T;
499 }
500 src += srcRB;
501 }
502}
503
reed@google.com03016a32011-08-12 14:59:59 +0000504/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000505 * This is the path for apply_kernel() to be taken when the kernel
506 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000507 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000508static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
509 int sw, int sh) {
510 SkASSERT(2*rx > sw);
511
reed@android.com8a1c16f2008-12-17 15:59:43 +0000512 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
513
reed@android.com45607672009-09-21 00:27:08 +0000514 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000515
516 int dw = sw + 2*rx;
517 int dh = sh + 2*ry;
518
reed@android.com45607672009-09-21 00:27:08 +0000519 int prev_y = -2*ry;
520 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000521
reed@android.com45607672009-09-21 00:27:08 +0000522 for (int y = 0; y < dh; y++) {
523 int py = SkClampPos(prev_y) * sumStride;
524 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000525
reed@android.com45607672009-09-21 00:27:08 +0000526 int prev_x = -2*rx;
527 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000528
reed@android.com45607672009-09-21 00:27:08 +0000529 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000530 int px = SkClampPos(prev_x);
531 int nx = SkFastMin32(next_x, sw);
532
reed@android.com45607672009-09-21 00:27:08 +0000533 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
534 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000535
536 prev_x += 1;
537 next_x += 1;
538 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000539
540 prev_y += 1;
541 next_y += 1;
542 }
543}
544/**
545 * sw and sh are the width and height of the src. Since the sum buffer
546 * matches that, but has an extra row and col at the beginning (with zeros),
547 * we can just use sw and sh as our "max" values for pinning coordinates
548 * when sampling into sum[][]
549 *
550 * The inner loop is conceptually simple; we break it into several sections
551 * to improve performance. Here's the original version:
552 for (int x = 0; x < dw; x++) {
553 int px = SkClampPos(prev_x);
554 int nx = SkFastMin32(next_x, sw);
555
556 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
557 *dst++ = SkToU8(tmp * scale >> 24);
558
559 prev_x += 1;
560 next_x += 1;
561 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000562 * The sections are:
563 * left-hand section, where prev_x is clamped to 0
564 * center section, where neither prev_x nor next_x is clamped
565 * right-hand section, where next_x is clamped to sw
566 * On some operating systems, the center section is unrolled for additional
567 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000568*/
569static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
570 int sw, int sh) {
571 if (2*rx > sw) {
572 kernel_clamped(dst, rx, ry, sum, sw, sh);
573 return;
574 }
575
576 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
577
578 int sumStride = sw + 1;
579
580 int dw = sw + 2*rx;
581 int dh = sh + 2*ry;
582
583 int prev_y = -2*ry;
584 int next_y = 1;
585
586 SkASSERT(2*rx <= dw - 2*rx);
587
588 for (int y = 0; y < dh; y++) {
589 int py = SkClampPos(prev_y) * sumStride;
590 int ny = SkFastMin32(next_y, sh) * sumStride;
591
592 int prev_x = -2*rx;
593 int next_x = 1;
594 int x = 0;
595
596 for (; x < 2*rx; x++) {
597 SkASSERT(prev_x <= 0);
598 SkASSERT(next_x <= sw);
599
600 int px = 0;
601 int nx = next_x;
602
603 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
604 *dst++ = SkToU8(tmp * scale >> 24);
605
606 prev_x += 1;
607 next_x += 1;
608 }
609
tomhudson@google.com01224d52011-11-28 18:22:01 +0000610 int i0 = prev_x + py;
611 int i1 = next_x + ny;
612 int i2 = next_x + py;
613 int i3 = prev_x + ny;
614
615#if UNROLL_KERNEL_LOOP
616 for (; x < dw - 2*rx - 4; x += 4) {
617 SkASSERT(prev_x >= 0);
618 SkASSERT(next_x <= sw);
619
620 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
621 *dst++ = SkToU8(tmp * scale >> 24);
622 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
623 *dst++ = SkToU8(tmp * scale >> 24);
624 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
625 *dst++ = SkToU8(tmp * scale >> 24);
626 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
627 *dst++ = SkToU8(tmp * scale >> 24);
628
629 prev_x += 4;
630 next_x += 4;
631 }
632#endif
633
tomhudson@google.com8caac642011-11-22 15:58:06 +0000634 for (; x < dw - 2*rx; x++) {
635 SkASSERT(prev_x >= 0);
636 SkASSERT(next_x <= sw);
637
tomhudson@google.com01224d52011-11-28 18:22:01 +0000638 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000639 *dst++ = SkToU8(tmp * scale >> 24);
640
641 prev_x += 1;
642 next_x += 1;
643 }
644
645 for (; x < dw; x++) {
646 SkASSERT(prev_x >= 0);
647 SkASSERT(next_x > sw);
648
649 int px = prev_x;
650 int nx = sw;
651
652 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
653 *dst++ = SkToU8(tmp * scale >> 24);
654
655 prev_x += 1;
656 next_x += 1;
657 }
658
reed@android.com8a1c16f2008-12-17 15:59:43 +0000659 prev_y += 1;
660 next_y += 1;
661 }
662}
663
reed@google.com03016a32011-08-12 14:59:59 +0000664/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000665 * This is the path for apply_kernel_interp() to be taken when the kernel
666 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000667 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000668static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000669 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000670 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000671
672 int inner_weight = 255 - outer_weight;
673
674 // round these guys up if they're bigger than 127
675 outer_weight += outer_weight >> 7;
676 inner_weight += inner_weight >> 7;
677
678 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
679 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
680
reed@android.com45607672009-09-21 00:27:08 +0000681 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000682
683 int dw = sw + 2*rx;
684 int dh = sh + 2*ry;
685
reed@android.com45607672009-09-21 00:27:08 +0000686 int prev_y = -2*ry;
687 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000688
reed@android.com45607672009-09-21 00:27:08 +0000689 for (int y = 0; y < dh; y++) {
690 int py = SkClampPos(prev_y) * sumStride;
691 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000692
reed@android.com45607672009-09-21 00:27:08 +0000693 int ipy = SkClampPos(prev_y + 1) * sumStride;
694 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000695
reed@android.com45607672009-09-21 00:27:08 +0000696 int prev_x = -2*rx;
697 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000698
reed@android.com45607672009-09-21 00:27:08 +0000699 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000700 int px = SkClampPos(prev_x);
701 int nx = SkFastMin32(next_x, sw);
702
703 int ipx = SkClampPos(prev_x + 1);
704 int inx = SkClampMax(next_x - 1, sw);
705
tomhudson@google.com8caac642011-11-22 15:58:06 +0000706 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
707 - sum[nx+py] - sum[px+ny];
708 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
709 - sum[inx+ipy] - sum[ipx+iny];
710 *dst++ = SkToU8((outer_sum * outer_scale
711 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000712
713 prev_x += 1;
714 next_x += 1;
715 }
716 prev_y += 1;
717 next_y += 1;
718 }
719}
720
tomhudson@google.com8caac642011-11-22 15:58:06 +0000721/**
722 * sw and sh are the width and height of the src. Since the sum buffer
723 * matches that, but has an extra row and col at the beginning (with zeros),
724 * we can just use sw and sh as our "max" values for pinning coordinates
725 * when sampling into sum[][]
726 *
727 * The inner loop is conceptually simple; we break it into several variants
728 * to improve performance. Here's the original version:
729 for (int x = 0; x < dw; x++) {
730 int px = SkClampPos(prev_x);
731 int nx = SkFastMin32(next_x, sw);
732
733 int ipx = SkClampPos(prev_x + 1);
734 int inx = SkClampMax(next_x - 1, sw);
735
736 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
737 - sum[nx+py] - sum[px+ny];
738 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
739 - sum[inx+ipy] - sum[ipx+iny];
740 *dst++ = SkToU8((outer_sum * outer_scale
741 + inner_sum * inner_scale) >> 24);
742
743 prev_x += 1;
744 next_x += 1;
745 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000746 * The sections are:
747 * left-hand section, where prev_x is clamped to 0
748 * center section, where neither prev_x nor next_x is clamped
749 * right-hand section, where next_x is clamped to sw
750 * On some operating systems, the center section is unrolled for additional
751 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000752*/
753static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
754 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
755 SkASSERT(rx > 0 && ry > 0);
756 SkASSERT(outer_weight <= 255);
757
758 if (2*rx > sw) {
759 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
760 return;
761 }
762
763 int inner_weight = 255 - outer_weight;
764
765 // round these guys up if they're bigger than 127
766 outer_weight += outer_weight >> 7;
767 inner_weight += inner_weight >> 7;
768
769 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
770 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
771
772 int sumStride = sw + 1;
773
774 int dw = sw + 2*rx;
775 int dh = sh + 2*ry;
776
777 int prev_y = -2*ry;
778 int next_y = 1;
779
780 SkASSERT(2*rx <= dw - 2*rx);
781
782 for (int y = 0; y < dh; y++) {
783 int py = SkClampPos(prev_y) * sumStride;
784 int ny = SkFastMin32(next_y, sh) * sumStride;
785
786 int ipy = SkClampPos(prev_y + 1) * sumStride;
787 int iny = SkClampMax(next_y - 1, sh) * sumStride;
788
789 int prev_x = -2*rx;
790 int next_x = 1;
791 int x = 0;
792
793 for (; x < 2*rx; x++) {
794 SkASSERT(prev_x < 0);
795 SkASSERT(next_x <= sw);
796
797 int px = 0;
798 int nx = next_x;
799
800 int ipx = 0;
801 int inx = next_x - 1;
802
803 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
804 - sum[nx+py] - sum[px+ny];
805 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
806 - sum[inx+ipy] - sum[ipx+iny];
807 *dst++ = SkToU8((outer_sum * outer_scale
808 + inner_sum * inner_scale) >> 24);
809
810 prev_x += 1;
811 next_x += 1;
812 }
813
tomhudson@google.com01224d52011-11-28 18:22:01 +0000814 int i0 = prev_x + py;
815 int i1 = next_x + ny;
816 int i2 = next_x + py;
817 int i3 = prev_x + ny;
818 int i4 = prev_x + 1 + ipy;
819 int i5 = next_x - 1 + iny;
820 int i6 = next_x - 1 + ipy;
821 int i7 = prev_x + 1 + iny;
822
823#if UNROLL_KERNEL_LOOP
824 for (; x < dw - 2*rx - 4; x += 4) {
825 SkASSERT(prev_x >= 0);
826 SkASSERT(next_x <= sw);
827
828 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
829 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
830 *dst++ = SkToU8((outer_sum * outer_scale
831 + inner_sum * inner_scale) >> 24);
832 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
833 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
834 *dst++ = SkToU8((outer_sum * outer_scale
835 + inner_sum * inner_scale) >> 24);
836 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
837 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
838 *dst++ = SkToU8((outer_sum * outer_scale
839 + inner_sum * inner_scale) >> 24);
840 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
841 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
842 *dst++ = SkToU8((outer_sum * outer_scale
843 + inner_sum * inner_scale) >> 24);
844
845 prev_x += 4;
846 next_x += 4;
847 }
848#endif
849
tomhudson@google.com8caac642011-11-22 15:58:06 +0000850 for (; x < dw - 2*rx; x++) {
851 SkASSERT(prev_x >= 0);
852 SkASSERT(next_x <= sw);
853
tomhudson@google.com01224d52011-11-28 18:22:01 +0000854 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
855 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000856 *dst++ = SkToU8((outer_sum * outer_scale
857 + inner_sum * inner_scale) >> 24);
858
859 prev_x += 1;
860 next_x += 1;
861 }
862
863 for (; x < dw; x++) {
864 SkASSERT(prev_x >= 0);
865 SkASSERT(next_x > sw);
866
867 int px = prev_x;
868 int nx = sw;
869
870 int ipx = prev_x + 1;
871 int inx = sw;
872
873 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
874 - sum[nx+py] - sum[px+ny];
875 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
876 - sum[inx+ipy] - sum[ipx+iny];
877 *dst++ = SkToU8((outer_sum * outer_scale
878 + inner_sum * inner_scale) >> 24);
879
880 prev_x += 1;
881 next_x += 1;
882 }
883
884 prev_y += 1;
885 next_y += 1;
886 }
887}
888
reed@android.com8a1c16f2008-12-17 15:59:43 +0000889#include "SkColorPriv.h"
890
reed@android.com0e3c6642009-09-18 13:41:56 +0000891static void merge_src_with_blur(uint8_t dst[], int dstRB,
892 const uint8_t src[], int srcRB,
893 const uint8_t blur[], int blurRB,
894 int sw, int sh) {
895 dstRB -= sw;
896 srcRB -= sw;
897 blurRB -= sw;
898 while (--sh >= 0) {
899 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000900 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
901 dst += 1;
902 src += 1;
903 blur += 1;
904 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000905 dst += dstRB;
906 src += srcRB;
907 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000908 }
909}
910
911static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000912 const uint8_t src[], int srcRowBytes,
913 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000914 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000915 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000916 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000917 switch (style) {
918 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000919 for (x = sw - 1; x >= 0; --x) {
920 int s = *src;
921 int d = *dst;
922 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000923 dst += 1;
924 src += 1;
925 }
926 break;
927 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000928 for (x = sw - 1; x >= 0; --x) {
929 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000930 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000931 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000932 dst += 1;
933 src += 1;
934 }
935 break;
936 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000937 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000938 break;
939 }
940 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000941 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000942 }
943}
944
reed@google.com03016a32011-08-12 14:59:59 +0000945///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000946
bsalomon@google.com33cdbde2013-01-11 20:54:44 +0000947// we use a local function to wrap the class static method to work around
reed@android.com8a1c16f2008-12-17 15:59:43 +0000948// a bug in gcc98
949void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000950void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000951 SkMask::FreeImage(image);
952}
953
954bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000955 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000956 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000957{
reed@google.com03016a32011-08-12 14:59:59 +0000958 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000959 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000960 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000961
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000962 // Force high quality off for small radii (performance)
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000963 if (radius < SkIntToScalar(3)) {
964 quality = kLow_Quality;
965 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000966
967 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000968 int passCount = (kHigh_Quality == quality) ? 3 : 1;
humper@google.com7c7292c2013-01-04 20:29:03 +0000969 SkScalar passRadius = (kHigh_Quality == quality) ? SkScalarMul( radius, kBlurRadiusFudgeFactor): radius;
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000970
971 int rx = SkScalarCeil(passRadius);
972 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000973
974 SkASSERT(rx >= 0);
975 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000976 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000977 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000978 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000979
980 int ry = rx; // only do square blur for now
981
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000982 int padx = passCount * rx;
983 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000984 if (margin) {
985 margin->set(padx, pady);
986 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000987 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
988 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000989 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000990 dst->fFormat = SkMask::kA8_Format;
991 dst->fImage = NULL;
992
reed@android.com0e3c6642009-09-18 13:41:56 +0000993 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000994 size_t dstSize = dst->computeImageSize();
995 if (0 == dstSize) {
996 return false; // too big to allocate, abort
997 }
998
reed@android.com8a1c16f2008-12-17 15:59:43 +0000999 int sw = src.fBounds.width();
1000 int sh = src.fBounds.height();
1001 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +00001002 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001003
1004 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
1005
1006 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001007 if (separable) {
1008 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1009 uint8_t* tp = tmpBuffer.get();
1010 int w = sw, h = sh;
1011
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001012 if (outer_weight == 255) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +00001013 int loRadius, hiRadius;
1014 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001015 if (kHigh_Quality == quality) {
1016 // Do three X blurs, with a transpose on the final one.
1017 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
1018 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
1019 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
1020 // Do three Y blurs, with a transpose on the final one.
1021 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
1022 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
1023 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
1024 } else {
1025 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
1026 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
1027 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +00001028 } else {
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001029 if (kHigh_Quality == quality) {
1030 // Do three X blurs, with a transpose on the final one.
1031 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outer_weight);
1032 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outer_weight);
1033 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outer_weight);
1034 // Do three Y blurs, with a transpose on the final one.
1035 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outer_weight);
1036 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outer_weight);
1037 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);
1038 } else {
1039 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outer_weight);
1040 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);
1041 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001042 }
1043 } else {
reed@google.com03016a32011-08-12 14:59:59 +00001044 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
1045 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
1046 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001047 uint32_t* sumBuffer = storage.get();
1048
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001049 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +00001050 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +00001051 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001052 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +00001053 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001054 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +00001055 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001056
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001057 if (kHigh_Quality == quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001058 //pass2: dp is source, tmpBuffer is destination
1059 int tmp_sw = sw + 2 * rx;
1060 int tmp_sh = sh + 2 * ry;
1061 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1062 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
1063 if (outer_weight == 255)
1064 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
1065 else
reed@google.com03016a32011-08-12 14:59:59 +00001066 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
1067 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001068
1069 //pass3: tmpBuffer is source, dp is destination
1070 tmp_sw += 2 * rx;
1071 tmp_sh += 2 * ry;
1072 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
1073 if (outer_weight == 255)
1074 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
1075 else
reed@google.com03016a32011-08-12 14:59:59 +00001076 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
1077 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001078 }
reed@android.com8a1c16f2008-12-17 15:59:43 +00001079 }
1080
1081 dst->fImage = dp;
1082 // if need be, alloc the "real" dst (same size as src) and copy/merge
1083 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +00001084 if (style == kInner_Style) {
1085 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +00001086 size_t srcSize = src.computeImageSize();
1087 if (0 == srcSize) {
1088 return false; // too big to allocate, abort
1089 }
1090 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +00001091 merge_src_with_blur(dst->fImage, src.fRowBytes,
1092 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +00001093 dp + passCount * (rx + ry * dst->fRowBytes),
1094 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001095 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +00001096 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +00001097 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
1098 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001099 }
1100 (void)autoCall.detach();
1101 }
1102
reed@android.com0e3c6642009-09-18 13:41:56 +00001103 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001104 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +00001105 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +00001106 }
1107
reed@android.com8a1c16f2008-12-17 15:59:43 +00001108 return true;
1109}
1110
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001111bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
1112 SkScalar radius, Style style, Quality quality,
1113 SkIPoint* margin)
1114{
1115 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
1116}
1117
1118bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
1119 SkScalar radius, Style style, Quality quality,
1120 SkIPoint* margin)
1121{
1122 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
1123}
humper@google.com7c7292c2013-01-04 20:29:03 +00001124
1125/* Convolving a box with itself three times results in a piecewise
1126 quadratic function:
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001127
humper@google.com7c7292c2013-01-04 20:29:03 +00001128 0 x <= -1.5
    9/8 + 3/2 x + 1/2 x^2     -1.5 < x <= -.5
1130 3/4 - x^2 -.5 < x <= .5
1131 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5
1132 0 1.5 < x
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001133
humper@google.com7c7292c2013-01-04 20:29:03 +00001134 To get the profile curve of the blurred step function at the rectangle
1135 edge, we evaluate the indefinite integral, which is piecewise cubic:
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001136
humper@google.com7c7292c2013-01-04 20:29:03 +00001137 0 x <= -1.5
    9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3     -1.5 < x <= -0.5
    1/2 + 3/4 x - 1/3 x^3                -.5 < x <= .5
    7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3     .5 < x <= 1.5
1141 1 1.5 < x
1142*/
1143
// Evaluates the piecewise-cubic integral of the triple-box kernel, flipped so
// that the result falls from 1 (x <= -1.5) to 0 (x >= 1.5); i.e. it returns
// one minus the cumulative value at x.
static float gaussian_integral( float x ) {
    // beyond the kernel's support the integral has saturated
    if ( x > 1.5f ) {
        return 0.0f;
    }
    if ( x < -1.5f ) {
        return 1.0f;
    }

    const float sq   = x * x;
    const float cube = sq * x;

    float result;
    if ( x > 0.5f ) {
        // upper cubic piece: 0.5 < x <= 1.5
        result = 0.5625f - ( cube / 6.0f - 3.0f * sq * 0.25f + 1.125f * x);
    } else if ( x > -0.5f ) {
        // central cubic piece: -0.5 < x <= 0.5
        result = 0.5f - (0.75f * x - cube / 3.0f);
    } else {
        // lower cubic piece: -1.5 <= x <= -0.5
        result = 0.4375f + (-cube / 6.0f - 3.0f * sq * 0.25f - 1.125f * x);
    }
    return result;
}
1163
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001164/*
  compute_profile allocates and fills in an array of unsigned integer
  values between 0 and 255 for the profile signature of
humper@google.com7c7292c2013-01-04 20:29:03 +00001167 a blurred half-plane with the given blur radius. Since we're
1168 going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
1169 all the time, we actually fill in the profile pre-inverted
1170 (already done 255-x).
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001171
humper@google.com7c7292c2013-01-04 20:29:03 +00001172 The function returns the size of the array allocated for the
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001173 profile. It's the responsibility of the caller to delete the
humper@google.com7c7292c2013-01-04 20:29:03 +00001174 memory returned in profile_out.
1175*/
1176
1177static int compute_profile( SkScalar radius, unsigned int **profile_out ) {
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001178 int size = SkScalarFloorToInt(radius * 3 + 1);
humper@google.com7c7292c2013-01-04 20:29:03 +00001179 int center = size >> 1;
1180
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001181 unsigned int *profile = SkNEW_ARRAY(unsigned int, size);
humper@google.com7c7292c2013-01-04 20:29:03 +00001182
1183 float invr = 1.0f/radius;
1184
1185 profile[0] = 255;
1186 for (int x = 1 ; x < size ; x++) {
1187 float scaled_x = ( center - x ) * invr;
1188 float gi = gaussian_integral( scaled_x );
1189 profile[x] = 255 - (uint8_t) ( 255.f * gi );
1190 }
1191
1192 *profile_out = profile;
1193 return size;
1194}
1195
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001196// TODO MAYBE: Maintain a profile cache to avoid recomputing this for
humper@google.com7c7292c2013-01-04 20:29:03 +00001197// commonly used radii. Consider baking some of the most common blur radii
1198// directly in as static data?
1199
1200// Implementation adapted from Michael Herf's approach:
1201// http://stereopsis.com/shadowrect/
1202
1203bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,
1204 SkScalar provided_radius, Style style, Quality quality,
1205 SkIPoint *margin) {
1206 int profile_size;
1207 unsigned int *profile;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001208
1209
humper@google.com7c7292c2013-01-04 20:29:03 +00001210 float radius = SkScalarToFloat( SkScalarMul( provided_radius, kBlurRadiusFudgeFactor ) );
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001211
humper@google.com7c7292c2013-01-04 20:29:03 +00001212 profile_size = compute_profile( radius, &profile );
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001213 SkAutoTDeleteArray<unsigned int> ada(profile);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001214
humper@google.com7c7292c2013-01-04 20:29:03 +00001215 int pad = (int) (radius * 1.5f + 1);
1216 if (margin) {
1217 margin->set( pad, pad );
1218 }
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001219 dst->fBounds = SkIRect::MakeWH(SkScalarFloorToInt(src.width()), SkScalarFloorToInt(src.height()));
humper@google.com7c7292c2013-01-04 20:29:03 +00001220 dst->fBounds.outset(pad, pad);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001221
humper@google.com7c7292c2013-01-04 20:29:03 +00001222 dst->fRowBytes = dst->fBounds.width();
1223 dst->fFormat = SkMask::kA8_Format;
1224 dst->fImage = NULL;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001225
humper@google.com7c7292c2013-01-04 20:29:03 +00001226 size_t dstSize = dst->computeImageSize();
1227 if (0 == dstSize) {
1228 return false; // too big to allocate, abort
1229 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001230
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001231 int sw = SkScalarFloorToInt(src.width());
1232 int sh = SkScalarFloorToInt(src.height());
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001233
humper@google.com7c7292c2013-01-04 20:29:03 +00001234 uint8_t* dp = SkMask::AllocImage(dstSize);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001235
humper@google.com7c7292c2013-01-04 20:29:03 +00001236 dst->fImage = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001237
humper@google.com7c7292c2013-01-04 20:29:03 +00001238 int dst_height = dst->fBounds.height();
1239 int dst_width = dst->fBounds.width();
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001240
humper@google.com7c7292c2013-01-04 20:29:03 +00001241 // nearest odd number less than the profile size represents the center
1242 // of the (2x scaled) profile
1243 int center = ( profile_size & ~1 ) - 1;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001244
humper@google.com7c7292c2013-01-04 20:29:03 +00001245 int w = sw - center;
1246 int h = sh - center;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001247
humper@google.com7c7292c2013-01-04 20:29:03 +00001248 uint8_t *outptr = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001249
humper@google.com7c7292c2013-01-04 20:29:03 +00001250 for (int y = 0 ; y < dst_height ; y++)
1251 {
1252 // time to fill in a scanline of the blurry rectangle.
1253 // to avoid floating point math, everything is multiplied by
1254 // 2 where needed. This keeps things nice and integer-oriented.
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001255
humper@google.com7c7292c2013-01-04 20:29:03 +00001256 int dy = abs((y << 1) - dst_height) - h; // how far are we from the original edge?
1257 int oy = dy >> 1;
1258 if (oy < 0) oy = 0;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001259
humper@google.com7c7292c2013-01-04 20:29:03 +00001260 unsigned int profile_y = profile[oy];
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001261
humper@google.com7c7292c2013-01-04 20:29:03 +00001262 for (int x = 0 ; x < (dst_width << 1) ; x += 2) {
1263 int dx = abs( x - dst_width ) - w;
1264 int ox = dx >> 1;
1265 if (ox < 0) ox = 0;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001266
humper@google.com7c7292c2013-01-04 20:29:03 +00001267 unsigned int maskval = SkMulDiv255Round(profile[ox], profile_y);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001268
humper@google.com7c7292c2013-01-04 20:29:03 +00001269 *(outptr++) = maskval;
1270 }
1271 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001272
humper@google.com7c7292c2013-01-04 20:29:03 +00001273 return true;
1274}