blob: 97b5ab3ee962943c095412f895fcf2b4f37552db [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
// Scale factor applied to the blur radius so that results match the behavior
// of all the existing blur code (both on the CPU and the GPU). This magic
// constant is 1/sqrt(3).

// TODO: get rid of this fudge factor and move any required fudging up into
// the calling library

#define kBlurRadiusFudgeFactor SkFloatToScalar( .57735f )

// When defined, the inner loops of the separable box blurs below are manually
// unrolled 16 iterations at a time.
#define UNROLL_SEPARABLE_LOOPS
24
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000025/**
26 * This function performs a box blur in X, of the given radius. If the
skia.committer@gmail.com884e60b2012-11-16 02:01:17 +000027 * "transpose" parameter is true, it will transpose the pixels on write,
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000028 * such that X and Y are swapped. Reads are always performed from contiguous
29 * memory in X, for speed. The destination buffer (dst) must be at least
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000030 * (width + leftRadius + rightRadius) * height bytes in size.
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000031 *
32 * This is what the inner loop looks like before unrolling, and with the two
33 * cases broken out separately (width < diameter, width >= diameter):
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +000034 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000035 * if (width < diameter) {
36 * for (int x = 0; x < width; ++x) {
37 * sum += *right++;
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +000038 * *dptr = (sum * scale + half) >> 24;
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000039 * dptr += dst_x_stride;
40 * }
41 * for (int x = width; x < diameter; ++x) {
42 * *dptr = (sum * scale + half) >> 24;
43 * dptr += dst_x_stride;
44 * }
45 * for (int x = 0; x < width; ++x) {
46 * *dptr = (sum * scale + half) >> 24;
47 * sum -= *left++;
48 * dptr += dst_x_stride;
49 * }
50 * } else {
51 * for (int x = 0; x < diameter; ++x) {
52 * sum += *right++;
53 * *dptr = (sum * scale + half) >> 24;
54 * dptr += dst_x_stride;
55 * }
56 * for (int x = diameter; x < width; ++x) {
57 * sum += *right++;
58 * *dptr = (sum * scale + half) >> 24;
59 * sum -= *left++;
60 * dptr += dst_x_stride;
61 * }
62 * for (int x = 0; x < diameter; ++x) {
63 * *dptr = (sum * scale + half) >> 24;
64 * sum -= *left++;
65 * dptr += dst_x_stride;
66 * }
67 * }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000068 */
69static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000070 int leftRadius, int rightRadius, int width, int height,
71 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000072{
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000073 int diameter = leftRadius + rightRadius;
74 int kernelSize = diameter + 1;
75 int border = SkMin32(width, diameter);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000076 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000077 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000078 int dst_x_stride = transpose ? height : 1;
79 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000080#ifndef SK_DISABLE_BLUR_ROUNDING
81 uint32_t half = 1 << 23;
82#else
83 uint32_t half = 0;
84#endif
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000085 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000086 uint32_t sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000087 uint8_t* dptr = dst + y * dst_y_stride;
88 const uint8_t* right = src + y * src_y_stride;
89 const uint8_t* left = right;
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000090 for (int x = 0; x < rightRadius - leftRadius; x++) {
91 *dptr = 0;
92 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000093 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000094#define LEFT_BORDER_ITER \
95 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000096 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000097 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000098
99 int x = 0;
100#ifdef UNROLL_SEPARABLE_LOOPS
101 for (; x < border - 16; x += 16) {
102 LEFT_BORDER_ITER
103 LEFT_BORDER_ITER
104 LEFT_BORDER_ITER
105 LEFT_BORDER_ITER
106 LEFT_BORDER_ITER
107 LEFT_BORDER_ITER
108 LEFT_BORDER_ITER
109 LEFT_BORDER_ITER
110 LEFT_BORDER_ITER
111 LEFT_BORDER_ITER
112 LEFT_BORDER_ITER
113 LEFT_BORDER_ITER
114 LEFT_BORDER_ITER
115 LEFT_BORDER_ITER
116 LEFT_BORDER_ITER
117 LEFT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000118 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000119#endif
120 for (; x < border; ++x) {
121 LEFT_BORDER_ITER
122 }
123#undef LEFT_BORDER_ITER
124#define TRIVIAL_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000125 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000126 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000127 x = width;
128#ifdef UNROLL_SEPARABLE_LOOPS
129 for (; x < diameter - 16; x += 16) {
130 TRIVIAL_ITER
131 TRIVIAL_ITER
132 TRIVIAL_ITER
133 TRIVIAL_ITER
134 TRIVIAL_ITER
135 TRIVIAL_ITER
136 TRIVIAL_ITER
137 TRIVIAL_ITER
138 TRIVIAL_ITER
139 TRIVIAL_ITER
140 TRIVIAL_ITER
141 TRIVIAL_ITER
142 TRIVIAL_ITER
143 TRIVIAL_ITER
144 TRIVIAL_ITER
145 TRIVIAL_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000146 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000147#endif
148 for (; x < diameter; ++x) {
149 TRIVIAL_ITER
150 }
151#undef TRIVIAL_ITER
152#define CENTER_ITER \
153 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000154 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000155 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000156 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000157
158 x = diameter;
159#ifdef UNROLL_SEPARABLE_LOOPS
160 for (; x < width - 16; x += 16) {
161 CENTER_ITER
162 CENTER_ITER
163 CENTER_ITER
164 CENTER_ITER
165 CENTER_ITER
166 CENTER_ITER
167 CENTER_ITER
168 CENTER_ITER
169 CENTER_ITER
170 CENTER_ITER
171 CENTER_ITER
172 CENTER_ITER
173 CENTER_ITER
174 CENTER_ITER
175 CENTER_ITER
176 CENTER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000177 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000178#endif
179 for (; x < width; ++x) {
180 CENTER_ITER
181 }
182#undef CENTER_ITER
183#define RIGHT_BORDER_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000184 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000185 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000186 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000187
188 x = 0;
189#ifdef UNROLL_SEPARABLE_LOOPS
190 for (; x < border - 16; x += 16) {
191 RIGHT_BORDER_ITER
192 RIGHT_BORDER_ITER
193 RIGHT_BORDER_ITER
194 RIGHT_BORDER_ITER
195 RIGHT_BORDER_ITER
196 RIGHT_BORDER_ITER
197 RIGHT_BORDER_ITER
198 RIGHT_BORDER_ITER
199 RIGHT_BORDER_ITER
200 RIGHT_BORDER_ITER
201 RIGHT_BORDER_ITER
202 RIGHT_BORDER_ITER
203 RIGHT_BORDER_ITER
204 RIGHT_BORDER_ITER
205 RIGHT_BORDER_ITER
206 RIGHT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000207 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000208#endif
209 for (; x < border; ++x) {
210 RIGHT_BORDER_ITER
211 }
212#undef RIGHT_BORDER_ITER
humper@google.coma99a92c2013-02-20 16:42:06 +0000213 for (int x = 0; x < leftRadius - rightRadius; ++x) {
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +0000214 *dptr = 0;
215 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000216 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000217 SkASSERT(sum == 0);
218 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000219 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000220}
221
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000222/**
223 * This variant of the box blur handles blurring of non-integer radii. It
224 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
225 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly
226 * interpolates between them. In float this would be:
227 * outer_weight * outer_sum / kernelSize +
228 * (1.0 - outer_weight) * innerSum / (kernelSize - 2)
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +0000229 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000230 * This is what the inner loop looks like before unrolling, and with the two
231 * cases broken out separately (width < diameter, width >= diameter):
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +0000232 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000233 * if (width < diameter) {
234 * for (int x = 0; x < width; x++) {
235 * inner_sum = outer_sum;
236 * outer_sum += *right++;
237 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
238 * dptr += dst_x_stride;
239 * }
240 * for (int x = width; x < diameter; ++x) {
241 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
242 * dptr += dst_x_stride;
243 * }
244 * for (int x = 0; x < width; x++) {
245 * inner_sum = outer_sum - *left++;
246 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
247 * dptr += dst_x_stride;
248 * outer_sum = inner_sum;
249 * }
250 * } else {
251 * for (int x = 0; x < diameter; x++) {
252 * inner_sum = outer_sum;
253 * outer_sum += *right++;
254 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
255 * dptr += dst_x_stride;
256 * }
257 * for (int x = diameter; x < width; ++x) {
258 * inner_sum = outer_sum - *left;
259 * outer_sum += *right++;
260 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
261 * dptr += dst_x_stride;
262 * outer_sum -= *left++;
263 * }
264 * for (int x = 0; x < diameter; x++) {
265 * inner_sum = outer_sum - *left++;
266 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
267 * dptr += dst_x_stride;
268 * outer_sum = inner_sum;
269 * }
270 * }
271 * }
272 * return new_width;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000273 */
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000274
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000275static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
276 int radius, int width, int height,
277 bool transpose, uint8_t outer_weight)
278{
279 int diameter = radius * 2;
280 int kernelSize = diameter + 1;
281 int border = SkMin32(width, diameter);
282 int inner_weight = 255 - outer_weight;
283 outer_weight += outer_weight >> 7;
284 inner_weight += inner_weight >> 7;
285 uint32_t outer_scale = (outer_weight << 16) / kernelSize;
286 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000287#ifndef SK_DISABLE_BLUR_ROUNDING
288 uint32_t half = 1 << 23;
289#else
290 uint32_t half = 0;
291#endif
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000292 int new_width = width + diameter;
293 int dst_x_stride = transpose ? height : 1;
294 int dst_y_stride = transpose ? 1 : new_width;
295 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000296 uint32_t outer_sum = 0, inner_sum = 0;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000297 uint8_t* dptr = dst + y * dst_y_stride;
298 const uint8_t* right = src + y * src_y_stride;
299 const uint8_t* left = right;
300 int x = 0;
301
302#define LEFT_BORDER_ITER \
303 inner_sum = outer_sum; \
304 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000305 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000306 dptr += dst_x_stride;
307
308#ifdef UNROLL_SEPARABLE_LOOPS
309 for (;x < border - 16; x += 16) {
310 LEFT_BORDER_ITER
311 LEFT_BORDER_ITER
312 LEFT_BORDER_ITER
313 LEFT_BORDER_ITER
314 LEFT_BORDER_ITER
315 LEFT_BORDER_ITER
316 LEFT_BORDER_ITER
317 LEFT_BORDER_ITER
318 LEFT_BORDER_ITER
319 LEFT_BORDER_ITER
320 LEFT_BORDER_ITER
321 LEFT_BORDER_ITER
322 LEFT_BORDER_ITER
323 LEFT_BORDER_ITER
324 LEFT_BORDER_ITER
325 LEFT_BORDER_ITER
326 }
327#endif
328
humper@google.coma99a92c2013-02-20 16:42:06 +0000329 for (;x < border; ++x) {
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000330 LEFT_BORDER_ITER
331 }
332#undef LEFT_BORDER_ITER
333 for (int x = width; x < diameter; ++x) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000334 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000335 dptr += dst_x_stride;
336 }
337 x = diameter;
338
339#define CENTER_ITER \
340 inner_sum = outer_sum - *left; \
341 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000342 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000343 dptr += dst_x_stride; \
344 outer_sum -= *left++;
345
346#ifdef UNROLL_SEPARABLE_LOOPS
347 for (; x < width - 16; x += 16) {
348 CENTER_ITER
349 CENTER_ITER
350 CENTER_ITER
351 CENTER_ITER
352 CENTER_ITER
353 CENTER_ITER
354 CENTER_ITER
355 CENTER_ITER
356 CENTER_ITER
357 CENTER_ITER
358 CENTER_ITER
359 CENTER_ITER
360 CENTER_ITER
361 CENTER_ITER
362 CENTER_ITER
363 CENTER_ITER
364 }
365#endif
366 for (; x < width; ++x) {
367 CENTER_ITER
368 }
369#undef CENTER_ITER
370
371 #define RIGHT_BORDER_ITER \
372 inner_sum = outer_sum - *left++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000373 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000374 dptr += dst_x_stride; \
375 outer_sum = inner_sum;
376
377 x = 0;
378#ifdef UNROLL_SEPARABLE_LOOPS
379 for (; x < border - 16; x += 16) {
380 RIGHT_BORDER_ITER
381 RIGHT_BORDER_ITER
382 RIGHT_BORDER_ITER
383 RIGHT_BORDER_ITER
384 RIGHT_BORDER_ITER
385 RIGHT_BORDER_ITER
386 RIGHT_BORDER_ITER
387 RIGHT_BORDER_ITER
388 RIGHT_BORDER_ITER
389 RIGHT_BORDER_ITER
390 RIGHT_BORDER_ITER
391 RIGHT_BORDER_ITER
392 RIGHT_BORDER_ITER
393 RIGHT_BORDER_ITER
394 RIGHT_BORDER_ITER
395 RIGHT_BORDER_ITER
396 }
397#endif
humper@google.coma99a92c2013-02-20 16:42:06 +0000398 for (; x < border; ++x) {
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000399 RIGHT_BORDER_ITER
400 }
401#undef RIGHT_BORDER_ITER
402 SkASSERT(outer_sum == 0 && inner_sum == 0);
403 }
404 return new_width;
405}
406
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000407static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
408{
409 *loRadius = *hiRadius = SkScalarCeil(passRadius);
410 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
411 *loRadius = *hiRadius - 1;
412 }
413}
414
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
// The kernel loop is therefore only unrolled for Win32 builds.
#if defined(SK_BUILD_FOR_WIN32)
#define UNROLL_KERNEL_LOOP 1
#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +0000422
reed@android.com45607672009-09-21 00:27:08 +0000423/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
424 src values at their position, plus all values above and to the left.
425 When we sample into this buffer, we need an initial row and column of 0s,
426 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000427
reed@android.com45607672009-09-21 00:27:08 +0000428 src[i, j] == sum[i+1, j+1]
429 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000430
reed@android.com45607672009-09-21 00:27:08 +0000431 We assume that the sum buffer's stride == its width
432 */
reed@google.com03016a32011-08-12 14:59:59 +0000433static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
434 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +0000435 int sumW = srcW + 1;
436
437 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000438 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000439 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000440
441 int x, y;
442
reed@android.com45607672009-09-21 00:27:08 +0000443 // zero out the top row and column
444 memset(sum, 0, sumW * sizeof(sum[0]));
445 sum += sumW;
446
reed@android.com8a1c16f2008-12-17 15:59:43 +0000447 // special case first row
448 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000449 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000450 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000451 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000452 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000453 }
454 src += srcRB;
455
456 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000457 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000458 uint32_t L = 0;
459 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000460 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000461
462 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
463 uint32_t T = sum[-sumW];
464 X = *src++ + L + T - C;
465 *sum++ = X;
466 L = X;
467 C = T;
468 }
469
470 for (; x >= 4; x-=4) {
471 uint32_t T = sum[-sumW];
472 X = *src++ + L + T - C;
473 *sum++ = X;
474 L = X;
475 C = T;
476 T = sum[-sumW];
477 X = *src++ + L + T - C;
478 *sum++ = X;
479 L = X;
480 C = T;
481 T = sum[-sumW];
482 X = *src++ + L + T - C;
483 *sum++ = X;
484 L = X;
485 C = T;
486 T = sum[-sumW];
487 X = *src++ + L + T - C;
488 *sum++ = X;
489 L = X;
490 C = T;
491 }
492
493 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000494 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000495 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000496 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000497 L = X;
498 C = T;
499 }
500 src += srcRB;
501 }
502}
503
reed@google.com03016a32011-08-12 14:59:59 +0000504/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000505 * This is the path for apply_kernel() to be taken when the kernel
506 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000507 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000508static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
509 int sw, int sh) {
510 SkASSERT(2*rx > sw);
511
reed@android.com8a1c16f2008-12-17 15:59:43 +0000512 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
513
reed@android.com45607672009-09-21 00:27:08 +0000514 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000515
516 int dw = sw + 2*rx;
517 int dh = sh + 2*ry;
518
reed@android.com45607672009-09-21 00:27:08 +0000519 int prev_y = -2*ry;
520 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000521
humper@google.coma99a92c2013-02-20 16:42:06 +0000522 for (int y = 0; y < dh; ++y) {
reed@android.com45607672009-09-21 00:27:08 +0000523 int py = SkClampPos(prev_y) * sumStride;
524 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000525
reed@android.com45607672009-09-21 00:27:08 +0000526 int prev_x = -2*rx;
527 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000528
humper@google.coma99a92c2013-02-20 16:42:06 +0000529 for (int x = 0; x < dw; ++x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000530 int px = SkClampPos(prev_x);
531 int nx = SkFastMin32(next_x, sw);
532
humper@google.coma99a92c2013-02-20 16:42:06 +0000533 // TODO: should we be adding 1/2 (1 << 23) to round to the
534 // nearest integer here?
reed@android.com45607672009-09-21 00:27:08 +0000535 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
536 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000537
538 prev_x += 1;
539 next_x += 1;
540 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000541
542 prev_y += 1;
543 next_y += 1;
544 }
545}
546/**
547 * sw and sh are the width and height of the src. Since the sum buffer
548 * matches that, but has an extra row and col at the beginning (with zeros),
549 * we can just use sw and sh as our "max" values for pinning coordinates
550 * when sampling into sum[][]
551 *
552 * The inner loop is conceptually simple; we break it into several sections
553 * to improve performance. Here's the original version:
humper@google.coma99a92c2013-02-20 16:42:06 +0000554 for (int x = 0; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000555 int px = SkClampPos(prev_x);
556 int nx = SkFastMin32(next_x, sw);
557
558 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
559 *dst++ = SkToU8(tmp * scale >> 24);
560
561 prev_x += 1;
562 next_x += 1;
563 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000564 * The sections are:
565 * left-hand section, where prev_x is clamped to 0
566 * center section, where neither prev_x nor next_x is clamped
567 * right-hand section, where next_x is clamped to sw
568 * On some operating systems, the center section is unrolled for additional
569 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000570*/
571static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
572 int sw, int sh) {
573 if (2*rx > sw) {
574 kernel_clamped(dst, rx, ry, sum, sw, sh);
575 return;
576 }
577
578 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
579
580 int sumStride = sw + 1;
581
582 int dw = sw + 2*rx;
583 int dh = sh + 2*ry;
584
585 int prev_y = -2*ry;
586 int next_y = 1;
587
588 SkASSERT(2*rx <= dw - 2*rx);
589
humper@google.coma99a92c2013-02-20 16:42:06 +0000590 for (int y = 0; y < dh; ++y) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000591 int py = SkClampPos(prev_y) * sumStride;
592 int ny = SkFastMin32(next_y, sh) * sumStride;
593
594 int prev_x = -2*rx;
595 int next_x = 1;
596 int x = 0;
597
humper@google.coma99a92c2013-02-20 16:42:06 +0000598 for (; x < 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000599 SkASSERT(prev_x <= 0);
600 SkASSERT(next_x <= sw);
601
602 int px = 0;
603 int nx = next_x;
604
605 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
606 *dst++ = SkToU8(tmp * scale >> 24);
607
608 prev_x += 1;
609 next_x += 1;
610 }
611
tomhudson@google.com01224d52011-11-28 18:22:01 +0000612 int i0 = prev_x + py;
613 int i1 = next_x + ny;
614 int i2 = next_x + py;
615 int i3 = prev_x + ny;
616
617#if UNROLL_KERNEL_LOOP
618 for (; x < dw - 2*rx - 4; x += 4) {
619 SkASSERT(prev_x >= 0);
620 SkASSERT(next_x <= sw);
621
622 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
623 *dst++ = SkToU8(tmp * scale >> 24);
624 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
625 *dst++ = SkToU8(tmp * scale >> 24);
626 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
627 *dst++ = SkToU8(tmp * scale >> 24);
628 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
629 *dst++ = SkToU8(tmp * scale >> 24);
630
631 prev_x += 4;
632 next_x += 4;
633 }
634#endif
635
humper@google.coma99a92c2013-02-20 16:42:06 +0000636 for (; x < dw - 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000637 SkASSERT(prev_x >= 0);
638 SkASSERT(next_x <= sw);
639
tomhudson@google.com01224d52011-11-28 18:22:01 +0000640 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000641 *dst++ = SkToU8(tmp * scale >> 24);
642
643 prev_x += 1;
644 next_x += 1;
645 }
646
humper@google.coma99a92c2013-02-20 16:42:06 +0000647 for (; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000648 SkASSERT(prev_x >= 0);
649 SkASSERT(next_x > sw);
650
651 int px = prev_x;
652 int nx = sw;
653
654 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
655 *dst++ = SkToU8(tmp * scale >> 24);
656
657 prev_x += 1;
658 next_x += 1;
659 }
660
reed@android.com8a1c16f2008-12-17 15:59:43 +0000661 prev_y += 1;
662 next_y += 1;
663 }
664}
665
reed@google.com03016a32011-08-12 14:59:59 +0000666/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000667 * This is the path for apply_kernel_interp() to be taken when the kernel
668 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000669 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000670static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
humper@google.coma99a92c2013-02-20 16:42:06 +0000671 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000672 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000673
humper@google.coma99a92c2013-02-20 16:42:06 +0000674 int innerWeight = 255 - outerWeight;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000675
676 // round these guys up if they're bigger than 127
humper@google.coma99a92c2013-02-20 16:42:06 +0000677 outerWeight += outerWeight >> 7;
678 innerWeight += innerWeight >> 7;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000679
humper@google.coma99a92c2013-02-20 16:42:06 +0000680 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
681 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000682
reed@android.com45607672009-09-21 00:27:08 +0000683 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000684
685 int dw = sw + 2*rx;
686 int dh = sh + 2*ry;
687
reed@android.com45607672009-09-21 00:27:08 +0000688 int prev_y = -2*ry;
689 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000690
humper@google.coma99a92c2013-02-20 16:42:06 +0000691 for (int y = 0; y < dh; ++y) {
reed@android.com45607672009-09-21 00:27:08 +0000692 int py = SkClampPos(prev_y) * sumStride;
693 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000694
reed@android.com45607672009-09-21 00:27:08 +0000695 int ipy = SkClampPos(prev_y + 1) * sumStride;
696 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000697
reed@android.com45607672009-09-21 00:27:08 +0000698 int prev_x = -2*rx;
699 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000700
humper@google.coma99a92c2013-02-20 16:42:06 +0000701 for (int x = 0; x < dw; ++x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000702 int px = SkClampPos(prev_x);
703 int nx = SkFastMin32(next_x, sw);
704
705 int ipx = SkClampPos(prev_x + 1);
706 int inx = SkClampMax(next_x - 1, sw);
707
humper@google.coma99a92c2013-02-20 16:42:06 +0000708 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000709 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000710 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000711 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000712 *dst++ = SkToU8((outerSum * outerScale
713 + innerSum * innerScale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000714
715 prev_x += 1;
716 next_x += 1;
717 }
718 prev_y += 1;
719 next_y += 1;
720 }
721}
722
tomhudson@google.com8caac642011-11-22 15:58:06 +0000723/**
724 * sw and sh are the width and height of the src. Since the sum buffer
725 * matches that, but has an extra row and col at the beginning (with zeros),
726 * we can just use sw and sh as our "max" values for pinning coordinates
727 * when sampling into sum[][]
728 *
729 * The inner loop is conceptually simple; we break it into several variants
730 * to improve performance. Here's the original version:
humper@google.coma99a92c2013-02-20 16:42:06 +0000731 for (int x = 0; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000732 int px = SkClampPos(prev_x);
733 int nx = SkFastMin32(next_x, sw);
734
735 int ipx = SkClampPos(prev_x + 1);
736 int inx = SkClampMax(next_x - 1, sw);
737
humper@google.coma99a92c2013-02-20 16:42:06 +0000738 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000739 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000740 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000741 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000742 *dst++ = SkToU8((outerSum * outerScale
743 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000744
745 prev_x += 1;
746 next_x += 1;
747 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000748 * The sections are:
749 * left-hand section, where prev_x is clamped to 0
750 * center section, where neither prev_x nor next_x is clamped
751 * right-hand section, where next_x is clamped to sw
752 * On some operating systems, the center section is unrolled for additional
753 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000754*/
755static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
humper@google.coma99a92c2013-02-20 16:42:06 +0000756 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000757 SkASSERT(rx > 0 && ry > 0);
humper@google.coma99a92c2013-02-20 16:42:06 +0000758 SkASSERT(outerWeight <= 255);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000759
760 if (2*rx > sw) {
humper@google.coma99a92c2013-02-20 16:42:06 +0000761 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000762 return;
763 }
764
humper@google.coma99a92c2013-02-20 16:42:06 +0000765 int innerWeight = 255 - outerWeight;
tomhudson@google.com8caac642011-11-22 15:58:06 +0000766
767 // round these guys up if they're bigger than 127
humper@google.coma99a92c2013-02-20 16:42:06 +0000768 outerWeight += outerWeight >> 7;
769 innerWeight += innerWeight >> 7;
tomhudson@google.com8caac642011-11-22 15:58:06 +0000770
humper@google.coma99a92c2013-02-20 16:42:06 +0000771 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
772 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
tomhudson@google.com8caac642011-11-22 15:58:06 +0000773
774 int sumStride = sw + 1;
775
776 int dw = sw + 2*rx;
777 int dh = sh + 2*ry;
778
779 int prev_y = -2*ry;
780 int next_y = 1;
781
782 SkASSERT(2*rx <= dw - 2*rx);
783
humper@google.coma99a92c2013-02-20 16:42:06 +0000784 for (int y = 0; y < dh; ++y) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000785 int py = SkClampPos(prev_y) * sumStride;
786 int ny = SkFastMin32(next_y, sh) * sumStride;
787
788 int ipy = SkClampPos(prev_y + 1) * sumStride;
789 int iny = SkClampMax(next_y - 1, sh) * sumStride;
790
791 int prev_x = -2*rx;
792 int next_x = 1;
793 int x = 0;
794
humper@google.coma99a92c2013-02-20 16:42:06 +0000795 for (; x < 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000796 SkASSERT(prev_x < 0);
797 SkASSERT(next_x <= sw);
798
799 int px = 0;
800 int nx = next_x;
801
802 int ipx = 0;
803 int inx = next_x - 1;
804
humper@google.coma99a92c2013-02-20 16:42:06 +0000805 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000806 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000807 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000808 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000809 *dst++ = SkToU8((outerSum * outerScale
810 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000811
812 prev_x += 1;
813 next_x += 1;
814 }
815
tomhudson@google.com01224d52011-11-28 18:22:01 +0000816 int i0 = prev_x + py;
817 int i1 = next_x + ny;
818 int i2 = next_x + py;
819 int i3 = prev_x + ny;
820 int i4 = prev_x + 1 + ipy;
821 int i5 = next_x - 1 + iny;
822 int i6 = next_x - 1 + ipy;
823 int i7 = prev_x + 1 + iny;
824
825#if UNROLL_KERNEL_LOOP
826 for (; x < dw - 2*rx - 4; x += 4) {
827 SkASSERT(prev_x >= 0);
828 SkASSERT(next_x <= sw);
829
humper@google.coma99a92c2013-02-20 16:42:06 +0000830 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
831 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
832 *dst++ = SkToU8((outerSum * outerScale
833 + innerSum * innerScale) >> 24);
834 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
835 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
836 *dst++ = SkToU8((outerSum * outerScale
837 + innerSum * innerScale) >> 24);
838 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
839 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
840 *dst++ = SkToU8((outerSum * outerScale
841 + innerSum * innerScale) >> 24);
842 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
843 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
844 *dst++ = SkToU8((outerSum * outerScale
845 + innerSum * innerScale) >> 24);
tomhudson@google.com01224d52011-11-28 18:22:01 +0000846
847 prev_x += 4;
848 next_x += 4;
849 }
850#endif
851
humper@google.coma99a92c2013-02-20 16:42:06 +0000852 for (; x < dw - 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000853 SkASSERT(prev_x >= 0);
854 SkASSERT(next_x <= sw);
855
humper@google.coma99a92c2013-02-20 16:42:06 +0000856 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
857 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
858 *dst++ = SkToU8((outerSum * outerScale
859 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000860
861 prev_x += 1;
862 next_x += 1;
863 }
864
humper@google.coma99a92c2013-02-20 16:42:06 +0000865 for (; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000866 SkASSERT(prev_x >= 0);
867 SkASSERT(next_x > sw);
868
869 int px = prev_x;
870 int nx = sw;
871
872 int ipx = prev_x + 1;
873 int inx = sw;
874
humper@google.coma99a92c2013-02-20 16:42:06 +0000875 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000876 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000877 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000878 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000879 *dst++ = SkToU8((outerSum * outerScale
880 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000881
882 prev_x += 1;
883 next_x += 1;
884 }
885
886 prev_y += 1;
887 next_y += 1;
888 }
889}
890
reed@android.com8a1c16f2008-12-17 15:59:43 +0000891#include "SkColorPriv.h"
892
reed@android.com0e3c6642009-09-18 13:41:56 +0000893static void merge_src_with_blur(uint8_t dst[], int dstRB,
894 const uint8_t src[], int srcRB,
895 const uint8_t blur[], int blurRB,
896 int sw, int sh) {
897 dstRB -= sw;
898 srcRB -= sw;
899 blurRB -= sw;
900 while (--sh >= 0) {
901 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000902 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
903 dst += 1;
904 src += 1;
905 blur += 1;
906 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000907 dst += dstRB;
908 src += srcRB;
909 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000910 }
911}
912
913static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000914 const uint8_t src[], int srcRowBytes,
915 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000916 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000917 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000918 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000919 switch (style) {
920 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000921 for (x = sw - 1; x >= 0; --x) {
922 int s = *src;
923 int d = *dst;
924 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000925 dst += 1;
926 src += 1;
927 }
928 break;
929 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000930 for (x = sw - 1; x >= 0; --x) {
931 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000932 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000933 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000934 dst += 1;
935 src += 1;
936 }
937 break;
938 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000939 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000940 break;
941 }
942 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000943 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000944 }
945}
946
reed@google.com03016a32011-08-12 14:59:59 +0000947///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000948
bsalomon@google.com33cdbde2013-01-11 20:54:44 +0000949// we use a local function to wrap the class static method to work around
reed@android.com8a1c16f2008-12-17 15:59:43 +0000950// a bug in gcc98
951void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000952void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000953 SkMask::FreeImage(image);
954}
955
956bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000957 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000958 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000959{
humper@google.coma99a92c2013-02-20 16:42:06 +0000960
reed@google.com03016a32011-08-12 14:59:59 +0000961 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000962 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000963 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000964
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000965 // Force high quality off for small radii (performance)
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000966 if (radius < SkIntToScalar(3)) {
967 quality = kLow_Quality;
968 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +0000969
970 // highQuality: use three box blur passes as a cheap way
humper@google.coma99a92c2013-02-20 16:42:06 +0000971 // to approximate a Gaussian blur
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000972 int passCount = (kHigh_Quality == quality) ? 3 : 1;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +0000973 SkScalar passRadius = (kHigh_Quality == quality) ?
974 SkScalarMul( radius, kBlurRadiusFudgeFactor):
humper@google.coma99a92c2013-02-20 16:42:06 +0000975 radius;
976
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000977 int rx = SkScalarCeil(passRadius);
humper@google.coma99a92c2013-02-20 16:42:06 +0000978 int outerWeight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000979
980 SkASSERT(rx >= 0);
humper@google.coma99a92c2013-02-20 16:42:06 +0000981 SkASSERT((unsigned)outerWeight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000982 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000983 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000984 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000985
986 int ry = rx; // only do square blur for now
987
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000988 int padx = passCount * rx;
989 int pady = passCount * ry;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +0000990
bungeman@google.com5af16f82011-09-02 15:06:44 +0000991 if (margin) {
992 margin->set(padx, pady);
993 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000994 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
995 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +0000996
reed@android.com49f0ff22009-03-19 21:52:42 +0000997 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000998 dst->fFormat = SkMask::kA8_Format;
999 dst->fImage = NULL;
1000
reed@android.com0e3c6642009-09-18 13:41:56 +00001001 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +00001002 size_t dstSize = dst->computeImageSize();
1003 if (0 == dstSize) {
1004 return false; // too big to allocate, abort
1005 }
1006
reed@android.com8a1c16f2008-12-17 15:59:43 +00001007 int sw = src.fBounds.width();
1008 int sh = src.fBounds.height();
1009 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +00001010 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001011 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
1012
1013 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001014 if (separable) {
1015 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1016 uint8_t* tp = tmpBuffer.get();
1017 int w = sw, h = sh;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001018
humper@google.coma99a92c2013-02-20 16:42:06 +00001019 if (outerWeight == 255) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +00001020 int loRadius, hiRadius;
1021 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001022 if (kHigh_Quality == quality) {
1023 // Do three X blurs, with a transpose on the final one.
1024 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
1025 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
1026 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
1027 // Do three Y blurs, with a transpose on the final one.
1028 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
1029 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
1030 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
1031 } else {
1032 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
1033 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
1034 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +00001035 } else {
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001036 if (kHigh_Quality == quality) {
1037 // Do three X blurs, with a transpose on the final one.
humper@google.coma99a92c2013-02-20 16:42:06 +00001038 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
1039 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight);
1040 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001041 // Do three Y blurs, with a transpose on the final one.
humper@google.coma99a92c2013-02-20 16:42:06 +00001042 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight);
1043 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight);
1044 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001045 } else {
humper@google.coma99a92c2013-02-20 16:42:06 +00001046 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
1047 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001048 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001049 }
1050 } else {
reed@google.com03016a32011-08-12 14:59:59 +00001051 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
1052 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
1053 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001054 uint32_t* sumBuffer = storage.get();
1055
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001056 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +00001057 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
humper@google.coma99a92c2013-02-20 16:42:06 +00001058 if (outerWeight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001059 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +00001060 } else {
humper@google.coma99a92c2013-02-20 16:42:06 +00001061 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outerWeight);
reed@google.com03016a32011-08-12 14:59:59 +00001062 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001063
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001064 if (kHigh_Quality == quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001065 //pass2: dp is source, tmpBuffer is destination
1066 int tmp_sw = sw + 2 * rx;
1067 int tmp_sh = sh + 2 * ry;
1068 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1069 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
humper@google.coma99a92c2013-02-20 16:42:06 +00001070 if (outerWeight == 255)
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001071 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
1072 else
reed@google.com03016a32011-08-12 14:59:59 +00001073 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
humper@google.coma99a92c2013-02-20 16:42:06 +00001074 tmp_sw, tmp_sh, outerWeight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001075
1076 //pass3: tmpBuffer is source, dp is destination
1077 tmp_sw += 2 * rx;
1078 tmp_sh += 2 * ry;
1079 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
humper@google.coma99a92c2013-02-20 16:42:06 +00001080 if (outerWeight == 255)
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001081 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
1082 else
reed@google.com03016a32011-08-12 14:59:59 +00001083 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
humper@google.coma99a92c2013-02-20 16:42:06 +00001084 outerWeight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001085 }
reed@android.com8a1c16f2008-12-17 15:59:43 +00001086 }
1087
1088 dst->fImage = dp;
1089 // if need be, alloc the "real" dst (same size as src) and copy/merge
1090 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +00001091 if (style == kInner_Style) {
1092 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +00001093 size_t srcSize = src.computeImageSize();
1094 if (0 == srcSize) {
1095 return false; // too big to allocate, abort
1096 }
1097 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +00001098 merge_src_with_blur(dst->fImage, src.fRowBytes,
1099 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +00001100 dp + passCount * (rx + ry * dst->fRowBytes),
1101 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001102 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +00001103 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +00001104 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
1105 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001106 }
1107 (void)autoCall.detach();
1108 }
1109
reed@android.com0e3c6642009-09-18 13:41:56 +00001110 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001111 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +00001112 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +00001113 }
1114
reed@android.com8a1c16f2008-12-17 15:59:43 +00001115 return true;
1116}
1117
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001118bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
1119 SkScalar radius, Style style, Quality quality,
1120 SkIPoint* margin)
1121{
1122 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
1123}
1124
1125bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
1126 SkScalar radius, Style style, Quality quality,
1127 SkIPoint* margin)
1128{
1129 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
1130}
humper@google.com7c7292c2013-01-04 20:29:03 +00001131
1132/* Convolving a box with itself three times results in a piecewise
1133 quadratic function:
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001134
humper@google.com7c7292c2013-01-04 20:29:03 +00001135 0 x <= -1.5
humper@google.coma99a92c2013-02-20 16:42:06 +00001136 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001137 3/4 - x^2 -.5 < x <= .5
1138 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5
1139 0 1.5 < x
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001140
humper@google.coma99a92c2013-02-20 16:42:06 +00001141 Mathematica:
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001142
humper@google.coma99a92c2013-02-20 16:42:06 +00001143 g[x_] := Piecewise [ {
1144 {9/8 + 3/2 x + 1/2 x^2 , -1.5 < x <= -.5},
1145 {3/4 - x^2 , -.5 < x <= .5},
1146 {9/8 - 3/2 x + 1/2 x^2 , 0.5 < x <= 1.5}
1147 }, 0]
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001148
humper@google.com7c7292c2013-01-04 20:29:03 +00001149 To get the profile curve of the blurred step function at the rectangle
1150 edge, we evaluate the indefinite integral, which is piecewise cubic:
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001151
humper@google.com7c7292c2013-01-04 20:29:03 +00001152 0 x <= -1.5
humper@google.coma99a92c2013-02-20 16:42:06 +00001153 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3 -1.5 < x <= -0.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001154 1/2 + 3/4 x - 1/3 x^3 -.5 < x <= .5
humper@google.coma99a92c2013-02-20 16:42:06 +00001155 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3 .5 < x <= 1.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001156 1 1.5 < x
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001157
humper@google.coma99a92c2013-02-20 16:42:06 +00001158 in Mathematica code:
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001159
humper@google.coma99a92c2013-02-20 16:42:06 +00001160 gi[x_] := Piecewise[ {
1161 { 0 , x <= -1.5 },
1162 { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
1163 { 1/2 + 3/4 x - 1/3 x^3 , -.5 < x <= .5},
1164 { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3, .5 < x <= 1.5}
1165 },1]
humper@google.com7c7292c2013-01-04 20:29:03 +00001166*/
1167
// Piecewise-cubic edge profile of a step function blurred by three box
// convolutions.  This returns one minus the cumulative integral: 1 for
// x < -1.5, falling through 0.5 at x == 0, down to 0 for x > 1.5.
static float gaussianIntegral(float x) {
    // outside the kernel's support the profile is saturated
    if (x < -1.5f) {
        return 1.0f;
    }
    if (x > 1.5f) {
        return 0.0f;
    }

    const float xSquared = x*x;
    const float xCubed = xSquared*x;

    if ( x <= -0.5f ) {
        // left cubic segment
        return 0.4375f + (-xCubed / 6.0f - 3.0f * xSquared * 0.25f - 1.125f * x);
    }
    if ( x <= 0.5f ) {
        // middle segment
        return 0.5f - (0.75f * x - xCubed / 3.0f);
    }
    // right cubic segment
    return 0.5625f - (xCubed / 6.0f - 3.0f * xSquared * 0.25f + 1.125f * x);
}
1187
humper@google.com7c5d7b72013-03-11 20:16:28 +00001188// Compute the size of the array allocated for the profile.
1189
1190static int compute_profile_size(SkScalar radius) {
1191 return SkScalarRoundToInt(radius * 3);
1192
1193}
1194
1195/* compute_profile allocates and fills in an array of floating
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001196 point values between 0 and 255 for the profile signature of
humper@google.com7c7292c2013-01-04 20:29:03 +00001197 a blurred half-plane with the given blur radius. Since we're
1198 going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
1199 all the time, we actually fill in the profile pre-inverted
1200 (already done 255-x).
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001201
humper@google.com7c5d7b72013-03-11 20:16:28 +00001202 It's the responsibility of the caller to delete the
humper@google.com7c7292c2013-01-04 20:29:03 +00001203 memory returned in profile_out.
1204*/
1205
humper@google.com7c5d7b72013-03-11 20:16:28 +00001206static void compute_profile(SkScalar radius, unsigned int **profile_out) {
1207 int size = compute_profile_size(radius);
1208
humper@google.com7c7292c2013-01-04 20:29:03 +00001209 int center = size >> 1;
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001210 unsigned int *profile = SkNEW_ARRAY(unsigned int, size);
humper@google.com7c7292c2013-01-04 20:29:03 +00001211
humper@google.coma99a92c2013-02-20 16:42:06 +00001212 float invr = 1.f/radius;
humper@google.com7c7292c2013-01-04 20:29:03 +00001213
1214 profile[0] = 255;
humper@google.coma99a92c2013-02-20 16:42:06 +00001215 for (int x = 1 ; x < size ; ++x) {
jvanverth@google.comd98df1a2013-02-20 19:02:34 +00001216 float scaled_x = (center - x - .5f) * invr;
humper@google.coma99a92c2013-02-20 16:42:06 +00001217 float gi = gaussianIntegral(scaled_x);
1218 profile[x] = 255 - (uint8_t) (255.f * gi);
humper@google.com7c7292c2013-01-04 20:29:03 +00001219 }
1220
1221 *profile_out = profile;
humper@google.com7c7292c2013-01-04 20:29:03 +00001222}
1223
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001224// TODO MAYBE: Maintain a profile cache to avoid recomputing this for
humper@google.com7c7292c2013-01-04 20:29:03 +00001225// commonly used radii. Consider baking some of the most common blur radii
1226// directly in as static data?
1227
1228// Implementation adapted from Michael Herf's approach:
1229// http://stereopsis.com/shadowrect/
1230
humper@google.coma99a92c2013-02-20 16:42:06 +00001231static inline unsigned int profile_lookup( unsigned int *profile, int loc, int blurred_width, int sharp_width ) {
1232 int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
1233 int ox = dx >> 1;
1234 if (ox < 0) {
1235 ox = 0;
1236 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001237
humper@google.coma99a92c2013-02-20 16:42:06 +00001238 return profile[ox];
1239}
1240
humper@google.com7c7292c2013-01-04 20:29:03 +00001241bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,
humper@google.coma99a92c2013-02-20 16:42:06 +00001242 SkScalar provided_radius, Style style,
humper@google.com7c5d7b72013-03-11 20:16:28 +00001243 SkIPoint *margin, SkMask::CreateMode createMode) {
humper@google.com7c7292c2013-01-04 20:29:03 +00001244 int profile_size;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001245
humper@google.com7c5d7b72013-03-11 20:16:28 +00001246 float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor));
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001247
humper@google.com1e1a24e2013-02-20 18:35:40 +00001248 // adjust blur radius to match interpretation from boxfilter code
humper@google.com7c5d7b72013-03-11 20:16:28 +00001249 radius = (radius + .5f) * 2.f;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001250
humper@google.com7c5d7b72013-03-11 20:16:28 +00001251 profile_size = compute_profile_size(radius);
1252
humper@google.coma99a92c2013-02-20 16:42:06 +00001253 int pad = profile_size/2;
humper@google.com7c7292c2013-01-04 20:29:03 +00001254 if (margin) {
1255 margin->set( pad, pad );
1256 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001257
humper@google.com68a690c2013-03-11 21:16:20 +00001258 dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad),
1259 SkScalarRoundToInt(src.fTop - pad),
1260 SkScalarRoundToInt(src.fRight + pad),
1261 SkScalarRoundToInt(src.fBottom + pad));
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001262
humper@google.com7c7292c2013-01-04 20:29:03 +00001263 dst->fRowBytes = dst->fBounds.width();
1264 dst->fFormat = SkMask::kA8_Format;
1265 dst->fImage = NULL;
humper@google.com7c5d7b72013-03-11 20:16:28 +00001266
1267 int sw = SkScalarFloorToInt(src.width());
1268 int sh = SkScalarFloorToInt(src.height());
1269
1270 if (createMode == SkMask::kJustComputeBounds_CreateMode) {
1271 if (style == kInner_Style) {
humper@google.com68a690c2013-03-11 21:16:20 +00001272 dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
1273 SkScalarRoundToInt(src.fTop),
1274 SkScalarRoundToInt(src.fRight),
1275 SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
humper@google.com7c5d7b72013-03-11 20:16:28 +00001276 dst->fRowBytes = sw;
1277 }
1278 return true;
1279 }
1280 unsigned int *profile = NULL;
1281
1282 compute_profile(radius, &profile);
1283 SkAutoTDeleteArray<unsigned int> ada(profile);
1284
humper@google.com7c7292c2013-01-04 20:29:03 +00001285 size_t dstSize = dst->computeImageSize();
1286 if (0 == dstSize) {
1287 return false; // too big to allocate, abort
1288 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001289
humper@google.com7c7292c2013-01-04 20:29:03 +00001290 uint8_t* dp = SkMask::AllocImage(dstSize);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001291
humper@google.com7c7292c2013-01-04 20:29:03 +00001292 dst->fImage = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001293
humper@google.coma99a92c2013-02-20 16:42:06 +00001294 int dstHeight = dst->fBounds.height();
1295 int dstWidth = dst->fBounds.width();
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001296
humper@google.com7c7292c2013-01-04 20:29:03 +00001297 // nearest odd number less than the profile size represents the center
1298 // of the (2x scaled) profile
1299 int center = ( profile_size & ~1 ) - 1;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001300
humper@google.com7c7292c2013-01-04 20:29:03 +00001301 int w = sw - center;
1302 int h = sh - center;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001303
humper@google.com7c7292c2013-01-04 20:29:03 +00001304 uint8_t *outptr = dp;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001305
humper@google.coma99a92c2013-02-20 16:42:06 +00001306 SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001307
humper@google.coma99a92c2013-02-20 16:42:06 +00001308 for (int x = 0 ; x < dstWidth ; ++x) {
1309 if (profile_size <= sw) {
1310 horizontalScanline[x] = profile_lookup(profile, x, dstWidth, w);
1311 } else {
1312 float span = float(sw)/radius;
jvanverth@google.comd98df1a2013-02-20 19:02:34 +00001313 float giX = 1.5f - (x+.5f)/radius;
humper@google.coma99a92c2013-02-20 16:42:06 +00001314 horizontalScanline[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
1315 }
1316 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001317
humper@google.coma99a92c2013-02-20 16:42:06 +00001318 for (int y = 0 ; y < dstHeight ; ++y) {
1319 unsigned int profile_y;
1320 if (profile_size <= sh) {
1321 profile_y = profile_lookup(profile, y, dstHeight, h);
1322 } else {
1323 float span = float(sh)/radius;
jvanverth@google.comd98df1a2013-02-20 19:02:34 +00001324 float giY = 1.5f - (y+.5f)/radius;
humper@google.coma99a92c2013-02-20 16:42:06 +00001325 profile_y = (uint8_t) (255 * (gaussianIntegral(giY) - gaussianIntegral(giY + span)));
1326 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001327
humper@google.coma99a92c2013-02-20 16:42:06 +00001328 for (int x = 0 ; x < dstWidth ; x++) {
1329 unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], profile_y);
humper@google.com7c7292c2013-01-04 20:29:03 +00001330 *(outptr++) = maskval;
1331 }
1332 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001333
humper@google.coma99a92c2013-02-20 16:42:06 +00001334 if (style == kInner_Style) {
1335 // now we allocate the "real" dst, mirror the size of src
jvanverth@google.comd98df1a2013-02-20 19:02:34 +00001336 size_t srcSize = (size_t)(src.width() * src.height());
humper@google.coma99a92c2013-02-20 16:42:06 +00001337 if (0 == srcSize) {
1338 return false; // too big to allocate, abort
1339 }
1340 dst->fImage = SkMask::AllocImage(srcSize);
1341 for (int y = 0 ; y < sh ; y++) {
1342 uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
1343 uint8_t *inner_scanline = dst->fImage + y*sw;
1344 memcpy(inner_scanline, blur_scanline, sw);
1345 }
1346 SkMask::FreeImage(dp);
1347
humper@google.com68a690c2013-03-11 21:16:20 +00001348 dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
1349 SkScalarRoundToInt(src.fTop),
1350 SkScalarRoundToInt(src.fRight),
1351 SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
humper@google.coma99a92c2013-02-20 16:42:06 +00001352 dst->fRowBytes = sw;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001353
humper@google.coma99a92c2013-02-20 16:42:06 +00001354 } else if (style == kOuter_Style) {
1355 for (int y = pad ; y < dstHeight-pad ; y++) {
1356 uint8_t *dst_scanline = dp + y*dstWidth + pad;
1357 memset(dst_scanline, 0, sw);
1358 }
1359 }
1360 // normal and solid styles are the same for analytic rect blurs, so don't
1361 // need to handle solid specially.
1362
1363 return true;
1364}
1365
1366// The "simple" blur is a direct implementation of separable convolution with a discrete
1367// gaussian kernel. It's "ground truth" in a sense; too slow to be used, but very
1368// useful for correctness comparisons.
1369
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001370bool SkBlurMask::BlurGroundTruth(SkMask* dst, const SkMask& src, SkScalar provided_radius,
humper@google.coma99a92c2013-02-20 16:42:06 +00001371 Style style, SkIPoint* margin) {
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001372
humper@google.coma99a92c2013-02-20 16:42:06 +00001373 if (src.fFormat != SkMask::kA8_Format) {
1374 return false;
1375 }
1376
1377 float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor));
1378 float stddev = SkScalarToFloat(radius) /2.0f;
1379 float variance = stddev * stddev;
1380
1381 int windowSize = SkScalarCeil(stddev*4);
1382 // round window size up to nearest odd number
1383 windowSize |= 1;
1384
1385 SkAutoTMalloc<float> gaussWindow(windowSize);
1386
1387 int halfWindow = windowSize >> 1;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001388
humper@google.coma99a92c2013-02-20 16:42:06 +00001389 gaussWindow[halfWindow] = 1;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001390
humper@google.coma99a92c2013-02-20 16:42:06 +00001391 float windowSum = 1;
1392 for (int x = 1 ; x <= halfWindow ; ++x) {
1393 float gaussian = expf(-x*x / variance);
1394 gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
1395 windowSum += 2*gaussian;
1396 }
1397
1398 // leave the filter un-normalized for now; we will divide by the normalization
1399 // sum later;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001400
humper@google.coma99a92c2013-02-20 16:42:06 +00001401 int pad = halfWindow;
1402 if (margin) {
1403 margin->set( pad, pad );
1404 }
1405
1406 dst->fBounds = src.fBounds;
1407 dst->fBounds.outset(pad, pad);
1408
1409 dst->fRowBytes = dst->fBounds.width();
1410 dst->fFormat = SkMask::kA8_Format;
1411 dst->fImage = NULL;
1412
1413 if (src.fImage) {
1414
1415 size_t dstSize = dst->computeImageSize();
1416 if (0 == dstSize) {
1417 return false; // too big to allocate, abort
1418 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001419
humper@google.coma99a92c2013-02-20 16:42:06 +00001420 int srcWidth = src.fBounds.width();
1421 int srcHeight = src.fBounds.height();
1422 int dstWidth = dst->fBounds.width();
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001423
humper@google.coma99a92c2013-02-20 16:42:06 +00001424 const uint8_t* srcPixels = src.fImage;
1425 uint8_t* dstPixels = SkMask::AllocImage(dstSize);
1426 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
1427
1428 // do the actual blur. First, make a padded copy of the source.
1429 // use double pad so we never have to check if we're outside anything
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001430
humper@google.coma99a92c2013-02-20 16:42:06 +00001431 int padWidth = srcWidth + 4*pad;
1432 int padHeight = srcHeight;
1433 int padSize = padWidth * padHeight;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001434
humper@google.coma99a92c2013-02-20 16:42:06 +00001435 SkAutoTMalloc<uint8_t> padPixels(padSize);
1436 memset(padPixels, 0, padSize);
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001437
humper@google.coma99a92c2013-02-20 16:42:06 +00001438 for (int y = 0 ; y < srcHeight; ++y) {
1439 uint8_t* padptr = padPixels + y * padWidth + 2*pad;
1440 const uint8_t* srcptr = srcPixels + y * srcWidth;
1441 memcpy(padptr, srcptr, srcWidth);
1442 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001443
humper@google.coma99a92c2013-02-20 16:42:06 +00001444 // blur in X, transposing the result into a temporary floating point buffer.
1445 // also double-pad the intermediate result so that the second blur doesn't
1446 // have to do extra conditionals.
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001447
humper@google.coma99a92c2013-02-20 16:42:06 +00001448 int tmpWidth = padHeight + 4*pad;
1449 int tmpHeight = padWidth - 2*pad;
1450 int tmpSize = tmpWidth * tmpHeight;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001451
humper@google.coma99a92c2013-02-20 16:42:06 +00001452 SkAutoTMalloc<float> tmpImage(tmpSize);
1453 memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
1454
1455 for (int y = 0 ; y < padHeight ; ++y) {
1456 uint8_t *srcScanline = padPixels + y*padWidth;
1457 for (int x = pad ; x < padWidth - pad ; ++x) {
1458 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
1459 uint8_t *windowCenter = srcScanline + x;
1460 for (int i = -pad ; i <= pad ; ++i) {
1461 *outPixel += gaussWindow[pad+i]*windowCenter[i];
1462 }
1463 *outPixel /= windowSum;
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001464 }
humper@google.coma99a92c2013-02-20 16:42:06 +00001465 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001466
humper@google.coma99a92c2013-02-20 16:42:06 +00001467 // blur in Y; now filling in the actual desired destination. We have to do
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001468 // the transpose again; these transposes guarantee that we read memory in
humper@google.coma99a92c2013-02-20 16:42:06 +00001469 // linear order.
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001470
humper@google.coma99a92c2013-02-20 16:42:06 +00001471 for (int y = 0 ; y < tmpHeight ; ++y) {
1472 float *srcScanline = tmpImage + y*tmpWidth;
1473 for (int x = pad ; x < tmpWidth - pad ; ++x) {
1474 float *windowCenter = srcScanline + x;
1475 float finalValue = 0;
1476 for (int i = -pad ; i <= pad ; ++i) {
1477 finalValue += gaussWindow[pad+i]*windowCenter[i];
1478 }
1479 finalValue /= windowSum;
1480 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
1481 int integerPixel = int(finalValue + 0.5f);
1482 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
1483 }
1484 }
skia.committer@gmail.comd454ec12013-02-21 07:15:03 +00001485
humper@google.coma99a92c2013-02-20 16:42:06 +00001486 dst->fImage = dstPixels;
1487 // if need be, alloc the "real" dst (same size as src) and copy/merge
1488 // the blur into it (applying the src)
1489 if (style == kInner_Style) {
1490 // now we allocate the "real" dst, mirror the size of src
1491 size_t srcSize = src.computeImageSize();
1492 if (0 == srcSize) {
1493 return false; // too big to allocate, abort
1494 }
1495 dst->fImage = SkMask::AllocImage(srcSize);
1496 merge_src_with_blur(dst->fImage, src.fRowBytes,
1497 srcPixels, src.fRowBytes,
1498 dstPixels + pad*dst->fRowBytes + pad,
1499 dst->fRowBytes, srcWidth, srcHeight);
1500 SkMask::FreeImage(dstPixels);
1501 } else if (style != kNormal_Style) {
1502 clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
1503 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
1504 }
1505 (void)autoCall.detach();
1506 }
1507
1508 if (style == kInner_Style) {
1509 dst->fBounds = src.fBounds; // restore trimmed bounds
1510 dst->fRowBytes = src.fRowBytes;
1511 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001512
humper@google.com7c7292c2013-01-04 20:29:03 +00001513 return true;
1514}