
/*
 * Copyright 2006 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
// Scale factor applied to the blur radius so that results match the behavior
// of all of the existing blur code (both on the CPU and the GPU). This magic
// constant is 1/sqrt(3).

// TODO: get rid of this fudge factor and move any required fudging up into
// the calling library

#define kBlurRadiusFudgeFactor SkFloatToScalar( .57735f )

// When defined, the per-row loops of the separable box blurs in this file
// are manually unrolled 16x (see the #ifdef UNROLL_SEPARABLE_LOOPS sections).
#define UNROLL_SEPARABLE_LOOPS
24
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000025/**
26 * This function performs a box blur in X, of the given radius. If the
skia.committer@gmail.com884e60b2012-11-16 02:01:17 +000027 * "transpose" parameter is true, it will transpose the pixels on write,
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000028 * such that X and Y are swapped. Reads are always performed from contiguous
29 * memory in X, for speed. The destination buffer (dst) must be at least
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000030 * (width + leftRadius + rightRadius) * height bytes in size.
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000031 *
32 * This is what the inner loop looks like before unrolling, and with the two
33 * cases broken out separately (width < diameter, width >= diameter):
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +000034 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000035 * if (width < diameter) {
36 * for (int x = 0; x < width; ++x) {
37 * sum += *right++;
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +000038 * *dptr = (sum * scale + half) >> 24;
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000039 * dptr += dst_x_stride;
40 * }
41 * for (int x = width; x < diameter; ++x) {
42 * *dptr = (sum * scale + half) >> 24;
43 * dptr += dst_x_stride;
44 * }
45 * for (int x = 0; x < width; ++x) {
46 * *dptr = (sum * scale + half) >> 24;
47 * sum -= *left++;
48 * dptr += dst_x_stride;
49 * }
50 * } else {
51 * for (int x = 0; x < diameter; ++x) {
52 * sum += *right++;
53 * *dptr = (sum * scale + half) >> 24;
54 * dptr += dst_x_stride;
55 * }
56 * for (int x = diameter; x < width; ++x) {
57 * sum += *right++;
58 * *dptr = (sum * scale + half) >> 24;
59 * sum -= *left++;
60 * dptr += dst_x_stride;
61 * }
62 * for (int x = 0; x < diameter; ++x) {
63 * *dptr = (sum * scale + half) >> 24;
64 * sum -= *left++;
65 * dptr += dst_x_stride;
66 * }
67 * }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000068 */
69static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000070 int leftRadius, int rightRadius, int width, int height,
71 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000072{
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000073 int diameter = leftRadius + rightRadius;
74 int kernelSize = diameter + 1;
75 int border = SkMin32(width, diameter);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000076 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000077 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000078 int dst_x_stride = transpose ? height : 1;
79 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000080#ifndef SK_DISABLE_BLUR_ROUNDING
81 uint32_t half = 1 << 23;
82#else
83 uint32_t half = 0;
84#endif
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000085 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000086 uint32_t sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000087 uint8_t* dptr = dst + y * dst_y_stride;
88 const uint8_t* right = src + y * src_y_stride;
89 const uint8_t* left = right;
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000090 for (int x = 0; x < rightRadius - leftRadius; x++) {
91 *dptr = 0;
92 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000093 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000094#define LEFT_BORDER_ITER \
95 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000096 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000097 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000098
99 int x = 0;
100#ifdef UNROLL_SEPARABLE_LOOPS
101 for (; x < border - 16; x += 16) {
102 LEFT_BORDER_ITER
103 LEFT_BORDER_ITER
104 LEFT_BORDER_ITER
105 LEFT_BORDER_ITER
106 LEFT_BORDER_ITER
107 LEFT_BORDER_ITER
108 LEFT_BORDER_ITER
109 LEFT_BORDER_ITER
110 LEFT_BORDER_ITER
111 LEFT_BORDER_ITER
112 LEFT_BORDER_ITER
113 LEFT_BORDER_ITER
114 LEFT_BORDER_ITER
115 LEFT_BORDER_ITER
116 LEFT_BORDER_ITER
117 LEFT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000118 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000119#endif
120 for (; x < border; ++x) {
121 LEFT_BORDER_ITER
122 }
123#undef LEFT_BORDER_ITER
124#define TRIVIAL_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000125 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000126 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000127 x = width;
128#ifdef UNROLL_SEPARABLE_LOOPS
129 for (; x < diameter - 16; x += 16) {
130 TRIVIAL_ITER
131 TRIVIAL_ITER
132 TRIVIAL_ITER
133 TRIVIAL_ITER
134 TRIVIAL_ITER
135 TRIVIAL_ITER
136 TRIVIAL_ITER
137 TRIVIAL_ITER
138 TRIVIAL_ITER
139 TRIVIAL_ITER
140 TRIVIAL_ITER
141 TRIVIAL_ITER
142 TRIVIAL_ITER
143 TRIVIAL_ITER
144 TRIVIAL_ITER
145 TRIVIAL_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000146 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000147#endif
148 for (; x < diameter; ++x) {
149 TRIVIAL_ITER
150 }
151#undef TRIVIAL_ITER
152#define CENTER_ITER \
153 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000154 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000155 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000156 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000157
158 x = diameter;
159#ifdef UNROLL_SEPARABLE_LOOPS
160 for (; x < width - 16; x += 16) {
161 CENTER_ITER
162 CENTER_ITER
163 CENTER_ITER
164 CENTER_ITER
165 CENTER_ITER
166 CENTER_ITER
167 CENTER_ITER
168 CENTER_ITER
169 CENTER_ITER
170 CENTER_ITER
171 CENTER_ITER
172 CENTER_ITER
173 CENTER_ITER
174 CENTER_ITER
175 CENTER_ITER
176 CENTER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000177 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000178#endif
179 for (; x < width; ++x) {
180 CENTER_ITER
181 }
182#undef CENTER_ITER
183#define RIGHT_BORDER_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000184 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000185 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000186 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000187
188 x = 0;
189#ifdef UNROLL_SEPARABLE_LOOPS
190 for (; x < border - 16; x += 16) {
191 RIGHT_BORDER_ITER
192 RIGHT_BORDER_ITER
193 RIGHT_BORDER_ITER
194 RIGHT_BORDER_ITER
195 RIGHT_BORDER_ITER
196 RIGHT_BORDER_ITER
197 RIGHT_BORDER_ITER
198 RIGHT_BORDER_ITER
199 RIGHT_BORDER_ITER
200 RIGHT_BORDER_ITER
201 RIGHT_BORDER_ITER
202 RIGHT_BORDER_ITER
203 RIGHT_BORDER_ITER
204 RIGHT_BORDER_ITER
205 RIGHT_BORDER_ITER
206 RIGHT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000207 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000208#endif
209 for (; x < border; ++x) {
210 RIGHT_BORDER_ITER
211 }
212#undef RIGHT_BORDER_ITER
humper@google.coma99a92c2013-02-20 16:42:06 +0000213 for (int x = 0; x < leftRadius - rightRadius; ++x) {
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +0000214 *dptr = 0;
215 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000216 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000217 SkASSERT(sum == 0);
218 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000219 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000220}
221
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000222/**
223 * This variant of the box blur handles blurring of non-integer radii. It
224 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
225 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly
226 * interpolates between them. In float this would be:
227 * outer_weight * outer_sum / kernelSize +
228 * (1.0 - outer_weight) * innerSum / (kernelSize - 2)
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +0000229 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000230 * This is what the inner loop looks like before unrolling, and with the two
231 * cases broken out separately (width < diameter, width >= diameter):
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +0000232 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000233 * if (width < diameter) {
234 * for (int x = 0; x < width; x++) {
235 * inner_sum = outer_sum;
236 * outer_sum += *right++;
237 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
238 * dptr += dst_x_stride;
239 * }
240 * for (int x = width; x < diameter; ++x) {
241 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
242 * dptr += dst_x_stride;
243 * }
244 * for (int x = 0; x < width; x++) {
245 * inner_sum = outer_sum - *left++;
246 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
247 * dptr += dst_x_stride;
248 * outer_sum = inner_sum;
249 * }
250 * } else {
251 * for (int x = 0; x < diameter; x++) {
252 * inner_sum = outer_sum;
253 * outer_sum += *right++;
254 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
255 * dptr += dst_x_stride;
256 * }
257 * for (int x = diameter; x < width; ++x) {
258 * inner_sum = outer_sum - *left;
259 * outer_sum += *right++;
260 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
261 * dptr += dst_x_stride;
262 * outer_sum -= *left++;
263 * }
264 * for (int x = 0; x < diameter; x++) {
265 * inner_sum = outer_sum - *left++;
266 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
267 * dptr += dst_x_stride;
268 * outer_sum = inner_sum;
269 * }
270 * }
271 * }
272 * return new_width;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000273 */
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000274
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000275static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
276 int radius, int width, int height,
277 bool transpose, uint8_t outer_weight)
278{
279 int diameter = radius * 2;
280 int kernelSize = diameter + 1;
281 int border = SkMin32(width, diameter);
282 int inner_weight = 255 - outer_weight;
283 outer_weight += outer_weight >> 7;
284 inner_weight += inner_weight >> 7;
285 uint32_t outer_scale = (outer_weight << 16) / kernelSize;
286 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000287#ifndef SK_DISABLE_BLUR_ROUNDING
288 uint32_t half = 1 << 23;
289#else
290 uint32_t half = 0;
291#endif
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000292 int new_width = width + diameter;
293 int dst_x_stride = transpose ? height : 1;
294 int dst_y_stride = transpose ? 1 : new_width;
295 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000296 uint32_t outer_sum = 0, inner_sum = 0;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000297 uint8_t* dptr = dst + y * dst_y_stride;
298 const uint8_t* right = src + y * src_y_stride;
299 const uint8_t* left = right;
300 int x = 0;
301
302#define LEFT_BORDER_ITER \
303 inner_sum = outer_sum; \
304 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000305 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000306 dptr += dst_x_stride;
307
308#ifdef UNROLL_SEPARABLE_LOOPS
309 for (;x < border - 16; x += 16) {
310 LEFT_BORDER_ITER
311 LEFT_BORDER_ITER
312 LEFT_BORDER_ITER
313 LEFT_BORDER_ITER
314 LEFT_BORDER_ITER
315 LEFT_BORDER_ITER
316 LEFT_BORDER_ITER
317 LEFT_BORDER_ITER
318 LEFT_BORDER_ITER
319 LEFT_BORDER_ITER
320 LEFT_BORDER_ITER
321 LEFT_BORDER_ITER
322 LEFT_BORDER_ITER
323 LEFT_BORDER_ITER
324 LEFT_BORDER_ITER
325 LEFT_BORDER_ITER
326 }
327#endif
328
humper@google.coma99a92c2013-02-20 16:42:06 +0000329 for (;x < border; ++x) {
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000330 LEFT_BORDER_ITER
331 }
332#undef LEFT_BORDER_ITER
333 for (int x = width; x < diameter; ++x) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000334 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000335 dptr += dst_x_stride;
336 }
337 x = diameter;
338
339#define CENTER_ITER \
340 inner_sum = outer_sum - *left; \
341 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000342 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000343 dptr += dst_x_stride; \
344 outer_sum -= *left++;
345
346#ifdef UNROLL_SEPARABLE_LOOPS
347 for (; x < width - 16; x += 16) {
348 CENTER_ITER
349 CENTER_ITER
350 CENTER_ITER
351 CENTER_ITER
352 CENTER_ITER
353 CENTER_ITER
354 CENTER_ITER
355 CENTER_ITER
356 CENTER_ITER
357 CENTER_ITER
358 CENTER_ITER
359 CENTER_ITER
360 CENTER_ITER
361 CENTER_ITER
362 CENTER_ITER
363 CENTER_ITER
364 }
365#endif
366 for (; x < width; ++x) {
367 CENTER_ITER
368 }
369#undef CENTER_ITER
370
371 #define RIGHT_BORDER_ITER \
372 inner_sum = outer_sum - *left++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000373 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000374 dptr += dst_x_stride; \
375 outer_sum = inner_sum;
376
377 x = 0;
378#ifdef UNROLL_SEPARABLE_LOOPS
379 for (; x < border - 16; x += 16) {
380 RIGHT_BORDER_ITER
381 RIGHT_BORDER_ITER
382 RIGHT_BORDER_ITER
383 RIGHT_BORDER_ITER
384 RIGHT_BORDER_ITER
385 RIGHT_BORDER_ITER
386 RIGHT_BORDER_ITER
387 RIGHT_BORDER_ITER
388 RIGHT_BORDER_ITER
389 RIGHT_BORDER_ITER
390 RIGHT_BORDER_ITER
391 RIGHT_BORDER_ITER
392 RIGHT_BORDER_ITER
393 RIGHT_BORDER_ITER
394 RIGHT_BORDER_ITER
395 RIGHT_BORDER_ITER
396 }
397#endif
humper@google.coma99a92c2013-02-20 16:42:06 +0000398 for (; x < border; ++x) {
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000399 RIGHT_BORDER_ITER
400 }
401#undef RIGHT_BORDER_ITER
402 SkASSERT(outer_sum == 0 && inner_sum == 0);
403 }
404 return new_width;
405}
406
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000407static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
408{
409 *loRadius = *hiRadius = SkScalarCeil(passRadius);
410 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
411 *loRadius = *hiRadius - 1;
412 }
413}
414
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
#if defined(SK_BUILD_FOR_WIN32)
#define UNROLL_KERNEL_LOOP 1
#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +0000422
reed@android.com45607672009-09-21 00:27:08 +0000423/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
424 src values at their position, plus all values above and to the left.
425 When we sample into this buffer, we need an initial row and column of 0s,
426 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000427
reed@android.com45607672009-09-21 00:27:08 +0000428 src[i, j] == sum[i+1, j+1]
429 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000430
reed@android.com45607672009-09-21 00:27:08 +0000431 We assume that the sum buffer's stride == its width
432 */
reed@google.com03016a32011-08-12 14:59:59 +0000433static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
434 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +0000435 int sumW = srcW + 1;
436
437 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000438 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000439 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000440
441 int x, y;
442
reed@android.com45607672009-09-21 00:27:08 +0000443 // zero out the top row and column
444 memset(sum, 0, sumW * sizeof(sum[0]));
445 sum += sumW;
446
reed@android.com8a1c16f2008-12-17 15:59:43 +0000447 // special case first row
448 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000449 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000450 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000451 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000452 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000453 }
454 src += srcRB;
455
456 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000457 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000458 uint32_t L = 0;
459 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000460 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000461
462 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
463 uint32_t T = sum[-sumW];
464 X = *src++ + L + T - C;
465 *sum++ = X;
466 L = X;
467 C = T;
468 }
469
470 for (; x >= 4; x-=4) {
471 uint32_t T = sum[-sumW];
472 X = *src++ + L + T - C;
473 *sum++ = X;
474 L = X;
475 C = T;
476 T = sum[-sumW];
477 X = *src++ + L + T - C;
478 *sum++ = X;
479 L = X;
480 C = T;
481 T = sum[-sumW];
482 X = *src++ + L + T - C;
483 *sum++ = X;
484 L = X;
485 C = T;
486 T = sum[-sumW];
487 X = *src++ + L + T - C;
488 *sum++ = X;
489 L = X;
490 C = T;
491 }
492
493 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000494 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000495 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000496 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000497 L = X;
498 C = T;
499 }
500 src += srcRB;
501 }
502}
503
reed@google.com03016a32011-08-12 14:59:59 +0000504/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000505 * This is the path for apply_kernel() to be taken when the kernel
506 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000507 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000508static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
509 int sw, int sh) {
510 SkASSERT(2*rx > sw);
511
reed@android.com8a1c16f2008-12-17 15:59:43 +0000512 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
513
reed@android.com45607672009-09-21 00:27:08 +0000514 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000515
516 int dw = sw + 2*rx;
517 int dh = sh + 2*ry;
518
reed@android.com45607672009-09-21 00:27:08 +0000519 int prev_y = -2*ry;
520 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000521
humper@google.coma99a92c2013-02-20 16:42:06 +0000522 for (int y = 0; y < dh; ++y) {
reed@android.com45607672009-09-21 00:27:08 +0000523 int py = SkClampPos(prev_y) * sumStride;
524 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000525
reed@android.com45607672009-09-21 00:27:08 +0000526 int prev_x = -2*rx;
527 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000528
humper@google.coma99a92c2013-02-20 16:42:06 +0000529 for (int x = 0; x < dw; ++x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000530 int px = SkClampPos(prev_x);
531 int nx = SkFastMin32(next_x, sw);
532
humper@google.coma99a92c2013-02-20 16:42:06 +0000533 // TODO: should we be adding 1/2 (1 << 23) to round to the
534 // nearest integer here?
reed@android.com45607672009-09-21 00:27:08 +0000535 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
536 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000537
538 prev_x += 1;
539 next_x += 1;
540 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000541
542 prev_y += 1;
543 next_y += 1;
544 }
545}
546/**
547 * sw and sh are the width and height of the src. Since the sum buffer
548 * matches that, but has an extra row and col at the beginning (with zeros),
549 * we can just use sw and sh as our "max" values for pinning coordinates
550 * when sampling into sum[][]
551 *
552 * The inner loop is conceptually simple; we break it into several sections
553 * to improve performance. Here's the original version:
humper@google.coma99a92c2013-02-20 16:42:06 +0000554 for (int x = 0; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000555 int px = SkClampPos(prev_x);
556 int nx = SkFastMin32(next_x, sw);
557
558 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
559 *dst++ = SkToU8(tmp * scale >> 24);
560
561 prev_x += 1;
562 next_x += 1;
563 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000564 * The sections are:
565 * left-hand section, where prev_x is clamped to 0
566 * center section, where neither prev_x nor next_x is clamped
567 * right-hand section, where next_x is clamped to sw
568 * On some operating systems, the center section is unrolled for additional
569 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000570*/
571static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
572 int sw, int sh) {
573 if (2*rx > sw) {
574 kernel_clamped(dst, rx, ry, sum, sw, sh);
575 return;
576 }
577
578 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
579
580 int sumStride = sw + 1;
581
582 int dw = sw + 2*rx;
583 int dh = sh + 2*ry;
584
585 int prev_y = -2*ry;
586 int next_y = 1;
587
588 SkASSERT(2*rx <= dw - 2*rx);
589
humper@google.coma99a92c2013-02-20 16:42:06 +0000590 for (int y = 0; y < dh; ++y) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000591 int py = SkClampPos(prev_y) * sumStride;
592 int ny = SkFastMin32(next_y, sh) * sumStride;
593
594 int prev_x = -2*rx;
595 int next_x = 1;
596 int x = 0;
597
humper@google.coma99a92c2013-02-20 16:42:06 +0000598 for (; x < 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000599 SkASSERT(prev_x <= 0);
600 SkASSERT(next_x <= sw);
601
602 int px = 0;
603 int nx = next_x;
604
605 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
606 *dst++ = SkToU8(tmp * scale >> 24);
607
608 prev_x += 1;
609 next_x += 1;
610 }
611
tomhudson@google.com01224d52011-11-28 18:22:01 +0000612 int i0 = prev_x + py;
613 int i1 = next_x + ny;
614 int i2 = next_x + py;
615 int i3 = prev_x + ny;
616
617#if UNROLL_KERNEL_LOOP
618 for (; x < dw - 2*rx - 4; x += 4) {
619 SkASSERT(prev_x >= 0);
620 SkASSERT(next_x <= sw);
621
622 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
623 *dst++ = SkToU8(tmp * scale >> 24);
624 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
625 *dst++ = SkToU8(tmp * scale >> 24);
626 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
627 *dst++ = SkToU8(tmp * scale >> 24);
628 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
629 *dst++ = SkToU8(tmp * scale >> 24);
630
631 prev_x += 4;
632 next_x += 4;
633 }
634#endif
635
humper@google.coma99a92c2013-02-20 16:42:06 +0000636 for (; x < dw - 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000637 SkASSERT(prev_x >= 0);
638 SkASSERT(next_x <= sw);
639
tomhudson@google.com01224d52011-11-28 18:22:01 +0000640 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000641 *dst++ = SkToU8(tmp * scale >> 24);
642
643 prev_x += 1;
644 next_x += 1;
645 }
646
humper@google.coma99a92c2013-02-20 16:42:06 +0000647 for (; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000648 SkASSERT(prev_x >= 0);
649 SkASSERT(next_x > sw);
650
651 int px = prev_x;
652 int nx = sw;
653
654 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
655 *dst++ = SkToU8(tmp * scale >> 24);
656
657 prev_x += 1;
658 next_x += 1;
659 }
660
reed@android.com8a1c16f2008-12-17 15:59:43 +0000661 prev_y += 1;
662 next_y += 1;
663 }
664}
665
reed@google.com03016a32011-08-12 14:59:59 +0000666/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000667 * This is the path for apply_kernel_interp() to be taken when the kernel
668 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000669 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000670static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
humper@google.coma99a92c2013-02-20 16:42:06 +0000671 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000672 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000673
humper@google.coma99a92c2013-02-20 16:42:06 +0000674 int innerWeight = 255 - outerWeight;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000675
676 // round these guys up if they're bigger than 127
humper@google.coma99a92c2013-02-20 16:42:06 +0000677 outerWeight += outerWeight >> 7;
678 innerWeight += innerWeight >> 7;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000679
humper@google.coma99a92c2013-02-20 16:42:06 +0000680 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
681 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000682
reed@android.com45607672009-09-21 00:27:08 +0000683 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000684
685 int dw = sw + 2*rx;
686 int dh = sh + 2*ry;
687
reed@android.com45607672009-09-21 00:27:08 +0000688 int prev_y = -2*ry;
689 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000690
humper@google.coma99a92c2013-02-20 16:42:06 +0000691 for (int y = 0; y < dh; ++y) {
reed@android.com45607672009-09-21 00:27:08 +0000692 int py = SkClampPos(prev_y) * sumStride;
693 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000694
reed@android.com45607672009-09-21 00:27:08 +0000695 int ipy = SkClampPos(prev_y + 1) * sumStride;
696 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000697
reed@android.com45607672009-09-21 00:27:08 +0000698 int prev_x = -2*rx;
699 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000700
humper@google.coma99a92c2013-02-20 16:42:06 +0000701 for (int x = 0; x < dw; ++x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000702 int px = SkClampPos(prev_x);
703 int nx = SkFastMin32(next_x, sw);
704
705 int ipx = SkClampPos(prev_x + 1);
706 int inx = SkClampMax(next_x - 1, sw);
707
humper@google.coma99a92c2013-02-20 16:42:06 +0000708 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000709 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000710 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000711 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000712 *dst++ = SkToU8((outerSum * outerScale
713 + innerSum * innerScale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000714
715 prev_x += 1;
716 next_x += 1;
717 }
718 prev_y += 1;
719 next_y += 1;
720 }
721}
722
tomhudson@google.com8caac642011-11-22 15:58:06 +0000723/**
724 * sw and sh are the width and height of the src. Since the sum buffer
725 * matches that, but has an extra row and col at the beginning (with zeros),
726 * we can just use sw and sh as our "max" values for pinning coordinates
727 * when sampling into sum[][]
728 *
729 * The inner loop is conceptually simple; we break it into several variants
730 * to improve performance. Here's the original version:
humper@google.coma99a92c2013-02-20 16:42:06 +0000731 for (int x = 0; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000732 int px = SkClampPos(prev_x);
733 int nx = SkFastMin32(next_x, sw);
734
735 int ipx = SkClampPos(prev_x + 1);
736 int inx = SkClampMax(next_x - 1, sw);
737
humper@google.coma99a92c2013-02-20 16:42:06 +0000738 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000739 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000740 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000741 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000742 *dst++ = SkToU8((outerSum * outerScale
743 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000744
745 prev_x += 1;
746 next_x += 1;
747 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000748 * The sections are:
749 * left-hand section, where prev_x is clamped to 0
750 * center section, where neither prev_x nor next_x is clamped
751 * right-hand section, where next_x is clamped to sw
752 * On some operating systems, the center section is unrolled for additional
753 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000754*/
755static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
humper@google.coma99a92c2013-02-20 16:42:06 +0000756 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000757 SkASSERT(rx > 0 && ry > 0);
humper@google.coma99a92c2013-02-20 16:42:06 +0000758 SkASSERT(outerWeight <= 255);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000759
760 if (2*rx > sw) {
humper@google.coma99a92c2013-02-20 16:42:06 +0000761 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000762 return;
763 }
764
humper@google.coma99a92c2013-02-20 16:42:06 +0000765 int innerWeight = 255 - outerWeight;
tomhudson@google.com8caac642011-11-22 15:58:06 +0000766
767 // round these guys up if they're bigger than 127
humper@google.coma99a92c2013-02-20 16:42:06 +0000768 outerWeight += outerWeight >> 7;
769 innerWeight += innerWeight >> 7;
tomhudson@google.com8caac642011-11-22 15:58:06 +0000770
humper@google.coma99a92c2013-02-20 16:42:06 +0000771 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
772 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
tomhudson@google.com8caac642011-11-22 15:58:06 +0000773
774 int sumStride = sw + 1;
775
776 int dw = sw + 2*rx;
777 int dh = sh + 2*ry;
778
779 int prev_y = -2*ry;
780 int next_y = 1;
781
782 SkASSERT(2*rx <= dw - 2*rx);
783
humper@google.coma99a92c2013-02-20 16:42:06 +0000784 for (int y = 0; y < dh; ++y) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000785 int py = SkClampPos(prev_y) * sumStride;
786 int ny = SkFastMin32(next_y, sh) * sumStride;
787
788 int ipy = SkClampPos(prev_y + 1) * sumStride;
789 int iny = SkClampMax(next_y - 1, sh) * sumStride;
790
791 int prev_x = -2*rx;
792 int next_x = 1;
793 int x = 0;
794
humper@google.coma99a92c2013-02-20 16:42:06 +0000795 for (; x < 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000796 SkASSERT(prev_x < 0);
797 SkASSERT(next_x <= sw);
798
799 int px = 0;
800 int nx = next_x;
801
802 int ipx = 0;
803 int inx = next_x - 1;
804
humper@google.coma99a92c2013-02-20 16:42:06 +0000805 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000806 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000807 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000808 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000809 *dst++ = SkToU8((outerSum * outerScale
810 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000811
812 prev_x += 1;
813 next_x += 1;
814 }
815
tomhudson@google.com01224d52011-11-28 18:22:01 +0000816 int i0 = prev_x + py;
817 int i1 = next_x + ny;
818 int i2 = next_x + py;
819 int i3 = prev_x + ny;
820 int i4 = prev_x + 1 + ipy;
821 int i5 = next_x - 1 + iny;
822 int i6 = next_x - 1 + ipy;
823 int i7 = prev_x + 1 + iny;
824
825#if UNROLL_KERNEL_LOOP
826 for (; x < dw - 2*rx - 4; x += 4) {
827 SkASSERT(prev_x >= 0);
828 SkASSERT(next_x <= sw);
829
humper@google.coma99a92c2013-02-20 16:42:06 +0000830 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
831 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
832 *dst++ = SkToU8((outerSum * outerScale
833 + innerSum * innerScale) >> 24);
834 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
835 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
836 *dst++ = SkToU8((outerSum * outerScale
837 + innerSum * innerScale) >> 24);
838 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
839 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
840 *dst++ = SkToU8((outerSum * outerScale
841 + innerSum * innerScale) >> 24);
842 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
843 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
844 *dst++ = SkToU8((outerSum * outerScale
845 + innerSum * innerScale) >> 24);
tomhudson@google.com01224d52011-11-28 18:22:01 +0000846
847 prev_x += 4;
848 next_x += 4;
849 }
850#endif
851
humper@google.coma99a92c2013-02-20 16:42:06 +0000852 for (; x < dw - 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000853 SkASSERT(prev_x >= 0);
854 SkASSERT(next_x <= sw);
855
humper@google.coma99a92c2013-02-20 16:42:06 +0000856 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
857 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
858 *dst++ = SkToU8((outerSum * outerScale
859 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000860
861 prev_x += 1;
862 next_x += 1;
863 }
864
humper@google.coma99a92c2013-02-20 16:42:06 +0000865 for (; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000866 SkASSERT(prev_x >= 0);
867 SkASSERT(next_x > sw);
868
869 int px = prev_x;
870 int nx = sw;
871
872 int ipx = prev_x + 1;
873 int inx = sw;
874
humper@google.coma99a92c2013-02-20 16:42:06 +0000875 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000876 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000877 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000878 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000879 *dst++ = SkToU8((outerSum * outerScale
880 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000881
882 prev_x += 1;
883 next_x += 1;
884 }
885
886 prev_y += 1;
887 next_y += 1;
888 }
889}
890
reed@android.com8a1c16f2008-12-17 15:59:43 +0000891#include "SkColorPriv.h"
892
reed@android.com0e3c6642009-09-18 13:41:56 +0000893static void merge_src_with_blur(uint8_t dst[], int dstRB,
894 const uint8_t src[], int srcRB,
895 const uint8_t blur[], int blurRB,
896 int sw, int sh) {
897 dstRB -= sw;
898 srcRB -= sw;
899 blurRB -= sw;
900 while (--sh >= 0) {
901 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000902 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
903 dst += 1;
904 src += 1;
905 blur += 1;
906 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000907 dst += dstRB;
908 src += srcRB;
909 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000910 }
911}
912
913static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000914 const uint8_t src[], int srcRowBytes,
915 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000916 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000917 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000918 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000919 switch (style) {
920 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000921 for (x = sw - 1; x >= 0; --x) {
922 int s = *src;
923 int d = *dst;
924 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000925 dst += 1;
926 src += 1;
927 }
928 break;
929 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000930 for (x = sw - 1; x >= 0; --x) {
931 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000932 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000933 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000934 dst += 1;
935 src += 1;
936 }
937 break;
938 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000939 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000940 break;
941 }
942 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000943 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000944 }
945}
946
reed@google.com03016a32011-08-12 14:59:59 +0000947///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000948
bsalomon@google.com33cdbde2013-01-11 20:54:44 +0000949// we use a local function to wrap the class static method to work around
reed@android.com8a1c16f2008-12-17 15:59:43 +0000950// a bug in gcc98
951void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000952void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000953 SkMask::FreeImage(image);
954}
955
956bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000957 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000958 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000959{
humper@google.coma99a92c2013-02-20 16:42:06 +0000960
reed@google.com03016a32011-08-12 14:59:59 +0000961 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000962 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000963 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000964
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000965 // Force high quality off for small radii (performance)
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000966 if (radius < SkIntToScalar(3)) {
967 quality = kLow_Quality;
968 }
humper@google.coma99a92c2013-02-20 16:42:06 +0000969
970 // highQuality: use three box blur passes as a cheap way
971 // to approximate a Gaussian blur
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000972 int passCount = (kHigh_Quality == quality) ? 3 : 1;
humper@google.coma99a92c2013-02-20 16:42:06 +0000973 SkScalar passRadius = (kHigh_Quality == quality) ?
974 SkScalarMul( radius, kBlurRadiusFudgeFactor):
975 radius;
976
977#ifndef SK_IGNORE_BLUR_RADIUS_CORRECTNESS
978 // multiply the given radius by sqrt(2)/2 to convert
979 // from (2x) standard deviation to needed box width
980 const SkScalar radiusMultiplier = SkFloatToScalar(0.707f);
981 SkScalar boxWidth = SkScalarMul(passRadius, radiusMultiplier);
982 passRadius = SkScalarMul(boxWidth,SK_ScalarHalf) - SK_ScalarHalf;
983#endif
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000984
985 int rx = SkScalarCeil(passRadius);
humper@google.coma99a92c2013-02-20 16:42:06 +0000986 int outerWeight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000987
988 SkASSERT(rx >= 0);
humper@google.coma99a92c2013-02-20 16:42:06 +0000989 SkASSERT((unsigned)outerWeight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000990 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000991 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000992 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000993
994 int ry = rx; // only do square blur for now
995
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000996 int padx = passCount * rx;
997 int pady = passCount * ry;
humper@google.coma99a92c2013-02-20 16:42:06 +0000998
bungeman@google.com5af16f82011-09-02 15:06:44 +0000999 if (margin) {
1000 margin->set(padx, pady);
1001 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001002 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
1003 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
humper@google.coma99a92c2013-02-20 16:42:06 +00001004
reed@android.com49f0ff22009-03-19 21:52:42 +00001005 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +00001006 dst->fFormat = SkMask::kA8_Format;
1007 dst->fImage = NULL;
1008
reed@android.com0e3c6642009-09-18 13:41:56 +00001009 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +00001010 size_t dstSize = dst->computeImageSize();
1011 if (0 == dstSize) {
1012 return false; // too big to allocate, abort
1013 }
1014
reed@android.com8a1c16f2008-12-17 15:59:43 +00001015 int sw = src.fBounds.width();
1016 int sh = src.fBounds.height();
1017 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +00001018 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001019 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
1020
1021 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001022 if (separable) {
1023 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1024 uint8_t* tp = tmpBuffer.get();
1025 int w = sw, h = sh;
humper@google.coma99a92c2013-02-20 16:42:06 +00001026
1027 if (outerWeight == 255) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +00001028 int loRadius, hiRadius;
1029 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001030 if (kHigh_Quality == quality) {
1031 // Do three X blurs, with a transpose on the final one.
1032 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
1033 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
1034 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
1035 // Do three Y blurs, with a transpose on the final one.
1036 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
1037 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
1038 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
1039 } else {
1040 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
1041 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
1042 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +00001043 } else {
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001044 if (kHigh_Quality == quality) {
1045 // Do three X blurs, with a transpose on the final one.
humper@google.coma99a92c2013-02-20 16:42:06 +00001046 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
1047 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight);
1048 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001049 // Do three Y blurs, with a transpose on the final one.
humper@google.coma99a92c2013-02-20 16:42:06 +00001050 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight);
1051 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight);
1052 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001053 } else {
humper@google.coma99a92c2013-02-20 16:42:06 +00001054 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
1055 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001056 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001057 }
1058 } else {
reed@google.com03016a32011-08-12 14:59:59 +00001059 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
1060 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
1061 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001062 uint32_t* sumBuffer = storage.get();
1063
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001064 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +00001065 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
humper@google.coma99a92c2013-02-20 16:42:06 +00001066 if (outerWeight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001067 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +00001068 } else {
humper@google.coma99a92c2013-02-20 16:42:06 +00001069 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outerWeight);
reed@google.com03016a32011-08-12 14:59:59 +00001070 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001071
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001072 if (kHigh_Quality == quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001073 //pass2: dp is source, tmpBuffer is destination
1074 int tmp_sw = sw + 2 * rx;
1075 int tmp_sh = sh + 2 * ry;
1076 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1077 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
humper@google.coma99a92c2013-02-20 16:42:06 +00001078 if (outerWeight == 255)
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001079 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
1080 else
reed@google.com03016a32011-08-12 14:59:59 +00001081 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
humper@google.coma99a92c2013-02-20 16:42:06 +00001082 tmp_sw, tmp_sh, outerWeight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001083
1084 //pass3: tmpBuffer is source, dp is destination
1085 tmp_sw += 2 * rx;
1086 tmp_sh += 2 * ry;
1087 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
humper@google.coma99a92c2013-02-20 16:42:06 +00001088 if (outerWeight == 255)
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001089 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
1090 else
reed@google.com03016a32011-08-12 14:59:59 +00001091 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
humper@google.coma99a92c2013-02-20 16:42:06 +00001092 outerWeight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001093 }
reed@android.com8a1c16f2008-12-17 15:59:43 +00001094 }
1095
1096 dst->fImage = dp;
1097 // if need be, alloc the "real" dst (same size as src) and copy/merge
1098 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +00001099 if (style == kInner_Style) {
1100 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +00001101 size_t srcSize = src.computeImageSize();
1102 if (0 == srcSize) {
1103 return false; // too big to allocate, abort
1104 }
1105 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +00001106 merge_src_with_blur(dst->fImage, src.fRowBytes,
1107 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +00001108 dp + passCount * (rx + ry * dst->fRowBytes),
1109 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001110 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +00001111 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +00001112 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
1113 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001114 }
1115 (void)autoCall.detach();
1116 }
1117
reed@android.com0e3c6642009-09-18 13:41:56 +00001118 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001119 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +00001120 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +00001121 }
1122
reed@android.com8a1c16f2008-12-17 15:59:43 +00001123 return true;
1124}
1125
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001126bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
1127 SkScalar radius, Style style, Quality quality,
1128 SkIPoint* margin)
1129{
1130 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
1131}
1132
1133bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
1134 SkScalar radius, Style style, Quality quality,
1135 SkIPoint* margin)
1136{
1137 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
1138}
humper@google.com7c7292c2013-01-04 20:29:03 +00001139
1140/* Convolving a box with itself three times results in a piecewise
1141 quadratic function:
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001142
humper@google.com7c7292c2013-01-04 20:29:03 +00001143 0 x <= -1.5
humper@google.coma99a92c2013-02-20 16:42:06 +00001144 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001145 3/4 - x^2 -.5 < x <= .5
1146 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5
1147 0 1.5 < x
humper@google.coma99a92c2013-02-20 16:42:06 +00001148
1149 Mathematica:
1150
1151 g[x_] := Piecewise [ {
1152 {9/8 + 3/2 x + 1/2 x^2 , -1.5 < x <= -.5},
1153 {3/4 - x^2 , -.5 < x <= .5},
1154 {9/8 - 3/2 x + 1/2 x^2 , 0.5 < x <= 1.5}
1155 }, 0]
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001156
humper@google.com7c7292c2013-01-04 20:29:03 +00001157 To get the profile curve of the blurred step function at the rectangle
1158 edge, we evaluate the indefinite integral, which is piecewise cubic:
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001159
humper@google.com7c7292c2013-01-04 20:29:03 +00001160 0 x <= -1.5
humper@google.coma99a92c2013-02-20 16:42:06 +00001161 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3 -1.5 < x <= -0.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001162 1/2 + 3/4 x - 1/3 x^3 -.5 < x <= .5
humper@google.coma99a92c2013-02-20 16:42:06 +00001163 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3 .5 < x <= 1.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001164 1 1.5 < x
humper@google.coma99a92c2013-02-20 16:42:06 +00001165
1166 in Mathematica code:
1167
1168 gi[x_] := Piecewise[ {
1169 { 0 , x <= -1.5 },
1170 { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
1171 { 1/2 + 3/4 x - 1/3 x^3 , -.5 < x <= .5},
1172 { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3, .5 < x <= 1.5}
1173 },1]
humper@google.com7c7292c2013-01-04 20:29:03 +00001174*/
1175
// Evaluates a piecewise-cubic falloff over [-1.5, 1.5]: the result is 1 for
// x < -1.5, 0 for x > 1.5, and in between it is one minus the piecewise
// cubic integral of the triple-box kernel (so it decreases from 1 to 0,
// passing through 0.5 at x == 0).
static float gaussianIntegral(float x) {
    // outside the kernel's support the curve is flat
    if (x > 1.5f) {
        return 0.0f;
    }
    if (x < -1.5f) {
        return 1.0f;
    }

    const float xSquared = x*x;
    const float xCubed = xSquared*x;

    float value;
    if ( x > 0.5f ) {
        // right-hand quadratic lobe
        value = 0.5625f - (xCubed / 6.0f - 3.0f * xSquared * 0.25f + 1.125f * x);
    } else if ( x > -0.5f ) {
        // central lobe
        value = 0.5f - (0.75f * x - xCubed / 3.0f);
    } else {
        // left-hand quadratic lobe
        value = 0.4375f + (-xCubed / 6.0f - 3.0f * xSquared * 0.25f - 1.125f * x);
    }
    return value;
}
1195
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001196/*
humper@google.com7c7292c2013-01-04 20:29:03 +00001197 compute_profile allocates and fills in an array of floating
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001198 point values between 0 and 255 for the profile signature of
humper@google.com7c7292c2013-01-04 20:29:03 +00001199 a blurred half-plane with the given blur radius. Since we're
1200 going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
1201 all the time, we actually fill in the profile pre-inverted
1202 (already done 255-x).
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001203
humper@google.com7c7292c2013-01-04 20:29:03 +00001204 The function returns the size of the array allocated for the
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001205 profile. It's the responsibility of the caller to delete the
humper@google.com7c7292c2013-01-04 20:29:03 +00001206 memory returned in profile_out.
1207*/
1208
humper@google.coma99a92c2013-02-20 16:42:06 +00001209static int compute_profile(SkScalar radius, unsigned int **profile_out) {
1210 int size = SkScalarRoundToInt(radius * 3);
humper@google.com7c7292c2013-01-04 20:29:03 +00001211 int center = size >> 1;
1212
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001213 unsigned int *profile = SkNEW_ARRAY(unsigned int, size);
humper@google.com7c7292c2013-01-04 20:29:03 +00001214
humper@google.coma99a92c2013-02-20 16:42:06 +00001215 float invr = 1.f/radius;
humper@google.com7c7292c2013-01-04 20:29:03 +00001216
1217 profile[0] = 255;
humper@google.coma99a92c2013-02-20 16:42:06 +00001218 for (int x = 1 ; x < size ; ++x) {
1219 float scaled_x = (center - x - .5) * invr;
1220 float gi = gaussianIntegral(scaled_x);
1221 profile[x] = 255 - (uint8_t) (255.f * gi);
humper@google.com7c7292c2013-01-04 20:29:03 +00001222 }
1223
1224 *profile_out = profile;
1225 return size;
1226}
1227
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001228// TODO MAYBE: Maintain a profile cache to avoid recomputing this for
humper@google.com7c7292c2013-01-04 20:29:03 +00001229// commonly used radii. Consider baking some of the most common blur radii
1230// directly in as static data?
1231
1232// Implementation adapted from Michael Herf's approach:
1233// http://stereopsis.com/shadowrect/
1234
humper@google.coma99a92c2013-02-20 16:42:06 +00001235static inline unsigned int profile_lookup( unsigned int *profile, int loc, int blurred_width, int sharp_width ) {
1236 int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
1237 int ox = dx >> 1;
1238 if (ox < 0) {
1239 ox = 0;
1240 }
1241
1242 return profile[ox];
1243}
1244
humper@google.com7c7292c2013-01-04 20:29:03 +00001245bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,
humper@google.coma99a92c2013-02-20 16:42:06 +00001246 SkScalar provided_radius, Style style,
humper@google.com7c7292c2013-01-04 20:29:03 +00001247 SkIPoint *margin) {
1248 int profile_size;
1249 unsigned int *profile;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001250
humper@google.com7c7292c2013-01-04 20:29:03 +00001251 float radius = SkScalarToFloat( SkScalarMul( provided_radius, kBlurRadiusFudgeFactor ) );
humper@google.coma99a92c2013-02-20 16:42:06 +00001252
1253#ifndef SK_IGNORE_BLUR_RADIUS_CORRECTNESS
1254 float stddev = SkScalarToFloat( radius ) /2.0f;
1255 radius = stddev * 1.414f;
1256#endif
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001257
humper@google.com7c7292c2013-01-04 20:29:03 +00001258 profile_size = compute_profile( radius, &profile );
humper@google.coma99a92c2013-02-20 16:42:06 +00001259
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001260 SkAutoTDeleteArray<unsigned int> ada(profile);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001261
humper@google.coma99a92c2013-02-20 16:42:06 +00001262 int pad = profile_size/2;
humper@google.com7c7292c2013-01-04 20:29:03 +00001263 if (margin) {
1264 margin->set( pad, pad );
1265 }
humper@google.coma99a92c2013-02-20 16:42:06 +00001266
1267 int shadow_left = -pad;
1268 int shadow_top = -pad;
1269 int shadow_right = src.width() + pad;
1270 int shadow_bottom = src.height() + pad;
1271
1272 dst->fBounds.set(shadow_left, shadow_top, shadow_right, shadow_bottom);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001273
humper@google.com7c7292c2013-01-04 20:29:03 +00001274 dst->fRowBytes = dst->fBounds.width();
1275 dst->fFormat = SkMask::kA8_Format;
1276 dst->fImage = NULL;
humper@google.coma99a92c2013-02-20 16:42:06 +00001277
humper@google.com7c7292c2013-01-04 20:29:03 +00001278 size_t dstSize = dst->computeImageSize();
1279 if (0 == dstSize) {
1280 return false; // too big to allocate, abort
1281 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001282
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001283 int sw = SkScalarFloorToInt(src.width());
1284 int sh = SkScalarFloorToInt(src.height());
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001285
humper@google.com7c7292c2013-01-04 20:29:03 +00001286 uint8_t* dp = SkMask::AllocImage(dstSize);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001287
humper@google.com7c7292c2013-01-04 20:29:03 +00001288 dst->fImage = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001289
humper@google.coma99a92c2013-02-20 16:42:06 +00001290 int dstHeight = dst->fBounds.height();
1291 int dstWidth = dst->fBounds.width();
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001292
humper@google.com7c7292c2013-01-04 20:29:03 +00001293 // nearest odd number less than the profile size represents the center
1294 // of the (2x scaled) profile
1295 int center = ( profile_size & ~1 ) - 1;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001296
humper@google.com7c7292c2013-01-04 20:29:03 +00001297 int w = sw - center;
1298 int h = sh - center;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001299
humper@google.com7c7292c2013-01-04 20:29:03 +00001300 uint8_t *outptr = dp;
humper@google.coma99a92c2013-02-20 16:42:06 +00001301
1302 SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001303
humper@google.coma99a92c2013-02-20 16:42:06 +00001304 for (int x = 0 ; x < dstWidth ; ++x) {
1305 if (profile_size <= sw) {
1306 horizontalScanline[x] = profile_lookup(profile, x, dstWidth, w);
1307 } else {
1308 float span = float(sw)/radius;
1309 float giX = 1.5 - (x+.5)/radius;
1310 horizontalScanline[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
1311 }
1312 }
1313
1314 for (int y = 0 ; y < dstHeight ; ++y) {
1315 unsigned int profile_y;
1316 if (profile_size <= sh) {
1317 profile_y = profile_lookup(profile, y, dstHeight, h);
1318 } else {
1319 float span = float(sh)/radius;
1320 float giY = 1.5 - (y+.5)/radius;
1321 profile_y = (uint8_t) (255 * (gaussianIntegral(giY) - gaussianIntegral(giY + span)));
1322 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001323
humper@google.coma99a92c2013-02-20 16:42:06 +00001324 for (int x = 0 ; x < dstWidth ; x++) {
1325 unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], profile_y);
humper@google.com7c7292c2013-01-04 20:29:03 +00001326 *(outptr++) = maskval;
1327 }
1328 }
humper@google.coma99a92c2013-02-20 16:42:06 +00001329
1330 if (style == kInner_Style) {
1331 // now we allocate the "real" dst, mirror the size of src
1332 size_t srcSize = src.width() * src.height();
1333 if (0 == srcSize) {
1334 return false; // too big to allocate, abort
1335 }
1336 dst->fImage = SkMask::AllocImage(srcSize);
1337 for (int y = 0 ; y < sh ; y++) {
1338 uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
1339 uint8_t *inner_scanline = dst->fImage + y*sw;
1340 memcpy(inner_scanline, blur_scanline, sw);
1341 }
1342 SkMask::FreeImage(dp);
1343
1344 dst->fBounds.set(0, 0, sw, sh); // restore trimmed bounds
1345 dst->fRowBytes = sw;
1346
1347 } else if (style == kOuter_Style) {
1348 for (int y = pad ; y < dstHeight-pad ; y++) {
1349 uint8_t *dst_scanline = dp + y*dstWidth + pad;
1350 memset(dst_scanline, 0, sw);
1351 }
1352 }
1353 // normal and solid styles are the same for analytic rect blurs, so don't
1354 // need to handle solid specially.
1355
1356 return true;
1357}
1358
1359// The "simple" blur is a direct implementation of separable convolution with a discrete
1360// gaussian kernel. It's "ground truth" in a sense; too slow to be used, but very
1361// useful for correctness comparisons.
1362
1363bool SkBlurMask::BlurGroundTruth(SkMask* dst, const SkMask& src, SkScalar provided_radius,
1364 Style style, SkIPoint* margin) {
1365
1366 if (src.fFormat != SkMask::kA8_Format) {
1367 return false;
1368 }
1369
1370 float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor));
1371 float stddev = SkScalarToFloat(radius) /2.0f;
1372 float variance = stddev * stddev;
1373
1374 int windowSize = SkScalarCeil(stddev*4);
1375 // round window size up to nearest odd number
1376 windowSize |= 1;
1377
1378 SkAutoTMalloc<float> gaussWindow(windowSize);
1379
1380 int halfWindow = windowSize >> 1;
1381
1382 gaussWindow[halfWindow] = 1;
1383
1384 float windowSum = 1;
1385 for (int x = 1 ; x <= halfWindow ; ++x) {
1386 float gaussian = expf(-x*x / variance);
1387 gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
1388 windowSum += 2*gaussian;
1389 }
1390
1391 // leave the filter un-normalized for now; we will divide by the normalization
1392 // sum later;
1393
1394 int pad = halfWindow;
1395 if (margin) {
1396 margin->set( pad, pad );
1397 }
1398
1399 dst->fBounds = src.fBounds;
1400 dst->fBounds.outset(pad, pad);
1401
1402 dst->fRowBytes = dst->fBounds.width();
1403 dst->fFormat = SkMask::kA8_Format;
1404 dst->fImage = NULL;
1405
1406 if (src.fImage) {
1407
1408 size_t dstSize = dst->computeImageSize();
1409 if (0 == dstSize) {
1410 return false; // too big to allocate, abort
1411 }
1412
1413 int srcWidth = src.fBounds.width();
1414 int srcHeight = src.fBounds.height();
1415 int dstWidth = dst->fBounds.width();
1416
1417 const uint8_t* srcPixels = src.fImage;
1418 uint8_t* dstPixels = SkMask::AllocImage(dstSize);
1419 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
1420
1421 // do the actual blur. First, make a padded copy of the source.
1422 // use double pad so we never have to check if we're outside anything
1423
1424 int padWidth = srcWidth + 4*pad;
1425 int padHeight = srcHeight;
1426 int padSize = padWidth * padHeight;
1427
1428 SkAutoTMalloc<uint8_t> padPixels(padSize);
1429 memset(padPixels, 0, padSize);
1430
1431 for (int y = 0 ; y < srcHeight; ++y) {
1432 uint8_t* padptr = padPixels + y * padWidth + 2*pad;
1433 const uint8_t* srcptr = srcPixels + y * srcWidth;
1434 memcpy(padptr, srcptr, srcWidth);
1435 }
1436
1437 // blur in X, transposing the result into a temporary floating point buffer.
1438 // also double-pad the intermediate result so that the second blur doesn't
1439 // have to do extra conditionals.
1440
1441 int tmpWidth = padHeight + 4*pad;
1442 int tmpHeight = padWidth - 2*pad;
1443 int tmpSize = tmpWidth * tmpHeight;
1444
1445 SkAutoTMalloc<float> tmpImage(tmpSize);
1446 memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
1447
1448 for (int y = 0 ; y < padHeight ; ++y) {
1449 uint8_t *srcScanline = padPixels + y*padWidth;
1450 for (int x = pad ; x < padWidth - pad ; ++x) {
1451 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
1452 uint8_t *windowCenter = srcScanline + x;
1453 for (int i = -pad ; i <= pad ; ++i) {
1454 *outPixel += gaussWindow[pad+i]*windowCenter[i];
1455 }
1456 *outPixel /= windowSum;
1457 }
1458 }
1459
1460 // blur in Y; now filling in the actual desired destination. We have to do
1461 // the transpose again; these transposes guarantee that we read memory in
1462 // linear order.
1463
1464 for (int y = 0 ; y < tmpHeight ; ++y) {
1465 float *srcScanline = tmpImage + y*tmpWidth;
1466 for (int x = pad ; x < tmpWidth - pad ; ++x) {
1467 float *windowCenter = srcScanline + x;
1468 float finalValue = 0;
1469 for (int i = -pad ; i <= pad ; ++i) {
1470 finalValue += gaussWindow[pad+i]*windowCenter[i];
1471 }
1472 finalValue /= windowSum;
1473 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
1474 int integerPixel = int(finalValue + 0.5f);
1475 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
1476 }
1477 }
1478
1479 dst->fImage = dstPixels;
1480 // if need be, alloc the "real" dst (same size as src) and copy/merge
1481 // the blur into it (applying the src)
1482 if (style == kInner_Style) {
1483 // now we allocate the "real" dst, mirror the size of src
1484 size_t srcSize = src.computeImageSize();
1485 if (0 == srcSize) {
1486 return false; // too big to allocate, abort
1487 }
1488 dst->fImage = SkMask::AllocImage(srcSize);
1489 merge_src_with_blur(dst->fImage, src.fRowBytes,
1490 srcPixels, src.fRowBytes,
1491 dstPixels + pad*dst->fRowBytes + pad,
1492 dst->fRowBytes, srcWidth, srcHeight);
1493 SkMask::FreeImage(dstPixels);
1494 } else if (style != kNormal_Style) {
1495 clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
1496 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
1497 }
1498 (void)autoCall.detach();
1499 }
1500
1501 if (style == kInner_Style) {
1502 dst->fBounds = src.fBounds; // restore trimmed bounds
1503 dst->fRowBytes = src.fRowBytes;
1504 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001505
humper@google.com7c7292c2013-01-04 20:29:03 +00001506 return true;
1507}