blob: d220a5436e0389ebc5aed02e1204f7b220313585 [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
// Scale factor applied to the blur radius to match the behavior of all the
// existing blur code (both on the CPU and the GPU). This magic constant is
// 1/sqrt(3).

// TODO: get rid of this fudge factor and move any required fudging up into
// the calling library
20
21#define kBlurRadiusFudgeFactor SkFloatToScalar( .57735f )
22
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000023#define UNROLL_SEPARABLE_LOOPS
24
/**
 * This function performs a box blur in X, of the given radius.  If the
 * "transpose" parameter is true, it will transpose the pixels on write,
 * such that X and Y are swapped. Reads are always performed from contiguous
 * memory in X, for speed. Each output row is
 * width + 2 * max(leftRadius, rightRadius) pixels wide (rows are padded with
 * zeros when the two radii differ), so the destination buffer (dst) must be
 * at least (width + 2 * max(leftRadius, rightRadius)) * height bytes in size.
 *
 * This is what the inner loop looks like before unrolling, and with the two
 * cases broken out separately (width < diameter, width >= diameter):
 *
 *      if (width < diameter) {
 *          for (int x = 0; x < width; ++x) {
 *              sum += *right++;
 *              *dptr = (sum * scale + half) >> 24;
 *              dptr += dst_x_stride;
 *          }
 *          for (int x = width; x < diameter; ++x) {
 *              *dptr = (sum * scale + half) >> 24;
 *              dptr += dst_x_stride;
 *          }
 *          for (int x = 0; x < width; ++x) {
 *              *dptr = (sum * scale + half) >> 24;
 *              sum -= *left++;
 *              dptr += dst_x_stride;
 *          }
 *      } else {
 *          for (int x = 0; x < diameter; ++x) {
 *              sum += *right++;
 *              *dptr = (sum * scale + half) >> 24;
 *              dptr += dst_x_stride;
 *          }
 *          for (int x = diameter; x < width; ++x) {
 *              sum += *right++;
 *              *dptr = (sum * scale + half) >> 24;
 *              sum -= *left++;
 *              dptr += dst_x_stride;
 *          }
 *          for (int x = 0; x < diameter; ++x) {
 *              *dptr = (sum * scale + half) >> 24;
 *              sum -= *left++;
 *              dptr += dst_x_stride;
 *          }
 *      }
 */
69static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000070 int leftRadius, int rightRadius, int width, int height,
71 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000072{
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000073 int diameter = leftRadius + rightRadius;
74 int kernelSize = diameter + 1;
75 int border = SkMin32(width, diameter);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000076 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000077 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000078 int dst_x_stride = transpose ? height : 1;
79 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000080#ifndef SK_DISABLE_BLUR_ROUNDING
81 uint32_t half = 1 << 23;
82#else
83 uint32_t half = 0;
84#endif
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000085 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000086 uint32_t sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000087 uint8_t* dptr = dst + y * dst_y_stride;
88 const uint8_t* right = src + y * src_y_stride;
89 const uint8_t* left = right;
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000090 for (int x = 0; x < rightRadius - leftRadius; x++) {
91 *dptr = 0;
92 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000093 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000094#define LEFT_BORDER_ITER \
95 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +000096 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000097 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +000098
99 int x = 0;
100#ifdef UNROLL_SEPARABLE_LOOPS
101 for (; x < border - 16; x += 16) {
102 LEFT_BORDER_ITER
103 LEFT_BORDER_ITER
104 LEFT_BORDER_ITER
105 LEFT_BORDER_ITER
106 LEFT_BORDER_ITER
107 LEFT_BORDER_ITER
108 LEFT_BORDER_ITER
109 LEFT_BORDER_ITER
110 LEFT_BORDER_ITER
111 LEFT_BORDER_ITER
112 LEFT_BORDER_ITER
113 LEFT_BORDER_ITER
114 LEFT_BORDER_ITER
115 LEFT_BORDER_ITER
116 LEFT_BORDER_ITER
117 LEFT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000118 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000119#endif
120 for (; x < border; ++x) {
121 LEFT_BORDER_ITER
122 }
123#undef LEFT_BORDER_ITER
124#define TRIVIAL_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000125 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000126 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000127 x = width;
128#ifdef UNROLL_SEPARABLE_LOOPS
129 for (; x < diameter - 16; x += 16) {
130 TRIVIAL_ITER
131 TRIVIAL_ITER
132 TRIVIAL_ITER
133 TRIVIAL_ITER
134 TRIVIAL_ITER
135 TRIVIAL_ITER
136 TRIVIAL_ITER
137 TRIVIAL_ITER
138 TRIVIAL_ITER
139 TRIVIAL_ITER
140 TRIVIAL_ITER
141 TRIVIAL_ITER
142 TRIVIAL_ITER
143 TRIVIAL_ITER
144 TRIVIAL_ITER
145 TRIVIAL_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000146 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000147#endif
148 for (; x < diameter; ++x) {
149 TRIVIAL_ITER
150 }
151#undef TRIVIAL_ITER
152#define CENTER_ITER \
153 sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000154 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000155 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000156 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000157
158 x = diameter;
159#ifdef UNROLL_SEPARABLE_LOOPS
160 for (; x < width - 16; x += 16) {
161 CENTER_ITER
162 CENTER_ITER
163 CENTER_ITER
164 CENTER_ITER
165 CENTER_ITER
166 CENTER_ITER
167 CENTER_ITER
168 CENTER_ITER
169 CENTER_ITER
170 CENTER_ITER
171 CENTER_ITER
172 CENTER_ITER
173 CENTER_ITER
174 CENTER_ITER
175 CENTER_ITER
176 CENTER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000177 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000178#endif
179 for (; x < width; ++x) {
180 CENTER_ITER
181 }
182#undef CENTER_ITER
183#define RIGHT_BORDER_ITER \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000184 *dptr = (sum * scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000185 sum -= *left++; \
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000186 dptr += dst_x_stride;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000187
188 x = 0;
189#ifdef UNROLL_SEPARABLE_LOOPS
190 for (; x < border - 16; x += 16) {
191 RIGHT_BORDER_ITER
192 RIGHT_BORDER_ITER
193 RIGHT_BORDER_ITER
194 RIGHT_BORDER_ITER
195 RIGHT_BORDER_ITER
196 RIGHT_BORDER_ITER
197 RIGHT_BORDER_ITER
198 RIGHT_BORDER_ITER
199 RIGHT_BORDER_ITER
200 RIGHT_BORDER_ITER
201 RIGHT_BORDER_ITER
202 RIGHT_BORDER_ITER
203 RIGHT_BORDER_ITER
204 RIGHT_BORDER_ITER
205 RIGHT_BORDER_ITER
206 RIGHT_BORDER_ITER
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000207 }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000208#endif
209 for (; x < border; ++x) {
210 RIGHT_BORDER_ITER
211 }
212#undef RIGHT_BORDER_ITER
humper@google.coma99a92c2013-02-20 16:42:06 +0000213 for (int x = 0; x < leftRadius - rightRadius; ++x) {
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +0000214 *dptr = 0;
215 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000216 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000217 SkASSERT(sum == 0);
218 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000219 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000220}
221
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000222/**
223 * This variant of the box blur handles blurring of non-integer radii. It
224 * keeps two running sums: an outer sum for the rounded-up kernel radius, and
225 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly
226 * interpolates between them. In float this would be:
227 * outer_weight * outer_sum / kernelSize +
228 * (1.0 - outer_weight) * innerSum / (kernelSize - 2)
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +0000229 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000230 * This is what the inner loop looks like before unrolling, and with the two
231 * cases broken out separately (width < diameter, width >= diameter):
skia.committer@gmail.com76bf70d2013-02-20 07:02:30 +0000232 *
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000233 * if (width < diameter) {
234 * for (int x = 0; x < width; x++) {
235 * inner_sum = outer_sum;
236 * outer_sum += *right++;
237 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
238 * dptr += dst_x_stride;
239 * }
240 * for (int x = width; x < diameter; ++x) {
241 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
242 * dptr += dst_x_stride;
243 * }
244 * for (int x = 0; x < width; x++) {
245 * inner_sum = outer_sum - *left++;
246 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
247 * dptr += dst_x_stride;
248 * outer_sum = inner_sum;
249 * }
250 * } else {
251 * for (int x = 0; x < diameter; x++) {
252 * inner_sum = outer_sum;
253 * outer_sum += *right++;
254 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
255 * dptr += dst_x_stride;
256 * }
257 * for (int x = diameter; x < width; ++x) {
258 * inner_sum = outer_sum - *left;
259 * outer_sum += *right++;
260 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
261 * dptr += dst_x_stride;
262 * outer_sum -= *left++;
263 * }
264 * for (int x = 0; x < diameter; x++) {
265 * inner_sum = outer_sum - *left++;
266 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
267 * dptr += dst_x_stride;
268 * outer_sum = inner_sum;
269 * }
 *      }
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000273 */
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000274
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000275static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
276 int radius, int width, int height,
277 bool transpose, uint8_t outer_weight)
278{
279 int diameter = radius * 2;
280 int kernelSize = diameter + 1;
281 int border = SkMin32(width, diameter);
282 int inner_weight = 255 - outer_weight;
283 outer_weight += outer_weight >> 7;
284 inner_weight += inner_weight >> 7;
285 uint32_t outer_scale = (outer_weight << 16) / kernelSize;
286 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000287#ifndef SK_DISABLE_BLUR_ROUNDING
288 uint32_t half = 1 << 23;
289#else
290 uint32_t half = 0;
291#endif
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000292 int new_width = width + diameter;
293 int dst_x_stride = transpose ? height : 1;
294 int dst_y_stride = transpose ? 1 : new_width;
295 for (int y = 0; y < height; ++y) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000296 uint32_t outer_sum = 0, inner_sum = 0;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000297 uint8_t* dptr = dst + y * dst_y_stride;
298 const uint8_t* right = src + y * src_y_stride;
299 const uint8_t* left = right;
300 int x = 0;
301
302#define LEFT_BORDER_ITER \
303 inner_sum = outer_sum; \
304 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000305 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000306 dptr += dst_x_stride;
307
308#ifdef UNROLL_SEPARABLE_LOOPS
309 for (;x < border - 16; x += 16) {
310 LEFT_BORDER_ITER
311 LEFT_BORDER_ITER
312 LEFT_BORDER_ITER
313 LEFT_BORDER_ITER
314 LEFT_BORDER_ITER
315 LEFT_BORDER_ITER
316 LEFT_BORDER_ITER
317 LEFT_BORDER_ITER
318 LEFT_BORDER_ITER
319 LEFT_BORDER_ITER
320 LEFT_BORDER_ITER
321 LEFT_BORDER_ITER
322 LEFT_BORDER_ITER
323 LEFT_BORDER_ITER
324 LEFT_BORDER_ITER
325 LEFT_BORDER_ITER
326 }
327#endif
328
humper@google.coma99a92c2013-02-20 16:42:06 +0000329 for (;x < border; ++x) {
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000330 LEFT_BORDER_ITER
331 }
332#undef LEFT_BORDER_ITER
333 for (int x = width; x < diameter; ++x) {
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000334 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000335 dptr += dst_x_stride;
336 }
337 x = diameter;
338
339#define CENTER_ITER \
340 inner_sum = outer_sum - *left; \
341 outer_sum += *right++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000342 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000343 dptr += dst_x_stride; \
344 outer_sum -= *left++;
345
346#ifdef UNROLL_SEPARABLE_LOOPS
347 for (; x < width - 16; x += 16) {
348 CENTER_ITER
349 CENTER_ITER
350 CENTER_ITER
351 CENTER_ITER
352 CENTER_ITER
353 CENTER_ITER
354 CENTER_ITER
355 CENTER_ITER
356 CENTER_ITER
357 CENTER_ITER
358 CENTER_ITER
359 CENTER_ITER
360 CENTER_ITER
361 CENTER_ITER
362 CENTER_ITER
363 CENTER_ITER
364 }
365#endif
366 for (; x < width; ++x) {
367 CENTER_ITER
368 }
369#undef CENTER_ITER
370
371 #define RIGHT_BORDER_ITER \
372 inner_sum = outer_sum - *left++; \
senorblanco@chromium.org4a525d72013-02-19 16:09:10 +0000373 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000374 dptr += dst_x_stride; \
375 outer_sum = inner_sum;
376
377 x = 0;
378#ifdef UNROLL_SEPARABLE_LOOPS
379 for (; x < border - 16; x += 16) {
380 RIGHT_BORDER_ITER
381 RIGHT_BORDER_ITER
382 RIGHT_BORDER_ITER
383 RIGHT_BORDER_ITER
384 RIGHT_BORDER_ITER
385 RIGHT_BORDER_ITER
386 RIGHT_BORDER_ITER
387 RIGHT_BORDER_ITER
388 RIGHT_BORDER_ITER
389 RIGHT_BORDER_ITER
390 RIGHT_BORDER_ITER
391 RIGHT_BORDER_ITER
392 RIGHT_BORDER_ITER
393 RIGHT_BORDER_ITER
394 RIGHT_BORDER_ITER
395 RIGHT_BORDER_ITER
396 }
397#endif
humper@google.coma99a92c2013-02-20 16:42:06 +0000398 for (; x < border; ++x) {
senorblanco@chromium.org9b0d4d72012-11-27 22:57:41 +0000399 RIGHT_BORDER_ITER
400 }
401#undef RIGHT_BORDER_ITER
402 SkASSERT(outer_sum == 0 && inner_sum == 0);
403 }
404 return new_width;
405}
406
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000407static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
408{
409 *loRadius = *hiRadius = SkScalarCeil(passRadius);
410 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
411 *loRadius = *hiRadius - 1;
412 }
413}
414
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
tomhudson@google.com054ff1e2012-01-11 19:29:08 +0000419#if defined(SK_BUILD_FOR_WIN32)
tomhudson@google.com01224d52011-11-28 18:22:01 +0000420#define UNROLL_KERNEL_LOOP 1
421#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +0000422
reed@android.com45607672009-09-21 00:27:08 +0000423/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
424 src values at their position, plus all values above and to the left.
425 When we sample into this buffer, we need an initial row and column of 0s,
426 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000427
reed@android.com45607672009-09-21 00:27:08 +0000428 src[i, j] == sum[i+1, j+1]
429 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000430
reed@android.com45607672009-09-21 00:27:08 +0000431 We assume that the sum buffer's stride == its width
432 */
reed@google.com03016a32011-08-12 14:59:59 +0000433static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
434 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +0000435 int sumW = srcW + 1;
436
437 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000438 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000439 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000440
441 int x, y;
442
reed@android.com45607672009-09-21 00:27:08 +0000443 // zero out the top row and column
444 memset(sum, 0, sumW * sizeof(sum[0]));
445 sum += sumW;
446
reed@android.com8a1c16f2008-12-17 15:59:43 +0000447 // special case first row
448 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000449 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000450 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000451 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000452 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000453 }
454 src += srcRB;
455
456 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000457 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000458 uint32_t L = 0;
459 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000460 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000461
462 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
463 uint32_t T = sum[-sumW];
464 X = *src++ + L + T - C;
465 *sum++ = X;
466 L = X;
467 C = T;
468 }
469
470 for (; x >= 4; x-=4) {
471 uint32_t T = sum[-sumW];
472 X = *src++ + L + T - C;
473 *sum++ = X;
474 L = X;
475 C = T;
476 T = sum[-sumW];
477 X = *src++ + L + T - C;
478 *sum++ = X;
479 L = X;
480 C = T;
481 T = sum[-sumW];
482 X = *src++ + L + T - C;
483 *sum++ = X;
484 L = X;
485 C = T;
486 T = sum[-sumW];
487 X = *src++ + L + T - C;
488 *sum++ = X;
489 L = X;
490 C = T;
491 }
492
493 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000494 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000495 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000496 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000497 L = X;
498 C = T;
499 }
500 src += srcRB;
501 }
502}
503
reed@google.com03016a32011-08-12 14:59:59 +0000504/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000505 * This is the path for apply_kernel() to be taken when the kernel
506 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000507 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000508static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
509 int sw, int sh) {
510 SkASSERT(2*rx > sw);
511
reed@android.com8a1c16f2008-12-17 15:59:43 +0000512 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
513
reed@android.com45607672009-09-21 00:27:08 +0000514 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000515
516 int dw = sw + 2*rx;
517 int dh = sh + 2*ry;
518
reed@android.com45607672009-09-21 00:27:08 +0000519 int prev_y = -2*ry;
520 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000521
humper@google.coma99a92c2013-02-20 16:42:06 +0000522 for (int y = 0; y < dh; ++y) {
reed@android.com45607672009-09-21 00:27:08 +0000523 int py = SkClampPos(prev_y) * sumStride;
524 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000525
reed@android.com45607672009-09-21 00:27:08 +0000526 int prev_x = -2*rx;
527 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000528
humper@google.coma99a92c2013-02-20 16:42:06 +0000529 for (int x = 0; x < dw; ++x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000530 int px = SkClampPos(prev_x);
531 int nx = SkFastMin32(next_x, sw);
532
humper@google.coma99a92c2013-02-20 16:42:06 +0000533 // TODO: should we be adding 1/2 (1 << 23) to round to the
534 // nearest integer here?
reed@android.com45607672009-09-21 00:27:08 +0000535 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
536 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000537
538 prev_x += 1;
539 next_x += 1;
540 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000541
542 prev_y += 1;
543 next_y += 1;
544 }
545}
546/**
547 * sw and sh are the width and height of the src. Since the sum buffer
548 * matches that, but has an extra row and col at the beginning (with zeros),
549 * we can just use sw and sh as our "max" values for pinning coordinates
550 * when sampling into sum[][]
551 *
552 * The inner loop is conceptually simple; we break it into several sections
553 * to improve performance. Here's the original version:
humper@google.coma99a92c2013-02-20 16:42:06 +0000554 for (int x = 0; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000555 int px = SkClampPos(prev_x);
556 int nx = SkFastMin32(next_x, sw);
557
558 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
559 *dst++ = SkToU8(tmp * scale >> 24);
560
561 prev_x += 1;
562 next_x += 1;
563 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000564 * The sections are:
565 * left-hand section, where prev_x is clamped to 0
566 * center section, where neither prev_x nor next_x is clamped
567 * right-hand section, where next_x is clamped to sw
568 * On some operating systems, the center section is unrolled for additional
569 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000570*/
571static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
572 int sw, int sh) {
573 if (2*rx > sw) {
574 kernel_clamped(dst, rx, ry, sum, sw, sh);
575 return;
576 }
577
578 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
579
580 int sumStride = sw + 1;
581
582 int dw = sw + 2*rx;
583 int dh = sh + 2*ry;
584
585 int prev_y = -2*ry;
586 int next_y = 1;
587
588 SkASSERT(2*rx <= dw - 2*rx);
589
humper@google.coma99a92c2013-02-20 16:42:06 +0000590 for (int y = 0; y < dh; ++y) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000591 int py = SkClampPos(prev_y) * sumStride;
592 int ny = SkFastMin32(next_y, sh) * sumStride;
593
594 int prev_x = -2*rx;
595 int next_x = 1;
596 int x = 0;
597
humper@google.coma99a92c2013-02-20 16:42:06 +0000598 for (; x < 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000599 SkASSERT(prev_x <= 0);
600 SkASSERT(next_x <= sw);
601
602 int px = 0;
603 int nx = next_x;
604
605 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
606 *dst++ = SkToU8(tmp * scale >> 24);
607
608 prev_x += 1;
609 next_x += 1;
610 }
611
tomhudson@google.com01224d52011-11-28 18:22:01 +0000612 int i0 = prev_x + py;
613 int i1 = next_x + ny;
614 int i2 = next_x + py;
615 int i3 = prev_x + ny;
616
617#if UNROLL_KERNEL_LOOP
618 for (; x < dw - 2*rx - 4; x += 4) {
619 SkASSERT(prev_x >= 0);
620 SkASSERT(next_x <= sw);
621
622 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
623 *dst++ = SkToU8(tmp * scale >> 24);
624 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
625 *dst++ = SkToU8(tmp * scale >> 24);
626 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
627 *dst++ = SkToU8(tmp * scale >> 24);
628 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
629 *dst++ = SkToU8(tmp * scale >> 24);
630
631 prev_x += 4;
632 next_x += 4;
633 }
634#endif
635
humper@google.coma99a92c2013-02-20 16:42:06 +0000636 for (; x < dw - 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000637 SkASSERT(prev_x >= 0);
638 SkASSERT(next_x <= sw);
639
tomhudson@google.com01224d52011-11-28 18:22:01 +0000640 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000641 *dst++ = SkToU8(tmp * scale >> 24);
642
643 prev_x += 1;
644 next_x += 1;
645 }
646
humper@google.coma99a92c2013-02-20 16:42:06 +0000647 for (; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000648 SkASSERT(prev_x >= 0);
649 SkASSERT(next_x > sw);
650
651 int px = prev_x;
652 int nx = sw;
653
654 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
655 *dst++ = SkToU8(tmp * scale >> 24);
656
657 prev_x += 1;
658 next_x += 1;
659 }
660
reed@android.com8a1c16f2008-12-17 15:59:43 +0000661 prev_y += 1;
662 next_y += 1;
663 }
664}
665
reed@google.com03016a32011-08-12 14:59:59 +0000666/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000667 * This is the path for apply_kernel_interp() to be taken when the kernel
668 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000669 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000670static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
humper@google.coma99a92c2013-02-20 16:42:06 +0000671 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000672 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000673
humper@google.coma99a92c2013-02-20 16:42:06 +0000674 int innerWeight = 255 - outerWeight;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000675
676 // round these guys up if they're bigger than 127
humper@google.coma99a92c2013-02-20 16:42:06 +0000677 outerWeight += outerWeight >> 7;
678 innerWeight += innerWeight >> 7;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000679
humper@google.coma99a92c2013-02-20 16:42:06 +0000680 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
681 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000682
reed@android.com45607672009-09-21 00:27:08 +0000683 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000684
685 int dw = sw + 2*rx;
686 int dh = sh + 2*ry;
687
reed@android.com45607672009-09-21 00:27:08 +0000688 int prev_y = -2*ry;
689 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000690
humper@google.coma99a92c2013-02-20 16:42:06 +0000691 for (int y = 0; y < dh; ++y) {
reed@android.com45607672009-09-21 00:27:08 +0000692 int py = SkClampPos(prev_y) * sumStride;
693 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000694
reed@android.com45607672009-09-21 00:27:08 +0000695 int ipy = SkClampPos(prev_y + 1) * sumStride;
696 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000697
reed@android.com45607672009-09-21 00:27:08 +0000698 int prev_x = -2*rx;
699 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000700
humper@google.coma99a92c2013-02-20 16:42:06 +0000701 for (int x = 0; x < dw; ++x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000702 int px = SkClampPos(prev_x);
703 int nx = SkFastMin32(next_x, sw);
704
705 int ipx = SkClampPos(prev_x + 1);
706 int inx = SkClampMax(next_x - 1, sw);
707
humper@google.coma99a92c2013-02-20 16:42:06 +0000708 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000709 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000710 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000711 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000712 *dst++ = SkToU8((outerSum * outerScale
713 + innerSum * innerScale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000714
715 prev_x += 1;
716 next_x += 1;
717 }
718 prev_y += 1;
719 next_y += 1;
720 }
721}
722
tomhudson@google.com8caac642011-11-22 15:58:06 +0000723/**
724 * sw and sh are the width and height of the src. Since the sum buffer
725 * matches that, but has an extra row and col at the beginning (with zeros),
726 * we can just use sw and sh as our "max" values for pinning coordinates
727 * when sampling into sum[][]
728 *
729 * The inner loop is conceptually simple; we break it into several variants
730 * to improve performance. Here's the original version:
humper@google.coma99a92c2013-02-20 16:42:06 +0000731 for (int x = 0; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000732 int px = SkClampPos(prev_x);
733 int nx = SkFastMin32(next_x, sw);
734
735 int ipx = SkClampPos(prev_x + 1);
736 int inx = SkClampMax(next_x - 1, sw);
737
humper@google.coma99a92c2013-02-20 16:42:06 +0000738 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000739 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000740 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000741 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000742 *dst++ = SkToU8((outerSum * outerScale
743 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000744
745 prev_x += 1;
746 next_x += 1;
747 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000748 * The sections are:
749 * left-hand section, where prev_x is clamped to 0
750 * center section, where neither prev_x nor next_x is clamped
751 * right-hand section, where next_x is clamped to sw
752 * On some operating systems, the center section is unrolled for additional
753 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000754*/
755static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
humper@google.coma99a92c2013-02-20 16:42:06 +0000756 const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000757 SkASSERT(rx > 0 && ry > 0);
humper@google.coma99a92c2013-02-20 16:42:06 +0000758 SkASSERT(outerWeight <= 255);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000759
760 if (2*rx > sw) {
humper@google.coma99a92c2013-02-20 16:42:06 +0000761 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000762 return;
763 }
764
humper@google.coma99a92c2013-02-20 16:42:06 +0000765 int innerWeight = 255 - outerWeight;
tomhudson@google.com8caac642011-11-22 15:58:06 +0000766
767 // round these guys up if they're bigger than 127
humper@google.coma99a92c2013-02-20 16:42:06 +0000768 outerWeight += outerWeight >> 7;
769 innerWeight += innerWeight >> 7;
tomhudson@google.com8caac642011-11-22 15:58:06 +0000770
humper@google.coma99a92c2013-02-20 16:42:06 +0000771 uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
772 uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));
tomhudson@google.com8caac642011-11-22 15:58:06 +0000773
774 int sumStride = sw + 1;
775
776 int dw = sw + 2*rx;
777 int dh = sh + 2*ry;
778
779 int prev_y = -2*ry;
780 int next_y = 1;
781
782 SkASSERT(2*rx <= dw - 2*rx);
783
humper@google.coma99a92c2013-02-20 16:42:06 +0000784 for (int y = 0; y < dh; ++y) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000785 int py = SkClampPos(prev_y) * sumStride;
786 int ny = SkFastMin32(next_y, sh) * sumStride;
787
788 int ipy = SkClampPos(prev_y + 1) * sumStride;
789 int iny = SkClampMax(next_y - 1, sh) * sumStride;
790
791 int prev_x = -2*rx;
792 int next_x = 1;
793 int x = 0;
794
humper@google.coma99a92c2013-02-20 16:42:06 +0000795 for (; x < 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000796 SkASSERT(prev_x < 0);
797 SkASSERT(next_x <= sw);
798
799 int px = 0;
800 int nx = next_x;
801
802 int ipx = 0;
803 int inx = next_x - 1;
804
humper@google.coma99a92c2013-02-20 16:42:06 +0000805 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000806 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000807 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000808 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000809 *dst++ = SkToU8((outerSum * outerScale
810 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000811
812 prev_x += 1;
813 next_x += 1;
814 }
815
tomhudson@google.com01224d52011-11-28 18:22:01 +0000816 int i0 = prev_x + py;
817 int i1 = next_x + ny;
818 int i2 = next_x + py;
819 int i3 = prev_x + ny;
820 int i4 = prev_x + 1 + ipy;
821 int i5 = next_x - 1 + iny;
822 int i6 = next_x - 1 + ipy;
823 int i7 = prev_x + 1 + iny;
824
825#if UNROLL_KERNEL_LOOP
826 for (; x < dw - 2*rx - 4; x += 4) {
827 SkASSERT(prev_x >= 0);
828 SkASSERT(next_x <= sw);
829
humper@google.coma99a92c2013-02-20 16:42:06 +0000830 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
831 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
832 *dst++ = SkToU8((outerSum * outerScale
833 + innerSum * innerScale) >> 24);
834 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
835 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
836 *dst++ = SkToU8((outerSum * outerScale
837 + innerSum * innerScale) >> 24);
838 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
839 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
840 *dst++ = SkToU8((outerSum * outerScale
841 + innerSum * innerScale) >> 24);
842 outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
843 innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
844 *dst++ = SkToU8((outerSum * outerScale
845 + innerSum * innerScale) >> 24);
tomhudson@google.com01224d52011-11-28 18:22:01 +0000846
847 prev_x += 4;
848 next_x += 4;
849 }
850#endif
851
humper@google.coma99a92c2013-02-20 16:42:06 +0000852 for (; x < dw - 2*rx; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000853 SkASSERT(prev_x >= 0);
854 SkASSERT(next_x <= sw);
855
humper@google.coma99a92c2013-02-20 16:42:06 +0000856 uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
857 uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
858 *dst++ = SkToU8((outerSum * outerScale
859 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000860
861 prev_x += 1;
862 next_x += 1;
863 }
864
humper@google.coma99a92c2013-02-20 16:42:06 +0000865 for (; x < dw; ++x) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000866 SkASSERT(prev_x >= 0);
867 SkASSERT(next_x > sw);
868
869 int px = prev_x;
870 int nx = sw;
871
872 int ipx = prev_x + 1;
873 int inx = sw;
874
humper@google.coma99a92c2013-02-20 16:42:06 +0000875 uint32_t outerSum = sum[px+py] + sum[nx+ny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000876 - sum[nx+py] - sum[px+ny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000877 uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
tomhudson@google.com8caac642011-11-22 15:58:06 +0000878 - sum[inx+ipy] - sum[ipx+iny];
humper@google.coma99a92c2013-02-20 16:42:06 +0000879 *dst++ = SkToU8((outerSum * outerScale
880 + innerSum * innerScale) >> 24);
tomhudson@google.com8caac642011-11-22 15:58:06 +0000881
882 prev_x += 1;
883 next_x += 1;
884 }
885
886 prev_y += 1;
887 next_y += 1;
888 }
889}
890
reed@android.com8a1c16f2008-12-17 15:59:43 +0000891#include "SkColorPriv.h"
892
reed@android.com0e3c6642009-09-18 13:41:56 +0000893static void merge_src_with_blur(uint8_t dst[], int dstRB,
894 const uint8_t src[], int srcRB,
895 const uint8_t blur[], int blurRB,
896 int sw, int sh) {
897 dstRB -= sw;
898 srcRB -= sw;
899 blurRB -= sw;
900 while (--sh >= 0) {
901 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000902 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
903 dst += 1;
904 src += 1;
905 blur += 1;
906 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000907 dst += dstRB;
908 src += srcRB;
909 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000910 }
911}
912
913static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000914 const uint8_t src[], int srcRowBytes,
915 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000916 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000917 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000918 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000919 switch (style) {
920 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000921 for (x = sw - 1; x >= 0; --x) {
922 int s = *src;
923 int d = *dst;
924 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000925 dst += 1;
926 src += 1;
927 }
928 break;
929 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000930 for (x = sw - 1; x >= 0; --x) {
931 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000932 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000933 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000934 dst += 1;
935 src += 1;
936 }
937 break;
938 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000939 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000940 break;
941 }
942 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000943 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000944 }
945}
946
reed@google.com03016a32011-08-12 14:59:59 +0000947///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000948
bsalomon@google.com33cdbde2013-01-11 20:54:44 +0000949// we use a local function to wrap the class static method to work around
reed@android.com8a1c16f2008-12-17 15:59:43 +0000950// a bug in gcc98
951void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000952void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000953 SkMask::FreeImage(image);
954}
955
956bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000957 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000958 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000959{
humper@google.coma99a92c2013-02-20 16:42:06 +0000960
reed@google.com03016a32011-08-12 14:59:59 +0000961 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000962 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000963 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000964
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000965 // Force high quality off for small radii (performance)
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000966 if (radius < SkIntToScalar(3)) {
967 quality = kLow_Quality;
968 }
humper@google.coma99a92c2013-02-20 16:42:06 +0000969
970 // highQuality: use three box blur passes as a cheap way
971 // to approximate a Gaussian blur
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +0000972 int passCount = (kHigh_Quality == quality) ? 3 : 1;
humper@google.coma99a92c2013-02-20 16:42:06 +0000973 SkScalar passRadius = (kHigh_Quality == quality) ?
974 SkScalarMul( radius, kBlurRadiusFudgeFactor):
975 radius;
976
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000977 int rx = SkScalarCeil(passRadius);
humper@google.coma99a92c2013-02-20 16:42:06 +0000978 int outerWeight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000979
980 SkASSERT(rx >= 0);
humper@google.coma99a92c2013-02-20 16:42:06 +0000981 SkASSERT((unsigned)outerWeight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000982 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000983 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000984 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000985
986 int ry = rx; // only do square blur for now
987
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000988 int padx = passCount * rx;
989 int pady = passCount * ry;
humper@google.coma99a92c2013-02-20 16:42:06 +0000990
bungeman@google.com5af16f82011-09-02 15:06:44 +0000991 if (margin) {
992 margin->set(padx, pady);
993 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000994 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
995 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
humper@google.coma99a92c2013-02-20 16:42:06 +0000996
reed@android.com49f0ff22009-03-19 21:52:42 +0000997 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000998 dst->fFormat = SkMask::kA8_Format;
999 dst->fImage = NULL;
1000
reed@android.com0e3c6642009-09-18 13:41:56 +00001001 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +00001002 size_t dstSize = dst->computeImageSize();
1003 if (0 == dstSize) {
1004 return false; // too big to allocate, abort
1005 }
1006
reed@android.com8a1c16f2008-12-17 15:59:43 +00001007 int sw = src.fBounds.width();
1008 int sh = src.fBounds.height();
1009 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +00001010 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001011 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
1012
1013 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001014 if (separable) {
1015 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1016 uint8_t* tp = tmpBuffer.get();
1017 int w = sw, h = sh;
humper@google.coma99a92c2013-02-20 16:42:06 +00001018
1019 if (outerWeight == 255) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +00001020 int loRadius, hiRadius;
1021 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001022 if (kHigh_Quality == quality) {
1023 // Do three X blurs, with a transpose on the final one.
1024 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
1025 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
1026 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
1027 // Do three Y blurs, with a transpose on the final one.
1028 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
1029 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
1030 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
1031 } else {
1032 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
1033 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
1034 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +00001035 } else {
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001036 if (kHigh_Quality == quality) {
1037 // Do three X blurs, with a transpose on the final one.
humper@google.coma99a92c2013-02-20 16:42:06 +00001038 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
1039 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight);
1040 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001041 // Do three Y blurs, with a transpose on the final one.
humper@google.coma99a92c2013-02-20 16:42:06 +00001042 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight);
1043 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight);
1044 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001045 } else {
humper@google.coma99a92c2013-02-20 16:42:06 +00001046 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
1047 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight);
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001048 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001049 }
1050 } else {
reed@google.com03016a32011-08-12 14:59:59 +00001051 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
1052 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
1053 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001054 uint32_t* sumBuffer = storage.get();
1055
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001056 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +00001057 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
humper@google.coma99a92c2013-02-20 16:42:06 +00001058 if (outerWeight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001059 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +00001060 } else {
humper@google.coma99a92c2013-02-20 16:42:06 +00001061 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outerWeight);
reed@google.com03016a32011-08-12 14:59:59 +00001062 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001063
senorblanco@chromium.org91f489a2012-11-29 17:09:27 +00001064 if (kHigh_Quality == quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001065 //pass2: dp is source, tmpBuffer is destination
1066 int tmp_sw = sw + 2 * rx;
1067 int tmp_sh = sh + 2 * ry;
1068 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
1069 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
humper@google.coma99a92c2013-02-20 16:42:06 +00001070 if (outerWeight == 255)
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001071 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
1072 else
reed@google.com03016a32011-08-12 14:59:59 +00001073 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
humper@google.coma99a92c2013-02-20 16:42:06 +00001074 tmp_sw, tmp_sh, outerWeight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001075
1076 //pass3: tmpBuffer is source, dp is destination
1077 tmp_sw += 2 * rx;
1078 tmp_sh += 2 * ry;
1079 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
humper@google.coma99a92c2013-02-20 16:42:06 +00001080 if (outerWeight == 255)
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001081 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
1082 else
reed@google.com03016a32011-08-12 14:59:59 +00001083 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
humper@google.coma99a92c2013-02-20 16:42:06 +00001084 outerWeight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +00001085 }
reed@android.com8a1c16f2008-12-17 15:59:43 +00001086 }
1087
1088 dst->fImage = dp;
1089 // if need be, alloc the "real" dst (same size as src) and copy/merge
1090 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +00001091 if (style == kInner_Style) {
1092 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +00001093 size_t srcSize = src.computeImageSize();
1094 if (0 == srcSize) {
1095 return false; // too big to allocate, abort
1096 }
1097 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +00001098 merge_src_with_blur(dst->fImage, src.fRowBytes,
1099 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +00001100 dp + passCount * (rx + ry * dst->fRowBytes),
1101 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001102 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +00001103 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +00001104 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
1105 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +00001106 }
1107 (void)autoCall.detach();
1108 }
1109
reed@android.com0e3c6642009-09-18 13:41:56 +00001110 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +00001111 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +00001112 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +00001113 }
1114
reed@android.com8a1c16f2008-12-17 15:59:43 +00001115 return true;
1116}
1117
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +00001118bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
1119 SkScalar radius, Style style, Quality quality,
1120 SkIPoint* margin)
1121{
1122 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
1123}
1124
1125bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
1126 SkScalar radius, Style style, Quality quality,
1127 SkIPoint* margin)
1128{
1129 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
1130}
humper@google.com7c7292c2013-01-04 20:29:03 +00001131
1132/* Convolving a box with itself three times results in a piecewise
1133 quadratic function:
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001134
humper@google.com7c7292c2013-01-04 20:29:03 +00001135 0 x <= -1.5
humper@google.coma99a92c2013-02-20 16:42:06 +00001136 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001137 3/4 - x^2 -.5 < x <= .5
1138 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5
1139 0 1.5 < x
humper@google.coma99a92c2013-02-20 16:42:06 +00001140
1141 Mathematica:
1142
1143 g[x_] := Piecewise [ {
1144 {9/8 + 3/2 x + 1/2 x^2 , -1.5 < x <= -.5},
1145 {3/4 - x^2 , -.5 < x <= .5},
1146 {9/8 - 3/2 x + 1/2 x^2 , 0.5 < x <= 1.5}
1147 }, 0]
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001148
humper@google.com7c7292c2013-01-04 20:29:03 +00001149 To get the profile curve of the blurred step function at the rectangle
1150 edge, we evaluate the indefinite integral, which is piecewise cubic:
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001151
humper@google.com7c7292c2013-01-04 20:29:03 +00001152 0 x <= -1.5
humper@google.coma99a92c2013-02-20 16:42:06 +00001153 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3 -1.5 < x <= -0.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001154 1/2 + 3/4 x - 1/3 x^3 -.5 < x <= .5
humper@google.coma99a92c2013-02-20 16:42:06 +00001155 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3 .5 < x <= 1.5
humper@google.com7c7292c2013-01-04 20:29:03 +00001156 1 1.5 < x
humper@google.coma99a92c2013-02-20 16:42:06 +00001157
1158 in Mathematica code:
1159
1160 gi[x_] := Piecewise[ {
1161 { 0 , x <= -1.5 },
1162 { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
1163 { 1/2 + 3/4 x - 1/3 x^3 , -.5 < x <= .5},
1164 { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3, .5 < x <= 1.5}
1165 },1]
humper@google.com7c7292c2013-01-04 20:29:03 +00001166*/
1167
// Piecewise-cubic falloff of a step edge blurred by three box passes.
// Returns 1 for x < -1.5, 0 for x > 1.5 and a smooth, decreasing cubic blend
// in between (0.5 at x == 0).
static float gaussianIntegral(float x) {
    if (x > 1.5f) {
        return 0.0f;
    }
    if (x < -1.5f) {
        return 1.0f;
    }

    const float xSquared = x*x;
    const float xCubed = xSquared*x;

    // Terms shared by the two outer cubic segments.
    const float cubicTerm = xCubed / 6.0f;
    const float quadTerm = 3.0f * xSquared * 0.25f;
    const float linearTerm = 1.125f * x;

    float result;
    if ( x > 0.5f ) {
        // right segment: 0.5 < x <= 1.5
        result = 0.5625f - (cubicTerm - quadTerm + linearTerm);
    } else if ( x > -0.5f ) {
        // middle segment: -0.5 < x <= 0.5
        result = 0.5f - (0.75f * x - xCubed / 3.0f);
    } else {
        // left segment: -1.5 <= x <= -0.5
        result = 0.4375f + (-cubicTerm - quadTerm - linearTerm);
    }
    return result;
}
1187
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001188/*
humper@google.com7c7292c2013-01-04 20:29:03 +00001189 compute_profile allocates and fills in an array of floating
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001190 point values between 0 and 255 for the profile signature of
humper@google.com7c7292c2013-01-04 20:29:03 +00001191 a blurred half-plane with the given blur radius. Since we're
1192 going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
1193 all the time, we actually fill in the profile pre-inverted
1194 (already done 255-x).
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001195
humper@google.com7c7292c2013-01-04 20:29:03 +00001196 The function returns the size of the array allocated for the
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001197 profile. It's the responsibility of the caller to delete the
humper@google.com7c7292c2013-01-04 20:29:03 +00001198 memory returned in profile_out.
1199*/
1200
humper@google.coma99a92c2013-02-20 16:42:06 +00001201static int compute_profile(SkScalar radius, unsigned int **profile_out) {
1202 int size = SkScalarRoundToInt(radius * 3);
humper@google.com7c7292c2013-01-04 20:29:03 +00001203 int center = size >> 1;
1204
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001205 unsigned int *profile = SkNEW_ARRAY(unsigned int, size);
humper@google.com7c7292c2013-01-04 20:29:03 +00001206
humper@google.coma99a92c2013-02-20 16:42:06 +00001207 float invr = 1.f/radius;
humper@google.com7c7292c2013-01-04 20:29:03 +00001208
1209 profile[0] = 255;
humper@google.coma99a92c2013-02-20 16:42:06 +00001210 for (int x = 1 ; x < size ; ++x) {
1211 float scaled_x = (center - x - .5) * invr;
1212 float gi = gaussianIntegral(scaled_x);
1213 profile[x] = 255 - (uint8_t) (255.f * gi);
humper@google.com7c7292c2013-01-04 20:29:03 +00001214 }
1215
1216 *profile_out = profile;
1217 return size;
1218}
1219
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001220// TODO MAYBE: Maintain a profile cache to avoid recomputing this for
humper@google.com7c7292c2013-01-04 20:29:03 +00001221// commonly used radii. Consider baking some of the most common blur radii
1222// directly in as static data?
1223
1224// Implementation adapted from Michael Herf's approach:
1225// http://stereopsis.com/shadowrect/
1226
humper@google.coma99a92c2013-02-20 16:42:06 +00001227static inline unsigned int profile_lookup( unsigned int *profile, int loc, int blurred_width, int sharp_width ) {
1228 int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
1229 int ox = dx >> 1;
1230 if (ox < 0) {
1231 ox = 0;
1232 }
1233
1234 return profile[ox];
1235}
1236
humper@google.com7c7292c2013-01-04 20:29:03 +00001237bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,
humper@google.coma99a92c2013-02-20 16:42:06 +00001238 SkScalar provided_radius, Style style,
humper@google.com7c7292c2013-01-04 20:29:03 +00001239 SkIPoint *margin) {
1240 int profile_size;
1241 unsigned int *profile;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001242
humper@google.com7c7292c2013-01-04 20:29:03 +00001243 float radius = SkScalarToFloat( SkScalarMul( provided_radius, kBlurRadiusFudgeFactor ) );
humper@google.coma99a92c2013-02-20 16:42:06 +00001244
humper@google.com1e1a24e2013-02-20 18:35:40 +00001245 // adjust blur radius to match interpretation from boxfilter code
1246 radius = (radius + .5) *2;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001247
humper@google.com7c7292c2013-01-04 20:29:03 +00001248 profile_size = compute_profile( radius, &profile );
humper@google.coma99a92c2013-02-20 16:42:06 +00001249
bsalomon@google.com33cdbde2013-01-11 20:54:44 +00001250 SkAutoTDeleteArray<unsigned int> ada(profile);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001251
humper@google.coma99a92c2013-02-20 16:42:06 +00001252 int pad = profile_size/2;
humper@google.com7c7292c2013-01-04 20:29:03 +00001253 if (margin) {
1254 margin->set( pad, pad );
1255 }
humper@google.coma99a92c2013-02-20 16:42:06 +00001256
1257 int shadow_left = -pad;
1258 int shadow_top = -pad;
1259 int shadow_right = src.width() + pad;
1260 int shadow_bottom = src.height() + pad;
1261
1262 dst->fBounds.set(shadow_left, shadow_top, shadow_right, shadow_bottom);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001263
humper@google.com7c7292c2013-01-04 20:29:03 +00001264 dst->fRowBytes = dst->fBounds.width();
1265 dst->fFormat = SkMask::kA8_Format;
1266 dst->fImage = NULL;
humper@google.coma99a92c2013-02-20 16:42:06 +00001267
humper@google.com7c7292c2013-01-04 20:29:03 +00001268 size_t dstSize = dst->computeImageSize();
1269 if (0 == dstSize) {
1270 return false; // too big to allocate, abort
1271 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001272
jvanverth@google.com9c4e5ac2013-01-07 18:41:28 +00001273 int sw = SkScalarFloorToInt(src.width());
1274 int sh = SkScalarFloorToInt(src.height());
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001275
humper@google.com7c7292c2013-01-04 20:29:03 +00001276 uint8_t* dp = SkMask::AllocImage(dstSize);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001277
humper@google.com7c7292c2013-01-04 20:29:03 +00001278 dst->fImage = dp;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001279
humper@google.coma99a92c2013-02-20 16:42:06 +00001280 int dstHeight = dst->fBounds.height();
1281 int dstWidth = dst->fBounds.width();
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001282
humper@google.com7c7292c2013-01-04 20:29:03 +00001283 // nearest odd number less than the profile size represents the center
1284 // of the (2x scaled) profile
1285 int center = ( profile_size & ~1 ) - 1;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001286
humper@google.com7c7292c2013-01-04 20:29:03 +00001287 int w = sw - center;
1288 int h = sh - center;
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001289
humper@google.com7c7292c2013-01-04 20:29:03 +00001290 uint8_t *outptr = dp;
humper@google.coma99a92c2013-02-20 16:42:06 +00001291
1292 SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001293
humper@google.coma99a92c2013-02-20 16:42:06 +00001294 for (int x = 0 ; x < dstWidth ; ++x) {
1295 if (profile_size <= sw) {
1296 horizontalScanline[x] = profile_lookup(profile, x, dstWidth, w);
1297 } else {
1298 float span = float(sw)/radius;
1299 float giX = 1.5 - (x+.5)/radius;
1300 horizontalScanline[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
1301 }
1302 }
1303
1304 for (int y = 0 ; y < dstHeight ; ++y) {
1305 unsigned int profile_y;
1306 if (profile_size <= sh) {
1307 profile_y = profile_lookup(profile, y, dstHeight, h);
1308 } else {
1309 float span = float(sh)/radius;
1310 float giY = 1.5 - (y+.5)/radius;
1311 profile_y = (uint8_t) (255 * (gaussianIntegral(giY) - gaussianIntegral(giY + span)));
1312 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001313
humper@google.coma99a92c2013-02-20 16:42:06 +00001314 for (int x = 0 ; x < dstWidth ; x++) {
1315 unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], profile_y);
humper@google.com7c7292c2013-01-04 20:29:03 +00001316 *(outptr++) = maskval;
1317 }
1318 }
humper@google.coma99a92c2013-02-20 16:42:06 +00001319
1320 if (style == kInner_Style) {
1321 // now we allocate the "real" dst, mirror the size of src
1322 size_t srcSize = src.width() * src.height();
1323 if (0 == srcSize) {
1324 return false; // too big to allocate, abort
1325 }
1326 dst->fImage = SkMask::AllocImage(srcSize);
1327 for (int y = 0 ; y < sh ; y++) {
1328 uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
1329 uint8_t *inner_scanline = dst->fImage + y*sw;
1330 memcpy(inner_scanline, blur_scanline, sw);
1331 }
1332 SkMask::FreeImage(dp);
1333
1334 dst->fBounds.set(0, 0, sw, sh); // restore trimmed bounds
1335 dst->fRowBytes = sw;
1336
1337 } else if (style == kOuter_Style) {
1338 for (int y = pad ; y < dstHeight-pad ; y++) {
1339 uint8_t *dst_scanline = dp + y*dstWidth + pad;
1340 memset(dst_scanline, 0, sw);
1341 }
1342 }
1343 // normal and solid styles are the same for analytic rect blurs, so don't
1344 // need to handle solid specially.
1345
1346 return true;
1347}
1348
1349// The "simple" blur is a direct implementation of separable convolution with a discrete
1350// gaussian kernel. It's "ground truth" in a sense; too slow to be used, but very
1351// useful for correctness comparisons.
1352
1353bool SkBlurMask::BlurGroundTruth(SkMask* dst, const SkMask& src, SkScalar provided_radius,
1354 Style style, SkIPoint* margin) {
1355
1356 if (src.fFormat != SkMask::kA8_Format) {
1357 return false;
1358 }
1359
1360 float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor));
1361 float stddev = SkScalarToFloat(radius) /2.0f;
1362 float variance = stddev * stddev;
1363
1364 int windowSize = SkScalarCeil(stddev*4);
1365 // round window size up to nearest odd number
1366 windowSize |= 1;
1367
1368 SkAutoTMalloc<float> gaussWindow(windowSize);
1369
1370 int halfWindow = windowSize >> 1;
1371
1372 gaussWindow[halfWindow] = 1;
1373
1374 float windowSum = 1;
1375 for (int x = 1 ; x <= halfWindow ; ++x) {
1376 float gaussian = expf(-x*x / variance);
1377 gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
1378 windowSum += 2*gaussian;
1379 }
1380
1381 // leave the filter un-normalized for now; we will divide by the normalization
1382 // sum later;
1383
1384 int pad = halfWindow;
1385 if (margin) {
1386 margin->set( pad, pad );
1387 }
1388
1389 dst->fBounds = src.fBounds;
1390 dst->fBounds.outset(pad, pad);
1391
1392 dst->fRowBytes = dst->fBounds.width();
1393 dst->fFormat = SkMask::kA8_Format;
1394 dst->fImage = NULL;
1395
1396 if (src.fImage) {
1397
1398 size_t dstSize = dst->computeImageSize();
1399 if (0 == dstSize) {
1400 return false; // too big to allocate, abort
1401 }
1402
1403 int srcWidth = src.fBounds.width();
1404 int srcHeight = src.fBounds.height();
1405 int dstWidth = dst->fBounds.width();
1406
1407 const uint8_t* srcPixels = src.fImage;
1408 uint8_t* dstPixels = SkMask::AllocImage(dstSize);
1409 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
1410
1411 // do the actual blur. First, make a padded copy of the source.
1412 // use double pad so we never have to check if we're outside anything
1413
1414 int padWidth = srcWidth + 4*pad;
1415 int padHeight = srcHeight;
1416 int padSize = padWidth * padHeight;
1417
1418 SkAutoTMalloc<uint8_t> padPixels(padSize);
1419 memset(padPixels, 0, padSize);
1420
1421 for (int y = 0 ; y < srcHeight; ++y) {
1422 uint8_t* padptr = padPixels + y * padWidth + 2*pad;
1423 const uint8_t* srcptr = srcPixels + y * srcWidth;
1424 memcpy(padptr, srcptr, srcWidth);
1425 }
1426
1427 // blur in X, transposing the result into a temporary floating point buffer.
1428 // also double-pad the intermediate result so that the second blur doesn't
1429 // have to do extra conditionals.
1430
1431 int tmpWidth = padHeight + 4*pad;
1432 int tmpHeight = padWidth - 2*pad;
1433 int tmpSize = tmpWidth * tmpHeight;
1434
1435 SkAutoTMalloc<float> tmpImage(tmpSize);
1436 memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
1437
1438 for (int y = 0 ; y < padHeight ; ++y) {
1439 uint8_t *srcScanline = padPixels + y*padWidth;
1440 for (int x = pad ; x < padWidth - pad ; ++x) {
1441 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
1442 uint8_t *windowCenter = srcScanline + x;
1443 for (int i = -pad ; i <= pad ; ++i) {
1444 *outPixel += gaussWindow[pad+i]*windowCenter[i];
1445 }
1446 *outPixel /= windowSum;
1447 }
1448 }
1449
1450 // blur in Y; now filling in the actual desired destination. We have to do
1451 // the transpose again; these transposes guarantee that we read memory in
1452 // linear order.
1453
1454 for (int y = 0 ; y < tmpHeight ; ++y) {
1455 float *srcScanline = tmpImage + y*tmpWidth;
1456 for (int x = pad ; x < tmpWidth - pad ; ++x) {
1457 float *windowCenter = srcScanline + x;
1458 float finalValue = 0;
1459 for (int i = -pad ; i <= pad ; ++i) {
1460 finalValue += gaussWindow[pad+i]*windowCenter[i];
1461 }
1462 finalValue /= windowSum;
1463 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
1464 int integerPixel = int(finalValue + 0.5f);
1465 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
1466 }
1467 }
1468
1469 dst->fImage = dstPixels;
1470 // if need be, alloc the "real" dst (same size as src) and copy/merge
1471 // the blur into it (applying the src)
1472 if (style == kInner_Style) {
1473 // now we allocate the "real" dst, mirror the size of src
1474 size_t srcSize = src.computeImageSize();
1475 if (0 == srcSize) {
1476 return false; // too big to allocate, abort
1477 }
1478 dst->fImage = SkMask::AllocImage(srcSize);
1479 merge_src_with_blur(dst->fImage, src.fRowBytes,
1480 srcPixels, src.fRowBytes,
1481 dstPixels + pad*dst->fRowBytes + pad,
1482 dst->fRowBytes, srcWidth, srcHeight);
1483 SkMask::FreeImage(dstPixels);
1484 } else if (style != kNormal_Style) {
1485 clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
1486 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
1487 }
1488 (void)autoCall.detach();
1489 }
1490
1491 if (style == kInner_Style) {
1492 dst->fBounds = src.fBounds; // restore trimmed bounds
1493 dst->fRowBytes = src.fRowBytes;
1494 }
skia.committer@gmail.com8ae714b2013-01-05 02:02:05 +00001495
humper@google.com7c7292c2013-01-04 20:29:03 +00001496 return true;
1497}