blob: 4873685dc33ca3febf00e1653f5b24edd43b24ec [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000015/**
16 * This function performs a box blur in X, of the given radius. If the
skia.committer@gmail.com884e60b2012-11-16 02:01:17 +000017 * "transpose" parameter is true, it will transpose the pixels on write,
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000018 * such that X and Y are swapped. Reads are always performed from contiguous
19 * memory in X, for speed. The destination buffer (dst) must be at least
20 * (width + radius * 2) * height bytes in size.
21 */
22static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000023 int leftRadius, int rightRadius, int width, int height,
24 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000025{
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000026 int kernelSize = leftRadius + rightRadius + 1;
27 int border = SkMin32(width, leftRadius + rightRadius);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000028 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000029 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000030 int dst_x_stride = transpose ? height : 1;
31 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000032 for (int y = 0; y < height; ++y) {
33 int sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000034 uint8_t* dptr = dst + y * dst_y_stride;
35 const uint8_t* right = src + y * src_y_stride;
36 const uint8_t* left = right;
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000037 for (int x = 0; x < rightRadius - leftRadius; x++) {
38 *dptr = 0;
39 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000040 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000041 for (int x = 0; x < border; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000042 sum += *right++;
43 *dptr = (sum * scale) >> 24;
44 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000045 }
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000046 for (int x = width; x < leftRadius + rightRadius; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000047 *dptr = (sum * scale) >> 24;
48 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000049 }
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000050 for (int x = leftRadius + rightRadius; x < width; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000051 sum += *right++;
52 *dptr = (sum * scale) >> 24;
53 sum -= *left++;
54 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000055 }
56 for (int x = 0; x < border; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000057 *dptr = (sum * scale) >> 24;
58 sum -= *left++;
59 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000060 }
senorblanco@chromium.org336b4da2012-11-20 17:09:40 +000061 for (int x = 0; x < leftRadius - rightRadius; x++) {
62 *dptr = 0;
63 dptr += dst_x_stride;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000064 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000065 SkASSERT(sum == 0);
66 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000067 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000068}
69
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000070static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
71{
72 *loRadius = *hiRadius = SkScalarCeil(passRadius);
73 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
74 *loRadius = *hiRadius - 1;
75 }
76}
77
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and a 2% slowdown on Linux.
tomhudson@google.com054ff1e2012-01-11 19:29:08 +000082#if defined(SK_BUILD_FOR_WIN32)
tomhudson@google.com01224d52011-11-28 18:22:01 +000083#define UNROLL_KERNEL_LOOP 1
84#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +000085
reed@android.com45607672009-09-21 00:27:08 +000086/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
87 src values at their position, plus all values above and to the left.
88 When we sample into this buffer, we need an initial row and column of 0s,
89 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +000090
reed@android.com45607672009-09-21 00:27:08 +000091 src[i, j] == sum[i+1, j+1]
92 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +000093
reed@android.com45607672009-09-21 00:27:08 +000094 We assume that the sum buffer's stride == its width
95 */
reed@google.com03016a32011-08-12 14:59:59 +000096static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
97 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +000098 int sumW = srcW + 1;
99
100 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000101 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000102 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000103
104 int x, y;
105
reed@android.com45607672009-09-21 00:27:08 +0000106 // zero out the top row and column
107 memset(sum, 0, sumW * sizeof(sum[0]));
108 sum += sumW;
109
reed@android.com8a1c16f2008-12-17 15:59:43 +0000110 // special case first row
111 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000112 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000113 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000114 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000115 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000116 }
117 src += srcRB;
118
119 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000120 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000121 uint32_t L = 0;
122 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000123 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000124
125 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
126 uint32_t T = sum[-sumW];
127 X = *src++ + L + T - C;
128 *sum++ = X;
129 L = X;
130 C = T;
131 }
132
133 for (; x >= 4; x-=4) {
134 uint32_t T = sum[-sumW];
135 X = *src++ + L + T - C;
136 *sum++ = X;
137 L = X;
138 C = T;
139 T = sum[-sumW];
140 X = *src++ + L + T - C;
141 *sum++ = X;
142 L = X;
143 C = T;
144 T = sum[-sumW];
145 X = *src++ + L + T - C;
146 *sum++ = X;
147 L = X;
148 C = T;
149 T = sum[-sumW];
150 X = *src++ + L + T - C;
151 *sum++ = X;
152 L = X;
153 C = T;
154 }
155
156 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000157 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000158 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000159 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000160 L = X;
161 C = T;
162 }
163 src += srcRB;
164 }
165}
166
reed@google.com03016a32011-08-12 14:59:59 +0000167/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000168 * This is the path for apply_kernel() to be taken when the kernel
169 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000170 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000171static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
172 int sw, int sh) {
173 SkASSERT(2*rx > sw);
174
reed@android.com8a1c16f2008-12-17 15:59:43 +0000175 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
176
reed@android.com45607672009-09-21 00:27:08 +0000177 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000178
179 int dw = sw + 2*rx;
180 int dh = sh + 2*ry;
181
reed@android.com45607672009-09-21 00:27:08 +0000182 int prev_y = -2*ry;
183 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000184
reed@android.com45607672009-09-21 00:27:08 +0000185 for (int y = 0; y < dh; y++) {
186 int py = SkClampPos(prev_y) * sumStride;
187 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000188
reed@android.com45607672009-09-21 00:27:08 +0000189 int prev_x = -2*rx;
190 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000191
reed@android.com45607672009-09-21 00:27:08 +0000192 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000193 int px = SkClampPos(prev_x);
194 int nx = SkFastMin32(next_x, sw);
195
reed@android.com45607672009-09-21 00:27:08 +0000196 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
197 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000198
199 prev_x += 1;
200 next_x += 1;
201 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000202
203 prev_y += 1;
204 next_y += 1;
205 }
206}
207/**
208 * sw and sh are the width and height of the src. Since the sum buffer
209 * matches that, but has an extra row and col at the beginning (with zeros),
210 * we can just use sw and sh as our "max" values for pinning coordinates
211 * when sampling into sum[][]
212 *
213 * The inner loop is conceptually simple; we break it into several sections
214 * to improve performance. Here's the original version:
215 for (int x = 0; x < dw; x++) {
216 int px = SkClampPos(prev_x);
217 int nx = SkFastMin32(next_x, sw);
218
219 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
220 *dst++ = SkToU8(tmp * scale >> 24);
221
222 prev_x += 1;
223 next_x += 1;
224 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000225 * The sections are:
226 * left-hand section, where prev_x is clamped to 0
227 * center section, where neither prev_x nor next_x is clamped
228 * right-hand section, where next_x is clamped to sw
229 * On some operating systems, the center section is unrolled for additional
230 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000231*/
232static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
233 int sw, int sh) {
234 if (2*rx > sw) {
235 kernel_clamped(dst, rx, ry, sum, sw, sh);
236 return;
237 }
238
239 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
240
241 int sumStride = sw + 1;
242
243 int dw = sw + 2*rx;
244 int dh = sh + 2*ry;
245
246 int prev_y = -2*ry;
247 int next_y = 1;
248
249 SkASSERT(2*rx <= dw - 2*rx);
250
251 for (int y = 0; y < dh; y++) {
252 int py = SkClampPos(prev_y) * sumStride;
253 int ny = SkFastMin32(next_y, sh) * sumStride;
254
255 int prev_x = -2*rx;
256 int next_x = 1;
257 int x = 0;
258
259 for (; x < 2*rx; x++) {
260 SkASSERT(prev_x <= 0);
261 SkASSERT(next_x <= sw);
262
263 int px = 0;
264 int nx = next_x;
265
266 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
267 *dst++ = SkToU8(tmp * scale >> 24);
268
269 prev_x += 1;
270 next_x += 1;
271 }
272
tomhudson@google.com01224d52011-11-28 18:22:01 +0000273 int i0 = prev_x + py;
274 int i1 = next_x + ny;
275 int i2 = next_x + py;
276 int i3 = prev_x + ny;
277
278#if UNROLL_KERNEL_LOOP
279 for (; x < dw - 2*rx - 4; x += 4) {
280 SkASSERT(prev_x >= 0);
281 SkASSERT(next_x <= sw);
282
283 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
284 *dst++ = SkToU8(tmp * scale >> 24);
285 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
286 *dst++ = SkToU8(tmp * scale >> 24);
287 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
288 *dst++ = SkToU8(tmp * scale >> 24);
289 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
290 *dst++ = SkToU8(tmp * scale >> 24);
291
292 prev_x += 4;
293 next_x += 4;
294 }
295#endif
296
tomhudson@google.com8caac642011-11-22 15:58:06 +0000297 for (; x < dw - 2*rx; x++) {
298 SkASSERT(prev_x >= 0);
299 SkASSERT(next_x <= sw);
300
tomhudson@google.com01224d52011-11-28 18:22:01 +0000301 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000302 *dst++ = SkToU8(tmp * scale >> 24);
303
304 prev_x += 1;
305 next_x += 1;
306 }
307
308 for (; x < dw; x++) {
309 SkASSERT(prev_x >= 0);
310 SkASSERT(next_x > sw);
311
312 int px = prev_x;
313 int nx = sw;
314
315 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
316 *dst++ = SkToU8(tmp * scale >> 24);
317
318 prev_x += 1;
319 next_x += 1;
320 }
321
reed@android.com8a1c16f2008-12-17 15:59:43 +0000322 prev_y += 1;
323 next_y += 1;
324 }
325}
326
reed@google.com03016a32011-08-12 14:59:59 +0000327/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000328 * This is the path for apply_kernel_interp() to be taken when the kernel
329 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000330 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000331static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000332 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000333 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000334
335 int inner_weight = 255 - outer_weight;
336
337 // round these guys up if they're bigger than 127
338 outer_weight += outer_weight >> 7;
339 inner_weight += inner_weight >> 7;
340
341 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
342 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
343
reed@android.com45607672009-09-21 00:27:08 +0000344 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000345
346 int dw = sw + 2*rx;
347 int dh = sh + 2*ry;
348
reed@android.com45607672009-09-21 00:27:08 +0000349 int prev_y = -2*ry;
350 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000351
reed@android.com45607672009-09-21 00:27:08 +0000352 for (int y = 0; y < dh; y++) {
353 int py = SkClampPos(prev_y) * sumStride;
354 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000355
reed@android.com45607672009-09-21 00:27:08 +0000356 int ipy = SkClampPos(prev_y + 1) * sumStride;
357 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000358
reed@android.com45607672009-09-21 00:27:08 +0000359 int prev_x = -2*rx;
360 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000361
reed@android.com45607672009-09-21 00:27:08 +0000362 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000363 int px = SkClampPos(prev_x);
364 int nx = SkFastMin32(next_x, sw);
365
366 int ipx = SkClampPos(prev_x + 1);
367 int inx = SkClampMax(next_x - 1, sw);
368
tomhudson@google.com8caac642011-11-22 15:58:06 +0000369 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
370 - sum[nx+py] - sum[px+ny];
371 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
372 - sum[inx+ipy] - sum[ipx+iny];
373 *dst++ = SkToU8((outer_sum * outer_scale
374 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000375
376 prev_x += 1;
377 next_x += 1;
378 }
379 prev_y += 1;
380 next_y += 1;
381 }
382}
383
tomhudson@google.com8caac642011-11-22 15:58:06 +0000384/**
385 * sw and sh are the width and height of the src. Since the sum buffer
386 * matches that, but has an extra row and col at the beginning (with zeros),
387 * we can just use sw and sh as our "max" values for pinning coordinates
388 * when sampling into sum[][]
389 *
390 * The inner loop is conceptually simple; we break it into several variants
391 * to improve performance. Here's the original version:
392 for (int x = 0; x < dw; x++) {
393 int px = SkClampPos(prev_x);
394 int nx = SkFastMin32(next_x, sw);
395
396 int ipx = SkClampPos(prev_x + 1);
397 int inx = SkClampMax(next_x - 1, sw);
398
399 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
400 - sum[nx+py] - sum[px+ny];
401 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
402 - sum[inx+ipy] - sum[ipx+iny];
403 *dst++ = SkToU8((outer_sum * outer_scale
404 + inner_sum * inner_scale) >> 24);
405
406 prev_x += 1;
407 next_x += 1;
408 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000409 * The sections are:
410 * left-hand section, where prev_x is clamped to 0
411 * center section, where neither prev_x nor next_x is clamped
412 * right-hand section, where next_x is clamped to sw
413 * On some operating systems, the center section is unrolled for additional
414 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000415*/
416static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
417 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
418 SkASSERT(rx > 0 && ry > 0);
419 SkASSERT(outer_weight <= 255);
420
421 if (2*rx > sw) {
422 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
423 return;
424 }
425
426 int inner_weight = 255 - outer_weight;
427
428 // round these guys up if they're bigger than 127
429 outer_weight += outer_weight >> 7;
430 inner_weight += inner_weight >> 7;
431
432 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
433 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
434
435 int sumStride = sw + 1;
436
437 int dw = sw + 2*rx;
438 int dh = sh + 2*ry;
439
440 int prev_y = -2*ry;
441 int next_y = 1;
442
443 SkASSERT(2*rx <= dw - 2*rx);
444
445 for (int y = 0; y < dh; y++) {
446 int py = SkClampPos(prev_y) * sumStride;
447 int ny = SkFastMin32(next_y, sh) * sumStride;
448
449 int ipy = SkClampPos(prev_y + 1) * sumStride;
450 int iny = SkClampMax(next_y - 1, sh) * sumStride;
451
452 int prev_x = -2*rx;
453 int next_x = 1;
454 int x = 0;
455
456 for (; x < 2*rx; x++) {
457 SkASSERT(prev_x < 0);
458 SkASSERT(next_x <= sw);
459
460 int px = 0;
461 int nx = next_x;
462
463 int ipx = 0;
464 int inx = next_x - 1;
465
466 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
467 - sum[nx+py] - sum[px+ny];
468 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
469 - sum[inx+ipy] - sum[ipx+iny];
470 *dst++ = SkToU8((outer_sum * outer_scale
471 + inner_sum * inner_scale) >> 24);
472
473 prev_x += 1;
474 next_x += 1;
475 }
476
tomhudson@google.com01224d52011-11-28 18:22:01 +0000477 int i0 = prev_x + py;
478 int i1 = next_x + ny;
479 int i2 = next_x + py;
480 int i3 = prev_x + ny;
481 int i4 = prev_x + 1 + ipy;
482 int i5 = next_x - 1 + iny;
483 int i6 = next_x - 1 + ipy;
484 int i7 = prev_x + 1 + iny;
485
486#if UNROLL_KERNEL_LOOP
487 for (; x < dw - 2*rx - 4; x += 4) {
488 SkASSERT(prev_x >= 0);
489 SkASSERT(next_x <= sw);
490
491 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
492 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
493 *dst++ = SkToU8((outer_sum * outer_scale
494 + inner_sum * inner_scale) >> 24);
495 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
496 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
497 *dst++ = SkToU8((outer_sum * outer_scale
498 + inner_sum * inner_scale) >> 24);
499 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
500 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
501 *dst++ = SkToU8((outer_sum * outer_scale
502 + inner_sum * inner_scale) >> 24);
503 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
504 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
505 *dst++ = SkToU8((outer_sum * outer_scale
506 + inner_sum * inner_scale) >> 24);
507
508 prev_x += 4;
509 next_x += 4;
510 }
511#endif
512
tomhudson@google.com8caac642011-11-22 15:58:06 +0000513 for (; x < dw - 2*rx; x++) {
514 SkASSERT(prev_x >= 0);
515 SkASSERT(next_x <= sw);
516
tomhudson@google.com01224d52011-11-28 18:22:01 +0000517 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
518 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000519 *dst++ = SkToU8((outer_sum * outer_scale
520 + inner_sum * inner_scale) >> 24);
521
522 prev_x += 1;
523 next_x += 1;
524 }
525
526 for (; x < dw; x++) {
527 SkASSERT(prev_x >= 0);
528 SkASSERT(next_x > sw);
529
530 int px = prev_x;
531 int nx = sw;
532
533 int ipx = prev_x + 1;
534 int inx = sw;
535
536 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
537 - sum[nx+py] - sum[px+ny];
538 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
539 - sum[inx+ipy] - sum[ipx+iny];
540 *dst++ = SkToU8((outer_sum * outer_scale
541 + inner_sum * inner_scale) >> 24);
542
543 prev_x += 1;
544 next_x += 1;
545 }
546
547 prev_y += 1;
548 next_y += 1;
549 }
550}
551
reed@android.com8a1c16f2008-12-17 15:59:43 +0000552#include "SkColorPriv.h"
553
reed@android.com0e3c6642009-09-18 13:41:56 +0000554static void merge_src_with_blur(uint8_t dst[], int dstRB,
555 const uint8_t src[], int srcRB,
556 const uint8_t blur[], int blurRB,
557 int sw, int sh) {
558 dstRB -= sw;
559 srcRB -= sw;
560 blurRB -= sw;
561 while (--sh >= 0) {
562 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000563 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
564 dst += 1;
565 src += 1;
566 blur += 1;
567 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000568 dst += dstRB;
569 src += srcRB;
570 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000571 }
572}
573
574static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000575 const uint8_t src[], int srcRowBytes,
576 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000577 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000578 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000579 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000580 switch (style) {
581 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000582 for (x = sw - 1; x >= 0; --x) {
583 int s = *src;
584 int d = *dst;
585 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000586 dst += 1;
587 src += 1;
588 }
589 break;
590 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000591 for (x = sw - 1; x >= 0; --x) {
592 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000593 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000594 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000595 dst += 1;
596 src += 1;
597 }
598 break;
599 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000600 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000601 break;
602 }
603 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000604 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000605 }
606}
607
reed@google.com03016a32011-08-12 14:59:59 +0000608///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000609
610// we use a local funciton to wrap the class static method to work around
611// a bug in gcc98
612void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000613void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000614 SkMask::FreeImage(image);
615}
616
617bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000618 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000619 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000620{
reed@google.com03016a32011-08-12 14:59:59 +0000621 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000622 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000623 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000624
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000625 // Force high quality off for small radii (performance)
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000626 if (radius < SkIntToScalar(3) && !separable) quality = kLow_Quality;
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000627
628 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
629 int passCount = (quality == kHigh_Quality) ? 3 : 1;
630 SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
631
632 int rx = SkScalarCeil(passRadius);
633 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000634
635 SkASSERT(rx >= 0);
636 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000637 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000638 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000639 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000640
641 int ry = rx; // only do square blur for now
642
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000643 int padx = passCount * rx;
644 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000645 if (margin) {
646 margin->set(padx, pady);
647 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000648 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
649 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000650 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000651 dst->fFormat = SkMask::kA8_Format;
652 dst->fImage = NULL;
653
reed@android.com0e3c6642009-09-18 13:41:56 +0000654 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000655 size_t dstSize = dst->computeImageSize();
656 if (0 == dstSize) {
657 return false; // too big to allocate, abort
658 }
659
reed@android.com8a1c16f2008-12-17 15:59:43 +0000660 int sw = src.fBounds.width();
661 int sh = src.fBounds.height();
662 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +0000663 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000664
665 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
666
667 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000668 if (separable) {
669 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
670 uint8_t* tp = tmpBuffer.get();
671 int w = sw, h = sh;
672
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000673 if (quality == kHigh_Quality) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000674 int loRadius, hiRadius;
675 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000676 // Do three X blurs, with a transpose on the final one.
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000677 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
678 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
679 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000680 // Do three Y blurs, with a transpose on the final one.
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000681 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
682 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
683 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000684 } else {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000685 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
686 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000687 }
688 } else {
reed@google.com03016a32011-08-12 14:59:59 +0000689 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
690 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
691 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000692 uint32_t* sumBuffer = storage.get();
693
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000694 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +0000695 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +0000696 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000697 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +0000698 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000699 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +0000700 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000701
reed@google.com03016a32011-08-12 14:59:59 +0000702 if (quality == kHigh_Quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000703 //pass2: dp is source, tmpBuffer is destination
704 int tmp_sw = sw + 2 * rx;
705 int tmp_sh = sh + 2 * ry;
706 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
707 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
708 if (outer_weight == 255)
709 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
710 else
reed@google.com03016a32011-08-12 14:59:59 +0000711 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
712 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000713
714 //pass3: tmpBuffer is source, dp is destination
715 tmp_sw += 2 * rx;
716 tmp_sh += 2 * ry;
717 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
718 if (outer_weight == 255)
719 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
720 else
reed@google.com03016a32011-08-12 14:59:59 +0000721 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
722 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000723 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000724 }
725
726 dst->fImage = dp;
727 // if need be, alloc the "real" dst (same size as src) and copy/merge
728 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +0000729 if (style == kInner_Style) {
730 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +0000731 size_t srcSize = src.computeImageSize();
732 if (0 == srcSize) {
733 return false; // too big to allocate, abort
734 }
735 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +0000736 merge_src_with_blur(dst->fImage, src.fRowBytes,
737 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +0000738 dp + passCount * (rx + ry * dst->fRowBytes),
739 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000740 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +0000741 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +0000742 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
743 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000744 }
745 (void)autoCall.detach();
746 }
747
reed@android.com0e3c6642009-09-18 13:41:56 +0000748 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000749 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +0000750 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000751 }
752
reed@android.com8a1c16f2008-12-17 15:59:43 +0000753 return true;
754}
755
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000756bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
757 SkScalar radius, Style style, Quality quality,
758 SkIPoint* margin)
759{
760 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
761}
762
763bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
764 SkScalar radius, Style style, Quality quality,
765 SkIPoint* margin)
766{
767 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
768}