blob: e762ae6f65879c6006976c28b9ff100748d21fd7 [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000015/**
16 * This function performs a box blur in X, of the given radius. If the
skia.committer@gmail.com884e60b2012-11-16 02:01:17 +000017 * "transpose" parameter is true, it will transpose the pixels on write,
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000018 * such that X and Y are swapped. Reads are always performed from contiguous
19 * memory in X, for speed. The destination buffer (dst) must be at least
20 * (width + radius * 2) * height bytes in size.
21 */
22static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000023 int leftRadius, int rightRadius, int width, int height,
24 bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000025{
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000026 int kernelSize = leftRadius + rightRadius + 1;
27 int border = SkMin32(width, leftRadius + rightRadius);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000028 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000029 int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000030 int dst_x_stride = transpose ? height : 1;
31 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000032 for (int y = 0; y < height; ++y) {
33 int sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000034 uint8_t* dptr = dst + y * dst_y_stride;
35 const uint8_t* right = src + y * src_y_stride;
36 const uint8_t* left = right;
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000037 for (int x = 0; x < leftRadius - rightRadius; x++) {
38 *dptr++ = 0;
39 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000040 for (int x = 0; x < border; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000041 sum += *right++;
42 *dptr = (sum * scale) >> 24;
43 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000044 }
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000045 for (int x = width; x < leftRadius + rightRadius; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000046 *dptr = (sum * scale) >> 24;
47 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000048 }
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000049 for (int x = leftRadius + rightRadius; x < width; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000050 sum += *right++;
51 *dptr = (sum * scale) >> 24;
52 sum -= *left++;
53 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000054 }
55 for (int x = 0; x < border; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000056 *dptr = (sum * scale) >> 24;
57 sum -= *left++;
58 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000059 }
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000060 for (int x = 0; x < rightRadius - leftRadius; x++) {
61 *dptr++ = 0;
62 }
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000063 SkASSERT(sum == 0);
64 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000065 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000066}
67
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +000068static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
69{
70 *loRadius = *hiRadius = SkScalarCeil(passRadius);
71 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
72 *loRadius = *hiRadius - 1;
73 }
74}
75
tomhudson@google.com01224d52011-11-28 18:22:01 +000076// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
77// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
79// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
tomhudson@google.com054ff1e2012-01-11 19:29:08 +000080#if defined(SK_BUILD_FOR_WIN32)
tomhudson@google.com01224d52011-11-28 18:22:01 +000081#define UNROLL_KERNEL_LOOP 1
82#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +000083
reed@android.com45607672009-09-21 00:27:08 +000084/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
85 src values at their position, plus all values above and to the left.
86 When we sample into this buffer, we need an initial row and column of 0s,
87 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +000088
reed@android.com45607672009-09-21 00:27:08 +000089 src[i, j] == sum[i+1, j+1]
90 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +000091
reed@android.com45607672009-09-21 00:27:08 +000092 We assume that the sum buffer's stride == its width
93 */
reed@google.com03016a32011-08-12 14:59:59 +000094static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
95 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +000096 int sumW = srcW + 1;
97
98 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +000099 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000100 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000101
102 int x, y;
103
reed@android.com45607672009-09-21 00:27:08 +0000104 // zero out the top row and column
105 memset(sum, 0, sumW * sizeof(sum[0]));
106 sum += sumW;
107
reed@android.com8a1c16f2008-12-17 15:59:43 +0000108 // special case first row
109 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000110 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000111 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000112 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000113 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000114 }
115 src += srcRB;
116
117 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000118 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000119 uint32_t L = 0;
120 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000121 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000122
123 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
124 uint32_t T = sum[-sumW];
125 X = *src++ + L + T - C;
126 *sum++ = X;
127 L = X;
128 C = T;
129 }
130
131 for (; x >= 4; x-=4) {
132 uint32_t T = sum[-sumW];
133 X = *src++ + L + T - C;
134 *sum++ = X;
135 L = X;
136 C = T;
137 T = sum[-sumW];
138 X = *src++ + L + T - C;
139 *sum++ = X;
140 L = X;
141 C = T;
142 T = sum[-sumW];
143 X = *src++ + L + T - C;
144 *sum++ = X;
145 L = X;
146 C = T;
147 T = sum[-sumW];
148 X = *src++ + L + T - C;
149 *sum++ = X;
150 L = X;
151 C = T;
152 }
153
154 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000155 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000156 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000157 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000158 L = X;
159 C = T;
160 }
161 src += srcRB;
162 }
163}
164
reed@google.com03016a32011-08-12 14:59:59 +0000165/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000166 * This is the path for apply_kernel() to be taken when the kernel
167 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000168 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000169static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
170 int sw, int sh) {
171 SkASSERT(2*rx > sw);
172
reed@android.com8a1c16f2008-12-17 15:59:43 +0000173 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
174
reed@android.com45607672009-09-21 00:27:08 +0000175 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000176
177 int dw = sw + 2*rx;
178 int dh = sh + 2*ry;
179
reed@android.com45607672009-09-21 00:27:08 +0000180 int prev_y = -2*ry;
181 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000182
reed@android.com45607672009-09-21 00:27:08 +0000183 for (int y = 0; y < dh; y++) {
184 int py = SkClampPos(prev_y) * sumStride;
185 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000186
reed@android.com45607672009-09-21 00:27:08 +0000187 int prev_x = -2*rx;
188 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000189
reed@android.com45607672009-09-21 00:27:08 +0000190 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000191 int px = SkClampPos(prev_x);
192 int nx = SkFastMin32(next_x, sw);
193
reed@android.com45607672009-09-21 00:27:08 +0000194 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
195 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000196
197 prev_x += 1;
198 next_x += 1;
199 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000200
201 prev_y += 1;
202 next_y += 1;
203 }
204}
205/**
206 * sw and sh are the width and height of the src. Since the sum buffer
207 * matches that, but has an extra row and col at the beginning (with zeros),
208 * we can just use sw and sh as our "max" values for pinning coordinates
209 * when sampling into sum[][]
210 *
211 * The inner loop is conceptually simple; we break it into several sections
212 * to improve performance. Here's the original version:
213 for (int x = 0; x < dw; x++) {
214 int px = SkClampPos(prev_x);
215 int nx = SkFastMin32(next_x, sw);
216
217 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
218 *dst++ = SkToU8(tmp * scale >> 24);
219
220 prev_x += 1;
221 next_x += 1;
222 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000223 * The sections are:
224 * left-hand section, where prev_x is clamped to 0
225 * center section, where neither prev_x nor next_x is clamped
226 * right-hand section, where next_x is clamped to sw
227 * On some operating systems, the center section is unrolled for additional
228 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000229*/
230static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
231 int sw, int sh) {
232 if (2*rx > sw) {
233 kernel_clamped(dst, rx, ry, sum, sw, sh);
234 return;
235 }
236
237 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
238
239 int sumStride = sw + 1;
240
241 int dw = sw + 2*rx;
242 int dh = sh + 2*ry;
243
244 int prev_y = -2*ry;
245 int next_y = 1;
246
247 SkASSERT(2*rx <= dw - 2*rx);
248
249 for (int y = 0; y < dh; y++) {
250 int py = SkClampPos(prev_y) * sumStride;
251 int ny = SkFastMin32(next_y, sh) * sumStride;
252
253 int prev_x = -2*rx;
254 int next_x = 1;
255 int x = 0;
256
257 for (; x < 2*rx; x++) {
258 SkASSERT(prev_x <= 0);
259 SkASSERT(next_x <= sw);
260
261 int px = 0;
262 int nx = next_x;
263
264 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
265 *dst++ = SkToU8(tmp * scale >> 24);
266
267 prev_x += 1;
268 next_x += 1;
269 }
270
tomhudson@google.com01224d52011-11-28 18:22:01 +0000271 int i0 = prev_x + py;
272 int i1 = next_x + ny;
273 int i2 = next_x + py;
274 int i3 = prev_x + ny;
275
276#if UNROLL_KERNEL_LOOP
277 for (; x < dw - 2*rx - 4; x += 4) {
278 SkASSERT(prev_x >= 0);
279 SkASSERT(next_x <= sw);
280
281 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
282 *dst++ = SkToU8(tmp * scale >> 24);
283 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
284 *dst++ = SkToU8(tmp * scale >> 24);
285 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
286 *dst++ = SkToU8(tmp * scale >> 24);
287 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
288 *dst++ = SkToU8(tmp * scale >> 24);
289
290 prev_x += 4;
291 next_x += 4;
292 }
293#endif
294
tomhudson@google.com8caac642011-11-22 15:58:06 +0000295 for (; x < dw - 2*rx; x++) {
296 SkASSERT(prev_x >= 0);
297 SkASSERT(next_x <= sw);
298
tomhudson@google.com01224d52011-11-28 18:22:01 +0000299 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000300 *dst++ = SkToU8(tmp * scale >> 24);
301
302 prev_x += 1;
303 next_x += 1;
304 }
305
306 for (; x < dw; x++) {
307 SkASSERT(prev_x >= 0);
308 SkASSERT(next_x > sw);
309
310 int px = prev_x;
311 int nx = sw;
312
313 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
314 *dst++ = SkToU8(tmp * scale >> 24);
315
316 prev_x += 1;
317 next_x += 1;
318 }
319
reed@android.com8a1c16f2008-12-17 15:59:43 +0000320 prev_y += 1;
321 next_y += 1;
322 }
323}
324
reed@google.com03016a32011-08-12 14:59:59 +0000325/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000326 * This is the path for apply_kernel_interp() to be taken when the kernel
327 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000328 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000329static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000330 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000331 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000332
333 int inner_weight = 255 - outer_weight;
334
335 // round these guys up if they're bigger than 127
336 outer_weight += outer_weight >> 7;
337 inner_weight += inner_weight >> 7;
338
339 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
340 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
341
reed@android.com45607672009-09-21 00:27:08 +0000342 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000343
344 int dw = sw + 2*rx;
345 int dh = sh + 2*ry;
346
reed@android.com45607672009-09-21 00:27:08 +0000347 int prev_y = -2*ry;
348 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000349
reed@android.com45607672009-09-21 00:27:08 +0000350 for (int y = 0; y < dh; y++) {
351 int py = SkClampPos(prev_y) * sumStride;
352 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000353
reed@android.com45607672009-09-21 00:27:08 +0000354 int ipy = SkClampPos(prev_y + 1) * sumStride;
355 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000356
reed@android.com45607672009-09-21 00:27:08 +0000357 int prev_x = -2*rx;
358 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000359
reed@android.com45607672009-09-21 00:27:08 +0000360 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000361 int px = SkClampPos(prev_x);
362 int nx = SkFastMin32(next_x, sw);
363
364 int ipx = SkClampPos(prev_x + 1);
365 int inx = SkClampMax(next_x - 1, sw);
366
tomhudson@google.com8caac642011-11-22 15:58:06 +0000367 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
368 - sum[nx+py] - sum[px+ny];
369 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
370 - sum[inx+ipy] - sum[ipx+iny];
371 *dst++ = SkToU8((outer_sum * outer_scale
372 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000373
374 prev_x += 1;
375 next_x += 1;
376 }
377 prev_y += 1;
378 next_y += 1;
379 }
380}
381
tomhudson@google.com8caac642011-11-22 15:58:06 +0000382/**
383 * sw and sh are the width and height of the src. Since the sum buffer
384 * matches that, but has an extra row and col at the beginning (with zeros),
385 * we can just use sw and sh as our "max" values for pinning coordinates
386 * when sampling into sum[][]
387 *
388 * The inner loop is conceptually simple; we break it into several variants
389 * to improve performance. Here's the original version:
390 for (int x = 0; x < dw; x++) {
391 int px = SkClampPos(prev_x);
392 int nx = SkFastMin32(next_x, sw);
393
394 int ipx = SkClampPos(prev_x + 1);
395 int inx = SkClampMax(next_x - 1, sw);
396
397 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
398 - sum[nx+py] - sum[px+ny];
399 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
400 - sum[inx+ipy] - sum[ipx+iny];
401 *dst++ = SkToU8((outer_sum * outer_scale
402 + inner_sum * inner_scale) >> 24);
403
404 prev_x += 1;
405 next_x += 1;
406 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000407 * The sections are:
408 * left-hand section, where prev_x is clamped to 0
409 * center section, where neither prev_x nor next_x is clamped
410 * right-hand section, where next_x is clamped to sw
411 * On some operating systems, the center section is unrolled for additional
412 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000413*/
414static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
415 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
416 SkASSERT(rx > 0 && ry > 0);
417 SkASSERT(outer_weight <= 255);
418
419 if (2*rx > sw) {
420 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
421 return;
422 }
423
424 int inner_weight = 255 - outer_weight;
425
426 // round these guys up if they're bigger than 127
427 outer_weight += outer_weight >> 7;
428 inner_weight += inner_weight >> 7;
429
430 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
431 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
432
433 int sumStride = sw + 1;
434
435 int dw = sw + 2*rx;
436 int dh = sh + 2*ry;
437
438 int prev_y = -2*ry;
439 int next_y = 1;
440
441 SkASSERT(2*rx <= dw - 2*rx);
442
443 for (int y = 0; y < dh; y++) {
444 int py = SkClampPos(prev_y) * sumStride;
445 int ny = SkFastMin32(next_y, sh) * sumStride;
446
447 int ipy = SkClampPos(prev_y + 1) * sumStride;
448 int iny = SkClampMax(next_y - 1, sh) * sumStride;
449
450 int prev_x = -2*rx;
451 int next_x = 1;
452 int x = 0;
453
454 for (; x < 2*rx; x++) {
455 SkASSERT(prev_x < 0);
456 SkASSERT(next_x <= sw);
457
458 int px = 0;
459 int nx = next_x;
460
461 int ipx = 0;
462 int inx = next_x - 1;
463
464 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
465 - sum[nx+py] - sum[px+ny];
466 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
467 - sum[inx+ipy] - sum[ipx+iny];
468 *dst++ = SkToU8((outer_sum * outer_scale
469 + inner_sum * inner_scale) >> 24);
470
471 prev_x += 1;
472 next_x += 1;
473 }
474
tomhudson@google.com01224d52011-11-28 18:22:01 +0000475 int i0 = prev_x + py;
476 int i1 = next_x + ny;
477 int i2 = next_x + py;
478 int i3 = prev_x + ny;
479 int i4 = prev_x + 1 + ipy;
480 int i5 = next_x - 1 + iny;
481 int i6 = next_x - 1 + ipy;
482 int i7 = prev_x + 1 + iny;
483
484#if UNROLL_KERNEL_LOOP
485 for (; x < dw - 2*rx - 4; x += 4) {
486 SkASSERT(prev_x >= 0);
487 SkASSERT(next_x <= sw);
488
489 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
490 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
491 *dst++ = SkToU8((outer_sum * outer_scale
492 + inner_sum * inner_scale) >> 24);
493 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
494 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
495 *dst++ = SkToU8((outer_sum * outer_scale
496 + inner_sum * inner_scale) >> 24);
497 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
498 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
499 *dst++ = SkToU8((outer_sum * outer_scale
500 + inner_sum * inner_scale) >> 24);
501 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
502 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
503 *dst++ = SkToU8((outer_sum * outer_scale
504 + inner_sum * inner_scale) >> 24);
505
506 prev_x += 4;
507 next_x += 4;
508 }
509#endif
510
tomhudson@google.com8caac642011-11-22 15:58:06 +0000511 for (; x < dw - 2*rx; x++) {
512 SkASSERT(prev_x >= 0);
513 SkASSERT(next_x <= sw);
514
tomhudson@google.com01224d52011-11-28 18:22:01 +0000515 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
516 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000517 *dst++ = SkToU8((outer_sum * outer_scale
518 + inner_sum * inner_scale) >> 24);
519
520 prev_x += 1;
521 next_x += 1;
522 }
523
524 for (; x < dw; x++) {
525 SkASSERT(prev_x >= 0);
526 SkASSERT(next_x > sw);
527
528 int px = prev_x;
529 int nx = sw;
530
531 int ipx = prev_x + 1;
532 int inx = sw;
533
534 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
535 - sum[nx+py] - sum[px+ny];
536 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
537 - sum[inx+ipy] - sum[ipx+iny];
538 *dst++ = SkToU8((outer_sum * outer_scale
539 + inner_sum * inner_scale) >> 24);
540
541 prev_x += 1;
542 next_x += 1;
543 }
544
545 prev_y += 1;
546 next_y += 1;
547 }
548}
549
reed@android.com8a1c16f2008-12-17 15:59:43 +0000550#include "SkColorPriv.h"
551
reed@android.com0e3c6642009-09-18 13:41:56 +0000552static void merge_src_with_blur(uint8_t dst[], int dstRB,
553 const uint8_t src[], int srcRB,
554 const uint8_t blur[], int blurRB,
555 int sw, int sh) {
556 dstRB -= sw;
557 srcRB -= sw;
558 blurRB -= sw;
559 while (--sh >= 0) {
560 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000561 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
562 dst += 1;
563 src += 1;
564 blur += 1;
565 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000566 dst += dstRB;
567 src += srcRB;
568 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000569 }
570}
571
572static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000573 const uint8_t src[], int srcRowBytes,
574 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000575 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000576 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000577 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000578 switch (style) {
579 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000580 for (x = sw - 1; x >= 0; --x) {
581 int s = *src;
582 int d = *dst;
583 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000584 dst += 1;
585 src += 1;
586 }
587 break;
588 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000589 for (x = sw - 1; x >= 0; --x) {
590 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000591 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000592 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000593 dst += 1;
594 src += 1;
595 }
596 break;
597 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000598 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000599 break;
600 }
601 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000602 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000603 }
604}
605
reed@google.com03016a32011-08-12 14:59:59 +0000606///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000607
// We use a local function to wrap the class static method to work around
// a bug in gcc98. The wrapper is what gets passed as the template argument
// of SkAutoTCallVProc in SkBlurMask::Blur().
// The separate declaration below precedes the definition (presumably to keep
// missing-prototype warnings quiet -- confirm before removing).
void SkMask_FreeImage(uint8_t* image);
// Releases an image buffer by forwarding to SkMask::FreeImage().
void SkMask_FreeImage(uint8_t* image) {
    SkMask::FreeImage(image);
}
614
615bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000616 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000617 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000618{
reed@google.com03016a32011-08-12 14:59:59 +0000619 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000620 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000621 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000622
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000623 // Force high quality off for small radii (performance)
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000624 if (radius < SkIntToScalar(3) && !separable) quality = kLow_Quality;
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000625
626 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
627 int passCount = (quality == kHigh_Quality) ? 3 : 1;
628 SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
629
630 int rx = SkScalarCeil(passRadius);
631 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000632
633 SkASSERT(rx >= 0);
634 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000635 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000636 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000637 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000638
639 int ry = rx; // only do square blur for now
640
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000641 int padx = passCount * rx;
642 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000643 if (margin) {
644 margin->set(padx, pady);
645 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000646 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
647 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000648 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000649 dst->fFormat = SkMask::kA8_Format;
650 dst->fImage = NULL;
651
reed@android.com0e3c6642009-09-18 13:41:56 +0000652 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000653 size_t dstSize = dst->computeImageSize();
654 if (0 == dstSize) {
655 return false; // too big to allocate, abort
656 }
657
reed@android.com8a1c16f2008-12-17 15:59:43 +0000658 int sw = src.fBounds.width();
659 int sh = src.fBounds.height();
660 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +0000661 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000662
663 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
664
665 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000666 if (separable) {
667 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
668 uint8_t* tp = tmpBuffer.get();
669 int w = sw, h = sh;
670
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000671 if (quality == kHigh_Quality) {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000672 int loRadius, hiRadius;
673 get_adjusted_radii(passRadius, &loRadius, &hiRadius);
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000674 // Do three X blurs, with a transpose on the final one.
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000675 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
676 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);
677 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000678 // Do three Y blurs, with a transpose on the final one.
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000679 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);
680 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);
681 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000682 } else {
senorblanco@chromium.orgc4381302012-11-16 17:22:33 +0000683 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
684 h = boxBlur(tp, h, dp, ry, ry, h, w, true);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000685 }
686 } else {
reed@google.com03016a32011-08-12 14:59:59 +0000687 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
688 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
689 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000690 uint32_t* sumBuffer = storage.get();
691
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000692 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +0000693 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +0000694 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000695 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +0000696 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000697 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +0000698 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000699
reed@google.com03016a32011-08-12 14:59:59 +0000700 if (quality == kHigh_Quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000701 //pass2: dp is source, tmpBuffer is destination
702 int tmp_sw = sw + 2 * rx;
703 int tmp_sh = sh + 2 * ry;
704 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
705 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
706 if (outer_weight == 255)
707 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
708 else
reed@google.com03016a32011-08-12 14:59:59 +0000709 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
710 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000711
712 //pass3: tmpBuffer is source, dp is destination
713 tmp_sw += 2 * rx;
714 tmp_sh += 2 * ry;
715 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
716 if (outer_weight == 255)
717 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
718 else
reed@google.com03016a32011-08-12 14:59:59 +0000719 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
720 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000721 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000722 }
723
724 dst->fImage = dp;
725 // if need be, alloc the "real" dst (same size as src) and copy/merge
726 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +0000727 if (style == kInner_Style) {
728 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +0000729 size_t srcSize = src.computeImageSize();
730 if (0 == srcSize) {
731 return false; // too big to allocate, abort
732 }
733 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +0000734 merge_src_with_blur(dst->fImage, src.fRowBytes,
735 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +0000736 dp + passCount * (rx + ry * dst->fRowBytes),
737 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000738 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +0000739 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +0000740 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
741 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000742 }
743 (void)autoCall.detach();
744 }
745
reed@android.com0e3c6642009-09-18 13:41:56 +0000746 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000747 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +0000748 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000749 }
750
reed@android.com8a1c16f2008-12-17 15:59:43 +0000751 return true;
752}
753
/**
 *  Blurs src into dst using the separable implementation.
 *  Forwards to the seven-argument Blur() with separable == true and returns
 *  its result.
 */
bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
                               SkScalar radius, Style style, Quality quality,
                               SkIPoint* margin)
{
    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
}
760
/**
 *  Blurs src into dst using the non-separable (sum buffer) implementation.
 *  Forwards to the seven-argument Blur() with separable == false and returns
 *  its result.
 */
bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
                      SkScalar radius, Style style, Quality quality,
                      SkIPoint* margin)
{
    return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
}