blob: 95a1e6b2e5f4aeb66228cdc3da83aa3805fb42e5 [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
tomhudson@google.com054ff1e2012-01-11 19:29:08 +000019#if defined(SK_BUILD_FOR_WIN32)
tomhudson@google.com01224d52011-11-28 18:22:01 +000020#define UNROLL_KERNEL_LOOP 1
21#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +000022
reed@android.com45607672009-09-21 00:27:08 +000023/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
24 src values at their position, plus all values above and to the left.
25 When we sample into this buffer, we need an initial row and column of 0s,
26 so we have an index correspondence as follows:
27
28 src[i, j] == sum[i+1, j+1]
29 sum[0, j] == sum[i, 0] == 0
30
31 We assume that the sum buffer's stride == its width
32 */
reed@google.com03016a32011-08-12 14:59:59 +000033static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
34 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +000035 int sumW = srcW + 1;
36
37 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +000038 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +000039 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +000040
41 int x, y;
42
reed@android.com45607672009-09-21 00:27:08 +000043 // zero out the top row and column
44 memset(sum, 0, sumW * sizeof(sum[0]));
45 sum += sumW;
46
reed@android.com8a1c16f2008-12-17 15:59:43 +000047 // special case first row
48 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +000049 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +000050 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +000051 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +000052 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +000053 }
54 src += srcRB;
55
56 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +000057 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +000058 uint32_t L = 0;
59 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +000060 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +000061
62 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
63 uint32_t T = sum[-sumW];
64 X = *src++ + L + T - C;
65 *sum++ = X;
66 L = X;
67 C = T;
68 }
69
70 for (; x >= 4; x-=4) {
71 uint32_t T = sum[-sumW];
72 X = *src++ + L + T - C;
73 *sum++ = X;
74 L = X;
75 C = T;
76 T = sum[-sumW];
77 X = *src++ + L + T - C;
78 *sum++ = X;
79 L = X;
80 C = T;
81 T = sum[-sumW];
82 X = *src++ + L + T - C;
83 *sum++ = X;
84 L = X;
85 C = T;
86 T = sum[-sumW];
87 X = *src++ + L + T - C;
88 *sum++ = X;
89 L = X;
90 C = T;
91 }
92
93 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +000094 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +000095 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +000096 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +000097 L = X;
98 C = T;
99 }
100 src += srcRB;
101 }
102}
103
reed@google.com03016a32011-08-12 14:59:59 +0000104/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000105 * This is the path for apply_kernel() to be taken when the kernel
106 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000107 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000108static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
109 int sw, int sh) {
110 SkASSERT(2*rx > sw);
111
reed@android.com8a1c16f2008-12-17 15:59:43 +0000112 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
113
reed@android.com45607672009-09-21 00:27:08 +0000114 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000115
116 int dw = sw + 2*rx;
117 int dh = sh + 2*ry;
118
reed@android.com45607672009-09-21 00:27:08 +0000119 int prev_y = -2*ry;
120 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000121
reed@android.com45607672009-09-21 00:27:08 +0000122 for (int y = 0; y < dh; y++) {
123 int py = SkClampPos(prev_y) * sumStride;
124 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000125
reed@android.com45607672009-09-21 00:27:08 +0000126 int prev_x = -2*rx;
127 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000128
reed@android.com45607672009-09-21 00:27:08 +0000129 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000130 int px = SkClampPos(prev_x);
131 int nx = SkFastMin32(next_x, sw);
132
reed@android.com45607672009-09-21 00:27:08 +0000133 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
134 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000135
136 prev_x += 1;
137 next_x += 1;
138 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000139
140 prev_y += 1;
141 next_y += 1;
142 }
143}
144/**
145 * sw and sh are the width and height of the src. Since the sum buffer
146 * matches that, but has an extra row and col at the beginning (with zeros),
147 * we can just use sw and sh as our "max" values for pinning coordinates
148 * when sampling into sum[][]
149 *
150 * The inner loop is conceptually simple; we break it into several sections
151 * to improve performance. Here's the original version:
152 for (int x = 0; x < dw; x++) {
153 int px = SkClampPos(prev_x);
154 int nx = SkFastMin32(next_x, sw);
155
156 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
157 *dst++ = SkToU8(tmp * scale >> 24);
158
159 prev_x += 1;
160 next_x += 1;
161 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000162 * The sections are:
163 * left-hand section, where prev_x is clamped to 0
164 * center section, where neither prev_x nor next_x is clamped
165 * right-hand section, where next_x is clamped to sw
166 * On some operating systems, the center section is unrolled for additional
167 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000168*/
169static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
170 int sw, int sh) {
171 if (2*rx > sw) {
172 kernel_clamped(dst, rx, ry, sum, sw, sh);
173 return;
174 }
175
176 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
177
178 int sumStride = sw + 1;
179
180 int dw = sw + 2*rx;
181 int dh = sh + 2*ry;
182
183 int prev_y = -2*ry;
184 int next_y = 1;
185
186 SkASSERT(2*rx <= dw - 2*rx);
187
188 for (int y = 0; y < dh; y++) {
189 int py = SkClampPos(prev_y) * sumStride;
190 int ny = SkFastMin32(next_y, sh) * sumStride;
191
192 int prev_x = -2*rx;
193 int next_x = 1;
194 int x = 0;
195
196 for (; x < 2*rx; x++) {
197 SkASSERT(prev_x <= 0);
198 SkASSERT(next_x <= sw);
199
200 int px = 0;
201 int nx = next_x;
202
203 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
204 *dst++ = SkToU8(tmp * scale >> 24);
205
206 prev_x += 1;
207 next_x += 1;
208 }
209
tomhudson@google.com01224d52011-11-28 18:22:01 +0000210 int i0 = prev_x + py;
211 int i1 = next_x + ny;
212 int i2 = next_x + py;
213 int i3 = prev_x + ny;
214
215#if UNROLL_KERNEL_LOOP
216 for (; x < dw - 2*rx - 4; x += 4) {
217 SkASSERT(prev_x >= 0);
218 SkASSERT(next_x <= sw);
219
220 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
221 *dst++ = SkToU8(tmp * scale >> 24);
222 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
223 *dst++ = SkToU8(tmp * scale >> 24);
224 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
225 *dst++ = SkToU8(tmp * scale >> 24);
226 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
227 *dst++ = SkToU8(tmp * scale >> 24);
228
229 prev_x += 4;
230 next_x += 4;
231 }
232#endif
233
tomhudson@google.com8caac642011-11-22 15:58:06 +0000234 for (; x < dw - 2*rx; x++) {
235 SkASSERT(prev_x >= 0);
236 SkASSERT(next_x <= sw);
237
tomhudson@google.com01224d52011-11-28 18:22:01 +0000238 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000239 *dst++ = SkToU8(tmp * scale >> 24);
240
241 prev_x += 1;
242 next_x += 1;
243 }
244
245 for (; x < dw; x++) {
246 SkASSERT(prev_x >= 0);
247 SkASSERT(next_x > sw);
248
249 int px = prev_x;
250 int nx = sw;
251
252 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
253 *dst++ = SkToU8(tmp * scale >> 24);
254
255 prev_x += 1;
256 next_x += 1;
257 }
258
reed@android.com8a1c16f2008-12-17 15:59:43 +0000259 prev_y += 1;
260 next_y += 1;
261 }
262}
263
reed@google.com03016a32011-08-12 14:59:59 +0000264/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000265 * This is the path for apply_kernel_interp() to be taken when the kernel
266 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000267 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000268static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000269 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000270 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000271
272 int inner_weight = 255 - outer_weight;
273
274 // round these guys up if they're bigger than 127
275 outer_weight += outer_weight >> 7;
276 inner_weight += inner_weight >> 7;
277
278 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
279 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
280
reed@android.com45607672009-09-21 00:27:08 +0000281 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000282
283 int dw = sw + 2*rx;
284 int dh = sh + 2*ry;
285
reed@android.com45607672009-09-21 00:27:08 +0000286 int prev_y = -2*ry;
287 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000288
reed@android.com45607672009-09-21 00:27:08 +0000289 for (int y = 0; y < dh; y++) {
290 int py = SkClampPos(prev_y) * sumStride;
291 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000292
reed@android.com45607672009-09-21 00:27:08 +0000293 int ipy = SkClampPos(prev_y + 1) * sumStride;
294 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000295
reed@android.com45607672009-09-21 00:27:08 +0000296 int prev_x = -2*rx;
297 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000298
reed@android.com45607672009-09-21 00:27:08 +0000299 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000300 int px = SkClampPos(prev_x);
301 int nx = SkFastMin32(next_x, sw);
302
303 int ipx = SkClampPos(prev_x + 1);
304 int inx = SkClampMax(next_x - 1, sw);
305
tomhudson@google.com8caac642011-11-22 15:58:06 +0000306 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
307 - sum[nx+py] - sum[px+ny];
308 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
309 - sum[inx+ipy] - sum[ipx+iny];
310 *dst++ = SkToU8((outer_sum * outer_scale
311 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000312
313 prev_x += 1;
314 next_x += 1;
315 }
316 prev_y += 1;
317 next_y += 1;
318 }
319}
320
tomhudson@google.com8caac642011-11-22 15:58:06 +0000321/**
322 * sw and sh are the width and height of the src. Since the sum buffer
323 * matches that, but has an extra row and col at the beginning (with zeros),
324 * we can just use sw and sh as our "max" values for pinning coordinates
325 * when sampling into sum[][]
326 *
327 * The inner loop is conceptually simple; we break it into several variants
328 * to improve performance. Here's the original version:
329 for (int x = 0; x < dw; x++) {
330 int px = SkClampPos(prev_x);
331 int nx = SkFastMin32(next_x, sw);
332
333 int ipx = SkClampPos(prev_x + 1);
334 int inx = SkClampMax(next_x - 1, sw);
335
336 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
337 - sum[nx+py] - sum[px+ny];
338 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
339 - sum[inx+ipy] - sum[ipx+iny];
340 *dst++ = SkToU8((outer_sum * outer_scale
341 + inner_sum * inner_scale) >> 24);
342
343 prev_x += 1;
344 next_x += 1;
345 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000346 * The sections are:
347 * left-hand section, where prev_x is clamped to 0
348 * center section, where neither prev_x nor next_x is clamped
349 * right-hand section, where next_x is clamped to sw
350 * On some operating systems, the center section is unrolled for additional
351 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000352*/
353static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
354 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
355 SkASSERT(rx > 0 && ry > 0);
356 SkASSERT(outer_weight <= 255);
357
358 if (2*rx > sw) {
359 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
360 return;
361 }
362
363 int inner_weight = 255 - outer_weight;
364
365 // round these guys up if they're bigger than 127
366 outer_weight += outer_weight >> 7;
367 inner_weight += inner_weight >> 7;
368
369 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
370 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
371
372 int sumStride = sw + 1;
373
374 int dw = sw + 2*rx;
375 int dh = sh + 2*ry;
376
377 int prev_y = -2*ry;
378 int next_y = 1;
379
380 SkASSERT(2*rx <= dw - 2*rx);
381
382 for (int y = 0; y < dh; y++) {
383 int py = SkClampPos(prev_y) * sumStride;
384 int ny = SkFastMin32(next_y, sh) * sumStride;
385
386 int ipy = SkClampPos(prev_y + 1) * sumStride;
387 int iny = SkClampMax(next_y - 1, sh) * sumStride;
388
389 int prev_x = -2*rx;
390 int next_x = 1;
391 int x = 0;
392
393 for (; x < 2*rx; x++) {
394 SkASSERT(prev_x < 0);
395 SkASSERT(next_x <= sw);
396
397 int px = 0;
398 int nx = next_x;
399
400 int ipx = 0;
401 int inx = next_x - 1;
402
403 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
404 - sum[nx+py] - sum[px+ny];
405 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
406 - sum[inx+ipy] - sum[ipx+iny];
407 *dst++ = SkToU8((outer_sum * outer_scale
408 + inner_sum * inner_scale) >> 24);
409
410 prev_x += 1;
411 next_x += 1;
412 }
413
tomhudson@google.com01224d52011-11-28 18:22:01 +0000414 int i0 = prev_x + py;
415 int i1 = next_x + ny;
416 int i2 = next_x + py;
417 int i3 = prev_x + ny;
418 int i4 = prev_x + 1 + ipy;
419 int i5 = next_x - 1 + iny;
420 int i6 = next_x - 1 + ipy;
421 int i7 = prev_x + 1 + iny;
422
423#if UNROLL_KERNEL_LOOP
424 for (; x < dw - 2*rx - 4; x += 4) {
425 SkASSERT(prev_x >= 0);
426 SkASSERT(next_x <= sw);
427
428 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
429 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
430 *dst++ = SkToU8((outer_sum * outer_scale
431 + inner_sum * inner_scale) >> 24);
432 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
433 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
434 *dst++ = SkToU8((outer_sum * outer_scale
435 + inner_sum * inner_scale) >> 24);
436 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
437 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
438 *dst++ = SkToU8((outer_sum * outer_scale
439 + inner_sum * inner_scale) >> 24);
440 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
441 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
442 *dst++ = SkToU8((outer_sum * outer_scale
443 + inner_sum * inner_scale) >> 24);
444
445 prev_x += 4;
446 next_x += 4;
447 }
448#endif
449
tomhudson@google.com8caac642011-11-22 15:58:06 +0000450 for (; x < dw - 2*rx; x++) {
451 SkASSERT(prev_x >= 0);
452 SkASSERT(next_x <= sw);
453
tomhudson@google.com01224d52011-11-28 18:22:01 +0000454 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
455 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000456 *dst++ = SkToU8((outer_sum * outer_scale
457 + inner_sum * inner_scale) >> 24);
458
459 prev_x += 1;
460 next_x += 1;
461 }
462
463 for (; x < dw; x++) {
464 SkASSERT(prev_x >= 0);
465 SkASSERT(next_x > sw);
466
467 int px = prev_x;
468 int nx = sw;
469
470 int ipx = prev_x + 1;
471 int inx = sw;
472
473 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
474 - sum[nx+py] - sum[px+ny];
475 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
476 - sum[inx+ipy] - sum[ipx+iny];
477 *dst++ = SkToU8((outer_sum * outer_scale
478 + inner_sum * inner_scale) >> 24);
479
480 prev_x += 1;
481 next_x += 1;
482 }
483
484 prev_y += 1;
485 next_y += 1;
486 }
487}
488
reed@android.com8a1c16f2008-12-17 15:59:43 +0000489#include "SkColorPriv.h"
490
reed@android.com0e3c6642009-09-18 13:41:56 +0000491static void merge_src_with_blur(uint8_t dst[], int dstRB,
492 const uint8_t src[], int srcRB,
493 const uint8_t blur[], int blurRB,
494 int sw, int sh) {
495 dstRB -= sw;
496 srcRB -= sw;
497 blurRB -= sw;
498 while (--sh >= 0) {
499 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000500 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
501 dst += 1;
502 src += 1;
503 blur += 1;
504 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000505 dst += dstRB;
506 src += srcRB;
507 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000508 }
509}
510
511static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000512 const uint8_t src[], int srcRowBytes,
513 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000514 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000515 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000516 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000517 switch (style) {
518 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000519 for (x = sw - 1; x >= 0; --x) {
520 int s = *src;
521 int d = *dst;
522 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000523 dst += 1;
524 src += 1;
525 }
526 break;
527 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000528 for (x = sw - 1; x >= 0; --x) {
529 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000530 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000531 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000532 dst += 1;
533 src += 1;
534 }
535 break;
536 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000537 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000538 break;
539 }
540 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000541 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000542 }
543}
544
reed@google.com03016a32011-08-12 14:59:59 +0000545///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000546
547// we use a local funciton to wrap the class static method to work around
548// a bug in gcc98
549void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000550void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000551 SkMask::FreeImage(image);
552}
553
554bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000555 SkScalar radius, Style style, Quality quality,
556 SkIPoint* margin)
557{
reed@google.com03016a32011-08-12 14:59:59 +0000558 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000559 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000560 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000561
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000562 // Force high quality off for small radii (performance)
563 if (radius < SkIntToScalar(3)) quality = kLow_Quality;
564
565 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
566 int passCount = (quality == kHigh_Quality) ? 3 : 1;
567 SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
568
569 int rx = SkScalarCeil(passRadius);
570 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000571
572 SkASSERT(rx >= 0);
573 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000574 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000575 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000576 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000577
578 int ry = rx; // only do square blur for now
579
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000580 int padx = passCount * rx;
581 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000582 if (margin) {
583 margin->set(padx, pady);
584 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000585 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
586 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000587 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000588 dst->fFormat = SkMask::kA8_Format;
589 dst->fImage = NULL;
590
reed@android.com0e3c6642009-09-18 13:41:56 +0000591 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000592 size_t dstSize = dst->computeImageSize();
593 if (0 == dstSize) {
594 return false; // too big to allocate, abort
595 }
596
reed@android.com8a1c16f2008-12-17 15:59:43 +0000597 int sw = src.fBounds.width();
598 int sh = src.fBounds.height();
599 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +0000600 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000601
602 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
603
604 // build the blurry destination
605 {
reed@google.com03016a32011-08-12 14:59:59 +0000606 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
607 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
608 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000609 uint32_t* sumBuffer = storage.get();
610
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000611 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +0000612 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +0000613 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000614 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +0000615 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000616 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +0000617 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000618
reed@google.com03016a32011-08-12 14:59:59 +0000619 if (quality == kHigh_Quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000620 //pass2: dp is source, tmpBuffer is destination
621 int tmp_sw = sw + 2 * rx;
622 int tmp_sh = sh + 2 * ry;
623 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
624 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
625 if (outer_weight == 255)
626 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
627 else
reed@google.com03016a32011-08-12 14:59:59 +0000628 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
629 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000630
631 //pass3: tmpBuffer is source, dp is destination
632 tmp_sw += 2 * rx;
633 tmp_sh += 2 * ry;
634 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
635 if (outer_weight == 255)
636 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
637 else
reed@google.com03016a32011-08-12 14:59:59 +0000638 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
639 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000640 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000641 }
642
643 dst->fImage = dp;
644 // if need be, alloc the "real" dst (same size as src) and copy/merge
645 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +0000646 if (style == kInner_Style) {
647 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +0000648 size_t srcSize = src.computeImageSize();
649 if (0 == srcSize) {
650 return false; // too big to allocate, abort
651 }
652 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +0000653 merge_src_with_blur(dst->fImage, src.fRowBytes,
654 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +0000655 dp + passCount * (rx + ry * dst->fRowBytes),
656 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000657 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +0000658 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +0000659 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
660 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000661 }
662 (void)autoCall.detach();
663 }
664
reed@android.com0e3c6642009-09-18 13:41:56 +0000665 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000666 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +0000667 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000668 }
669
reed@android.com8a1c16f2008-12-17 15:59:43 +0000670 return true;
671}
672