blob: 1e76acf27d19ee2f23365691ec26d7e210754d06 [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000015static int boxBlurX(const uint8_t* src, int src_row_bytes,
16 uint8_t* dst, int dst_row_bytes,
17 int radius, int width, int height)
18{
19 int kernelSize = radius * 2 + 1;
20 int border = SkMin32(width, radius * 2);
21 uint32_t scale = (1 << 24) / kernelSize;
22 for (int y = 0; y < height; ++y) {
23 int sum = 0;
24 uint8_t* dptr = dst + y * dst_row_bytes;
25 const uint8_t* sptr = src + y * src_row_bytes - radius;
26 for (int x = 0; x < border; ++x) {
27 sum += *(sptr + radius);
28 *dptr++ = (sum * scale) >> 24;
29 sptr++;
30 }
31 for (int x = width; x < radius * 2; ++x) {
32 *dptr++ = (sum * scale) >> 24;
33 sptr++;
34 }
35 for (int x = radius * 2; x < width; ++x) {
36 sum += *(sptr + radius);
37 *dptr++ = (sum * scale) >> 24;
38 sum -= *(sptr - radius);
39 sptr++;
40 }
41 for (int x = 0; x < border; ++x) {
42 *dptr++ = (sum * scale) >> 24;
43 sum -= *(sptr - radius);
44 sptr++;
45 }
46 SkASSERT(sum == 0);
47 }
48 return width + radius * 2;
49}
50
51static int boxBlurY(const uint8_t* src, int src_row_bytes,
52 uint8_t* dst, int dst_row_bytes,
53 int radius, int width, int height)
54{
55 int kernelSize = radius * 2 + 1;
56 uint32_t scale = (1 << 24) / kernelSize;
57 int border = SkMin32(height, radius * 2);
58 for (int x = 0; x < width; ++x) {
59 int sum = 0;
60 uint8_t* dptr = dst + x;
61 const uint8_t* sptr = src + x - radius * src_row_bytes;
62 for (int y = 0; y < border; ++y) {
63 sum += *(sptr + radius * src_row_bytes);
64 *dptr = (sum * scale) >> 24;
65 sptr += src_row_bytes;
66 dptr += dst_row_bytes;
67 }
68 for (int y = height; y < radius * 2; ++y) {
69 *dptr = (sum * scale) >> 24;
70 sptr += src_row_bytes;
71 dptr += dst_row_bytes;
72 }
73 for (int y = radius * 2; y < height; ++y) {
74 sum += *(sptr + radius * src_row_bytes);
75 *dptr = (sum * scale) >> 24;
76 sum -= *(sptr - radius * src_row_bytes);
77 sptr += src_row_bytes;
78 dptr += dst_row_bytes;
79 }
80 for (int y = 0; y < border; ++y) {
81 *dptr = (sum * scale) >> 24;
82 sum -= *(sptr - radius * src_row_bytes);
83 sptr += src_row_bytes;
84 dptr += dst_row_bytes;
85 }
86 SkASSERT(sum == 0);
87 }
88 return height + radius * 2;
89}
90
// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
//
// UNROLL_KERNEL_LOOP is always defined (to 0 or 1) so that the
// "#if UNROLL_KERNEL_LOOP" tests below never evaluate an undefined macro.
#if defined(SK_BUILD_FOR_WIN32)
#define UNROLL_KERNEL_LOOP 1
#else
#define UNROLL_KERNEL_LOOP 0
#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +000098
reed@android.com45607672009-09-21 00:27:08 +000099/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
100 src values at their position, plus all values above and to the left.
101 When we sample into this buffer, we need an initial row and column of 0s,
102 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000103
reed@android.com45607672009-09-21 00:27:08 +0000104 src[i, j] == sum[i+1, j+1]
105 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000106
reed@android.com45607672009-09-21 00:27:08 +0000107 We assume that the sum buffer's stride == its width
108 */
reed@google.com03016a32011-08-12 14:59:59 +0000109static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
110 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +0000111 int sumW = srcW + 1;
112
113 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000114 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +0000115 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000116
117 int x, y;
118
reed@android.com45607672009-09-21 00:27:08 +0000119 // zero out the top row and column
120 memset(sum, 0, sumW * sizeof(sum[0]));
121 sum += sumW;
122
reed@android.com8a1c16f2008-12-17 15:59:43 +0000123 // special case first row
124 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +0000125 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +0000126 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000127 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +0000128 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000129 }
130 src += srcRB;
131
132 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000133 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000134 uint32_t L = 0;
135 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000136 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000137
138 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
139 uint32_t T = sum[-sumW];
140 X = *src++ + L + T - C;
141 *sum++ = X;
142 L = X;
143 C = T;
144 }
145
146 for (; x >= 4; x-=4) {
147 uint32_t T = sum[-sumW];
148 X = *src++ + L + T - C;
149 *sum++ = X;
150 L = X;
151 C = T;
152 T = sum[-sumW];
153 X = *src++ + L + T - C;
154 *sum++ = X;
155 L = X;
156 C = T;
157 T = sum[-sumW];
158 X = *src++ + L + T - C;
159 *sum++ = X;
160 L = X;
161 C = T;
162 T = sum[-sumW];
163 X = *src++ + L + T - C;
164 *sum++ = X;
165 L = X;
166 C = T;
167 }
168
169 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000170 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000171 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000172 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000173 L = X;
174 C = T;
175 }
176 src += srcRB;
177 }
178}
179
reed@google.com03016a32011-08-12 14:59:59 +0000180/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000181 * This is the path for apply_kernel() to be taken when the kernel
182 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000183 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000184static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
185 int sw, int sh) {
186 SkASSERT(2*rx > sw);
187
reed@android.com8a1c16f2008-12-17 15:59:43 +0000188 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
189
reed@android.com45607672009-09-21 00:27:08 +0000190 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000191
192 int dw = sw + 2*rx;
193 int dh = sh + 2*ry;
194
reed@android.com45607672009-09-21 00:27:08 +0000195 int prev_y = -2*ry;
196 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000197
reed@android.com45607672009-09-21 00:27:08 +0000198 for (int y = 0; y < dh; y++) {
199 int py = SkClampPos(prev_y) * sumStride;
200 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000201
reed@android.com45607672009-09-21 00:27:08 +0000202 int prev_x = -2*rx;
203 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000204
reed@android.com45607672009-09-21 00:27:08 +0000205 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000206 int px = SkClampPos(prev_x);
207 int nx = SkFastMin32(next_x, sw);
208
reed@android.com45607672009-09-21 00:27:08 +0000209 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
210 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000211
212 prev_x += 1;
213 next_x += 1;
214 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000215
216 prev_y += 1;
217 next_y += 1;
218 }
219}
220/**
221 * sw and sh are the width and height of the src. Since the sum buffer
222 * matches that, but has an extra row and col at the beginning (with zeros),
223 * we can just use sw and sh as our "max" values for pinning coordinates
224 * when sampling into sum[][]
225 *
226 * The inner loop is conceptually simple; we break it into several sections
227 * to improve performance. Here's the original version:
228 for (int x = 0; x < dw; x++) {
229 int px = SkClampPos(prev_x);
230 int nx = SkFastMin32(next_x, sw);
231
232 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
233 *dst++ = SkToU8(tmp * scale >> 24);
234
235 prev_x += 1;
236 next_x += 1;
237 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000238 * The sections are:
239 * left-hand section, where prev_x is clamped to 0
240 * center section, where neither prev_x nor next_x is clamped
241 * right-hand section, where next_x is clamped to sw
242 * On some operating systems, the center section is unrolled for additional
243 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000244*/
245static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
246 int sw, int sh) {
247 if (2*rx > sw) {
248 kernel_clamped(dst, rx, ry, sum, sw, sh);
249 return;
250 }
251
252 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
253
254 int sumStride = sw + 1;
255
256 int dw = sw + 2*rx;
257 int dh = sh + 2*ry;
258
259 int prev_y = -2*ry;
260 int next_y = 1;
261
262 SkASSERT(2*rx <= dw - 2*rx);
263
264 for (int y = 0; y < dh; y++) {
265 int py = SkClampPos(prev_y) * sumStride;
266 int ny = SkFastMin32(next_y, sh) * sumStride;
267
268 int prev_x = -2*rx;
269 int next_x = 1;
270 int x = 0;
271
272 for (; x < 2*rx; x++) {
273 SkASSERT(prev_x <= 0);
274 SkASSERT(next_x <= sw);
275
276 int px = 0;
277 int nx = next_x;
278
279 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
280 *dst++ = SkToU8(tmp * scale >> 24);
281
282 prev_x += 1;
283 next_x += 1;
284 }
285
tomhudson@google.com01224d52011-11-28 18:22:01 +0000286 int i0 = prev_x + py;
287 int i1 = next_x + ny;
288 int i2 = next_x + py;
289 int i3 = prev_x + ny;
290
291#if UNROLL_KERNEL_LOOP
292 for (; x < dw - 2*rx - 4; x += 4) {
293 SkASSERT(prev_x >= 0);
294 SkASSERT(next_x <= sw);
295
296 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
297 *dst++ = SkToU8(tmp * scale >> 24);
298 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
299 *dst++ = SkToU8(tmp * scale >> 24);
300 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
301 *dst++ = SkToU8(tmp * scale >> 24);
302 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
303 *dst++ = SkToU8(tmp * scale >> 24);
304
305 prev_x += 4;
306 next_x += 4;
307 }
308#endif
309
tomhudson@google.com8caac642011-11-22 15:58:06 +0000310 for (; x < dw - 2*rx; x++) {
311 SkASSERT(prev_x >= 0);
312 SkASSERT(next_x <= sw);
313
tomhudson@google.com01224d52011-11-28 18:22:01 +0000314 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000315 *dst++ = SkToU8(tmp * scale >> 24);
316
317 prev_x += 1;
318 next_x += 1;
319 }
320
321 for (; x < dw; x++) {
322 SkASSERT(prev_x >= 0);
323 SkASSERT(next_x > sw);
324
325 int px = prev_x;
326 int nx = sw;
327
328 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
329 *dst++ = SkToU8(tmp * scale >> 24);
330
331 prev_x += 1;
332 next_x += 1;
333 }
334
reed@android.com8a1c16f2008-12-17 15:59:43 +0000335 prev_y += 1;
336 next_y += 1;
337 }
338}
339
reed@google.com03016a32011-08-12 14:59:59 +0000340/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000341 * This is the path for apply_kernel_interp() to be taken when the kernel
342 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000343 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000344static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000345 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000346 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000347
348 int inner_weight = 255 - outer_weight;
349
350 // round these guys up if they're bigger than 127
351 outer_weight += outer_weight >> 7;
352 inner_weight += inner_weight >> 7;
353
354 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
355 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
356
reed@android.com45607672009-09-21 00:27:08 +0000357 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000358
359 int dw = sw + 2*rx;
360 int dh = sh + 2*ry;
361
reed@android.com45607672009-09-21 00:27:08 +0000362 int prev_y = -2*ry;
363 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000364
reed@android.com45607672009-09-21 00:27:08 +0000365 for (int y = 0; y < dh; y++) {
366 int py = SkClampPos(prev_y) * sumStride;
367 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000368
reed@android.com45607672009-09-21 00:27:08 +0000369 int ipy = SkClampPos(prev_y + 1) * sumStride;
370 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000371
reed@android.com45607672009-09-21 00:27:08 +0000372 int prev_x = -2*rx;
373 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000374
reed@android.com45607672009-09-21 00:27:08 +0000375 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000376 int px = SkClampPos(prev_x);
377 int nx = SkFastMin32(next_x, sw);
378
379 int ipx = SkClampPos(prev_x + 1);
380 int inx = SkClampMax(next_x - 1, sw);
381
tomhudson@google.com8caac642011-11-22 15:58:06 +0000382 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
383 - sum[nx+py] - sum[px+ny];
384 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
385 - sum[inx+ipy] - sum[ipx+iny];
386 *dst++ = SkToU8((outer_sum * outer_scale
387 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000388
389 prev_x += 1;
390 next_x += 1;
391 }
392 prev_y += 1;
393 next_y += 1;
394 }
395}
396
tomhudson@google.com8caac642011-11-22 15:58:06 +0000397/**
398 * sw and sh are the width and height of the src. Since the sum buffer
399 * matches that, but has an extra row and col at the beginning (with zeros),
400 * we can just use sw and sh as our "max" values for pinning coordinates
401 * when sampling into sum[][]
402 *
403 * The inner loop is conceptually simple; we break it into several variants
404 * to improve performance. Here's the original version:
405 for (int x = 0; x < dw; x++) {
406 int px = SkClampPos(prev_x);
407 int nx = SkFastMin32(next_x, sw);
408
409 int ipx = SkClampPos(prev_x + 1);
410 int inx = SkClampMax(next_x - 1, sw);
411
412 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
413 - sum[nx+py] - sum[px+ny];
414 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
415 - sum[inx+ipy] - sum[ipx+iny];
416 *dst++ = SkToU8((outer_sum * outer_scale
417 + inner_sum * inner_scale) >> 24);
418
419 prev_x += 1;
420 next_x += 1;
421 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000422 * The sections are:
423 * left-hand section, where prev_x is clamped to 0
424 * center section, where neither prev_x nor next_x is clamped
425 * right-hand section, where next_x is clamped to sw
426 * On some operating systems, the center section is unrolled for additional
427 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000428*/
429static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
430 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
431 SkASSERT(rx > 0 && ry > 0);
432 SkASSERT(outer_weight <= 255);
433
434 if (2*rx > sw) {
435 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
436 return;
437 }
438
439 int inner_weight = 255 - outer_weight;
440
441 // round these guys up if they're bigger than 127
442 outer_weight += outer_weight >> 7;
443 inner_weight += inner_weight >> 7;
444
445 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
446 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
447
448 int sumStride = sw + 1;
449
450 int dw = sw + 2*rx;
451 int dh = sh + 2*ry;
452
453 int prev_y = -2*ry;
454 int next_y = 1;
455
456 SkASSERT(2*rx <= dw - 2*rx);
457
458 for (int y = 0; y < dh; y++) {
459 int py = SkClampPos(prev_y) * sumStride;
460 int ny = SkFastMin32(next_y, sh) * sumStride;
461
462 int ipy = SkClampPos(prev_y + 1) * sumStride;
463 int iny = SkClampMax(next_y - 1, sh) * sumStride;
464
465 int prev_x = -2*rx;
466 int next_x = 1;
467 int x = 0;
468
469 for (; x < 2*rx; x++) {
470 SkASSERT(prev_x < 0);
471 SkASSERT(next_x <= sw);
472
473 int px = 0;
474 int nx = next_x;
475
476 int ipx = 0;
477 int inx = next_x - 1;
478
479 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
480 - sum[nx+py] - sum[px+ny];
481 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
482 - sum[inx+ipy] - sum[ipx+iny];
483 *dst++ = SkToU8((outer_sum * outer_scale
484 + inner_sum * inner_scale) >> 24);
485
486 prev_x += 1;
487 next_x += 1;
488 }
489
tomhudson@google.com01224d52011-11-28 18:22:01 +0000490 int i0 = prev_x + py;
491 int i1 = next_x + ny;
492 int i2 = next_x + py;
493 int i3 = prev_x + ny;
494 int i4 = prev_x + 1 + ipy;
495 int i5 = next_x - 1 + iny;
496 int i6 = next_x - 1 + ipy;
497 int i7 = prev_x + 1 + iny;
498
499#if UNROLL_KERNEL_LOOP
500 for (; x < dw - 2*rx - 4; x += 4) {
501 SkASSERT(prev_x >= 0);
502 SkASSERT(next_x <= sw);
503
504 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
505 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
506 *dst++ = SkToU8((outer_sum * outer_scale
507 + inner_sum * inner_scale) >> 24);
508 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
509 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
510 *dst++ = SkToU8((outer_sum * outer_scale
511 + inner_sum * inner_scale) >> 24);
512 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
513 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
514 *dst++ = SkToU8((outer_sum * outer_scale
515 + inner_sum * inner_scale) >> 24);
516 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
517 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
518 *dst++ = SkToU8((outer_sum * outer_scale
519 + inner_sum * inner_scale) >> 24);
520
521 prev_x += 4;
522 next_x += 4;
523 }
524#endif
525
tomhudson@google.com8caac642011-11-22 15:58:06 +0000526 for (; x < dw - 2*rx; x++) {
527 SkASSERT(prev_x >= 0);
528 SkASSERT(next_x <= sw);
529
tomhudson@google.com01224d52011-11-28 18:22:01 +0000530 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
531 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000532 *dst++ = SkToU8((outer_sum * outer_scale
533 + inner_sum * inner_scale) >> 24);
534
535 prev_x += 1;
536 next_x += 1;
537 }
538
539 for (; x < dw; x++) {
540 SkASSERT(prev_x >= 0);
541 SkASSERT(next_x > sw);
542
543 int px = prev_x;
544 int nx = sw;
545
546 int ipx = prev_x + 1;
547 int inx = sw;
548
549 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
550 - sum[nx+py] - sum[px+ny];
551 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
552 - sum[inx+ipy] - sum[ipx+iny];
553 *dst++ = SkToU8((outer_sum * outer_scale
554 + inner_sum * inner_scale) >> 24);
555
556 prev_x += 1;
557 next_x += 1;
558 }
559
560 prev_y += 1;
561 next_y += 1;
562 }
563}
564
reed@android.com8a1c16f2008-12-17 15:59:43 +0000565#include "SkColorPriv.h"
566
reed@android.com0e3c6642009-09-18 13:41:56 +0000567static void merge_src_with_blur(uint8_t dst[], int dstRB,
568 const uint8_t src[], int srcRB,
569 const uint8_t blur[], int blurRB,
570 int sw, int sh) {
571 dstRB -= sw;
572 srcRB -= sw;
573 blurRB -= sw;
574 while (--sh >= 0) {
575 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000576 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
577 dst += 1;
578 src += 1;
579 blur += 1;
580 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000581 dst += dstRB;
582 src += srcRB;
583 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000584 }
585}
586
587static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000588 const uint8_t src[], int srcRowBytes,
589 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000590 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000591 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000592 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000593 switch (style) {
594 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000595 for (x = sw - 1; x >= 0; --x) {
596 int s = *src;
597 int d = *dst;
598 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000599 dst += 1;
600 src += 1;
601 }
602 break;
603 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000604 for (x = sw - 1; x >= 0; --x) {
605 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000606 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000607 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000608 dst += 1;
609 src += 1;
610 }
611 break;
612 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000613 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000614 break;
615 }
616 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000617 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000618 }
619}
620
reed@google.com03016a32011-08-12 14:59:59 +0000621///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000622
623// we use a local funciton to wrap the class static method to work around
624// a bug in gcc98
625void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000626void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000627 SkMask::FreeImage(image);
628}
629
630bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000631 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000632 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000633{
reed@google.com03016a32011-08-12 14:59:59 +0000634 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000635 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000636 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000637
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000638 // Force high quality off for small radii (performance)
639 if (radius < SkIntToScalar(3)) quality = kLow_Quality;
640
641 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
642 int passCount = (quality == kHigh_Quality) ? 3 : 1;
643 SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
644
645 int rx = SkScalarCeil(passRadius);
646 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000647
648 SkASSERT(rx >= 0);
649 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000650 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000651 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000652 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000653
654 int ry = rx; // only do square blur for now
655
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000656 int padx = passCount * rx;
657 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000658 if (margin) {
659 margin->set(padx, pady);
660 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000661 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
662 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000663 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000664 dst->fFormat = SkMask::kA8_Format;
665 dst->fImage = NULL;
666
reed@android.com0e3c6642009-09-18 13:41:56 +0000667 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000668 size_t dstSize = dst->computeImageSize();
669 if (0 == dstSize) {
670 return false; // too big to allocate, abort
671 }
672
reed@android.com8a1c16f2008-12-17 15:59:43 +0000673 int sw = src.fBounds.width();
674 int sh = src.fBounds.height();
675 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +0000676 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000677
678 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
679
680 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000681 if (separable) {
682 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
683 uint8_t* tp = tmpBuffer.get();
684 int w = sw, h = sh;
685
686 w = boxBlurX(sp, src.fRowBytes, tp, dst->fRowBytes, rx, w, h);
687 h = boxBlurY(tp, dst->fRowBytes, dp, dst->fRowBytes, ry, w, h);
688 if (quality == kHigh_Quality) {
689 w = boxBlurX(dp, dst->fRowBytes, tp, dst->fRowBytes, rx, w, h);
690 h = boxBlurY(tp, dst->fRowBytes, dp, dst->fRowBytes, ry, w, h);
691 w = boxBlurX(dp, dst->fRowBytes, tp, dst->fRowBytes, rx, w, h);
692 h = boxBlurY(tp, dst->fRowBytes, dp, dst->fRowBytes, ry, w, h);
693 }
694 } else {
reed@google.com03016a32011-08-12 14:59:59 +0000695 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
696 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
697 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000698 uint32_t* sumBuffer = storage.get();
699
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000700 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +0000701 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +0000702 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000703 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +0000704 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000705 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +0000706 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000707
reed@google.com03016a32011-08-12 14:59:59 +0000708 if (quality == kHigh_Quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000709 //pass2: dp is source, tmpBuffer is destination
710 int tmp_sw = sw + 2 * rx;
711 int tmp_sh = sh + 2 * ry;
712 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
713 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
714 if (outer_weight == 255)
715 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
716 else
reed@google.com03016a32011-08-12 14:59:59 +0000717 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
718 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000719
720 //pass3: tmpBuffer is source, dp is destination
721 tmp_sw += 2 * rx;
722 tmp_sh += 2 * ry;
723 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
724 if (outer_weight == 255)
725 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
726 else
reed@google.com03016a32011-08-12 14:59:59 +0000727 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
728 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000729 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000730 }
731
732 dst->fImage = dp;
733 // if need be, alloc the "real" dst (same size as src) and copy/merge
734 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +0000735 if (style == kInner_Style) {
736 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +0000737 size_t srcSize = src.computeImageSize();
738 if (0 == srcSize) {
739 return false; // too big to allocate, abort
740 }
741 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +0000742 merge_src_with_blur(dst->fImage, src.fRowBytes,
743 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +0000744 dp + passCount * (rx + ry * dst->fRowBytes),
745 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000746 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +0000747 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +0000748 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
749 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000750 }
751 (void)autoCall.detach();
752 }
753
reed@android.com0e3c6642009-09-18 13:41:56 +0000754 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000755 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +0000756 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000757 }
758
reed@android.com8a1c16f2008-12-17 15:59:43 +0000759 return true;
760}
761
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000762bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
763 SkScalar radius, Style style, Quality quality,
764 SkIPoint* margin)
765{
766 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
767}
768
769bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
770 SkScalar radius, Style style, Quality quality,
771 SkIPoint* margin)
772{
773 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
774}