blob: 54423ef555107fcd05c6d134470df60975031cd3 [file] [log] [blame]
epoger@google.comec3ed6a2011-07-28 14:26:00 +00001
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
reed@android.com8a1c16f2008-12-17 15:59:43 +00009
10#include "SkBlurMask.h"
tomhudson@google.com889bd8b2011-09-27 17:38:17 +000011#include "SkMath.h"
reed@android.com8a1c16f2008-12-17 15:59:43 +000012#include "SkTemplates.h"
tomhudson@google.com01224d52011-11-28 18:22:01 +000013#include "SkEndian.h"
14
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000015/**
16 * This function performs a box blur in X, of the given radius. If the
17 * "transpose" parameter is true, it will transpose the pixels on write,
18 * such that X and Y are swapped. Reads are always performed from contiguous
19 * memory in X, for speed. The destination buffer (dst) must be at least
20 * (width + radius * 2) * height bytes in size.
21 */
22static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
23 int radius, int width, int height, bool transpose)
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000024{
25 int kernelSize = radius * 2 + 1;
26 int border = SkMin32(width, radius * 2);
27 uint32_t scale = (1 << 24) / kernelSize;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000028 int new_width = width + radius * 2;
29 int dst_x_stride = transpose ? height : 1;
30 int dst_y_stride = transpose ? 1 : new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000031 for (int y = 0; y < height; ++y) {
32 int sum = 0;
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000033 uint8_t* dptr = dst + y * dst_y_stride;
34 const uint8_t* right = src + y * src_y_stride;
35 const uint8_t* left = right;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000036 for (int x = 0; x < border; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000037 sum += *right++;
38 *dptr = (sum * scale) >> 24;
39 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000040 }
41 for (int x = width; x < radius * 2; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000042 *dptr = (sum * scale) >> 24;
43 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000044 }
45 for (int x = radius * 2; x < width; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000046 sum += *right++;
47 *dptr = (sum * scale) >> 24;
48 sum -= *left++;
49 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000050 }
51 for (int x = 0; x < border; ++x) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000052 *dptr = (sum * scale) >> 24;
53 sum -= *left++;
54 dptr += dst_x_stride;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000055 }
56 SkASSERT(sum == 0);
57 }
senorblanco@chromium.org908276b2012-11-15 20:27:35 +000058 return new_width;
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +000059}
60
// Hand-unrolling the integer blur kernel's center loop measured roughly 15%
// faster on Windows, break-even on Mac, and roughly 15% slower on Linux, so
// the unrolled variant is only enabled for Windows builds.
// Reading a word at a time while building the sum buffer gave no appreciable
// speedup on Windows or Mac, and a 2% slowdown on Linux.
#ifdef SK_BUILD_FOR_WIN32
    #define UNROLL_KERNEL_LOOP 1
#endif
reed@android.com8a1c16f2008-12-17 15:59:43 +000068
reed@android.com45607672009-09-21 00:27:08 +000069/** The sum buffer is an array of u32 to hold the accumulated sum of all of the
70 src values at their position, plus all values above and to the left.
71 When we sample into this buffer, we need an initial row and column of 0s,
72 so we have an index correspondence as follows:
rmistry@google.comfbfcd562012-08-23 18:09:54 +000073
reed@android.com45607672009-09-21 00:27:08 +000074 src[i, j] == sum[i+1, j+1]
75 sum[0, j] == sum[i, 0] == 0
rmistry@google.comfbfcd562012-08-23 18:09:54 +000076
reed@android.com45607672009-09-21 00:27:08 +000077 We assume that the sum buffer's stride == its width
78 */
reed@google.com03016a32011-08-12 14:59:59 +000079static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,
80 const uint8_t src[], int srcRB) {
reed@android.com45607672009-09-21 00:27:08 +000081 int sumW = srcW + 1;
82
83 SkASSERT(srcRB >= srcW);
reed@android.com8a1c16f2008-12-17 15:59:43 +000084 // mod srcRB so we can apply it after each row
reed@android.com45607672009-09-21 00:27:08 +000085 srcRB -= srcW;
reed@android.com8a1c16f2008-12-17 15:59:43 +000086
87 int x, y;
88
reed@android.com45607672009-09-21 00:27:08 +000089 // zero out the top row and column
90 memset(sum, 0, sumW * sizeof(sum[0]));
91 sum += sumW;
92
reed@android.com8a1c16f2008-12-17 15:59:43 +000093 // special case first row
94 uint32_t X = 0;
reed@android.com45607672009-09-21 00:27:08 +000095 *sum++ = 0; // initialze the first column to 0
reed@google.com03016a32011-08-12 14:59:59 +000096 for (x = srcW - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +000097 X = *src++ + X;
reed@android.com45607672009-09-21 00:27:08 +000098 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +000099 }
100 src += srcRB;
101
102 // now do the rest of the rows
reed@google.com03016a32011-08-12 14:59:59 +0000103 for (y = srcH - 1; y > 0; --y) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000104 uint32_t L = 0;
105 uint32_t C = 0;
reed@android.com45607672009-09-21 00:27:08 +0000106 *sum++ = 0; // initialze the first column to 0
tomhudson@google.com01224d52011-11-28 18:22:01 +0000107
108 for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {
109 uint32_t T = sum[-sumW];
110 X = *src++ + L + T - C;
111 *sum++ = X;
112 L = X;
113 C = T;
114 }
115
116 for (; x >= 4; x-=4) {
117 uint32_t T = sum[-sumW];
118 X = *src++ + L + T - C;
119 *sum++ = X;
120 L = X;
121 C = T;
122 T = sum[-sumW];
123 X = *src++ + L + T - C;
124 *sum++ = X;
125 L = X;
126 C = T;
127 T = sum[-sumW];
128 X = *src++ + L + T - C;
129 *sum++ = X;
130 L = X;
131 C = T;
132 T = sum[-sumW];
133 X = *src++ + L + T - C;
134 *sum++ = X;
135 L = X;
136 C = T;
137 }
138
139 for (; x >= 0; --x) {
reed@android.com45607672009-09-21 00:27:08 +0000140 uint32_t T = sum[-sumW];
reed@android.com8a1c16f2008-12-17 15:59:43 +0000141 X = *src++ + L + T - C;
reed@android.com45607672009-09-21 00:27:08 +0000142 *sum++ = X;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000143 L = X;
144 C = T;
145 }
146 src += srcRB;
147 }
148}
149
reed@google.com03016a32011-08-12 14:59:59 +0000150/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000151 * This is the path for apply_kernel() to be taken when the kernel
152 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000153 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000154static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],
155 int sw, int sh) {
156 SkASSERT(2*rx > sw);
157
reed@android.com8a1c16f2008-12-17 15:59:43 +0000158 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
159
reed@android.com45607672009-09-21 00:27:08 +0000160 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000161
162 int dw = sw + 2*rx;
163 int dh = sh + 2*ry;
164
reed@android.com45607672009-09-21 00:27:08 +0000165 int prev_y = -2*ry;
166 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000167
reed@android.com45607672009-09-21 00:27:08 +0000168 for (int y = 0; y < dh; y++) {
169 int py = SkClampPos(prev_y) * sumStride;
170 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000171
reed@android.com45607672009-09-21 00:27:08 +0000172 int prev_x = -2*rx;
173 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000174
reed@android.com45607672009-09-21 00:27:08 +0000175 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000176 int px = SkClampPos(prev_x);
177 int nx = SkFastMin32(next_x, sw);
178
reed@android.com45607672009-09-21 00:27:08 +0000179 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
180 *dst++ = SkToU8(tmp * scale >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000181
182 prev_x += 1;
183 next_x += 1;
184 }
tomhudson@google.com8caac642011-11-22 15:58:06 +0000185
186 prev_y += 1;
187 next_y += 1;
188 }
189}
190/**
191 * sw and sh are the width and height of the src. Since the sum buffer
192 * matches that, but has an extra row and col at the beginning (with zeros),
193 * we can just use sw and sh as our "max" values for pinning coordinates
194 * when sampling into sum[][]
195 *
196 * The inner loop is conceptually simple; we break it into several sections
197 * to improve performance. Here's the original version:
198 for (int x = 0; x < dw; x++) {
199 int px = SkClampPos(prev_x);
200 int nx = SkFastMin32(next_x, sw);
201
202 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
203 *dst++ = SkToU8(tmp * scale >> 24);
204
205 prev_x += 1;
206 next_x += 1;
207 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000208 * The sections are:
209 * left-hand section, where prev_x is clamped to 0
210 * center section, where neither prev_x nor next_x is clamped
211 * right-hand section, where next_x is clamped to sw
212 * On some operating systems, the center section is unrolled for additional
213 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000214*/
215static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],
216 int sw, int sh) {
217 if (2*rx > sw) {
218 kernel_clamped(dst, rx, ry, sum, sw, sh);
219 return;
220 }
221
222 uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
223
224 int sumStride = sw + 1;
225
226 int dw = sw + 2*rx;
227 int dh = sh + 2*ry;
228
229 int prev_y = -2*ry;
230 int next_y = 1;
231
232 SkASSERT(2*rx <= dw - 2*rx);
233
234 for (int y = 0; y < dh; y++) {
235 int py = SkClampPos(prev_y) * sumStride;
236 int ny = SkFastMin32(next_y, sh) * sumStride;
237
238 int prev_x = -2*rx;
239 int next_x = 1;
240 int x = 0;
241
242 for (; x < 2*rx; x++) {
243 SkASSERT(prev_x <= 0);
244 SkASSERT(next_x <= sw);
245
246 int px = 0;
247 int nx = next_x;
248
249 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
250 *dst++ = SkToU8(tmp * scale >> 24);
251
252 prev_x += 1;
253 next_x += 1;
254 }
255
tomhudson@google.com01224d52011-11-28 18:22:01 +0000256 int i0 = prev_x + py;
257 int i1 = next_x + ny;
258 int i2 = next_x + py;
259 int i3 = prev_x + ny;
260
261#if UNROLL_KERNEL_LOOP
262 for (; x < dw - 2*rx - 4; x += 4) {
263 SkASSERT(prev_x >= 0);
264 SkASSERT(next_x <= sw);
265
266 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
267 *dst++ = SkToU8(tmp * scale >> 24);
268 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
269 *dst++ = SkToU8(tmp * scale >> 24);
270 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
271 *dst++ = SkToU8(tmp * scale >> 24);
272 tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
273 *dst++ = SkToU8(tmp * scale >> 24);
274
275 prev_x += 4;
276 next_x += 4;
277 }
278#endif
279
tomhudson@google.com8caac642011-11-22 15:58:06 +0000280 for (; x < dw - 2*rx; x++) {
281 SkASSERT(prev_x >= 0);
282 SkASSERT(next_x <= sw);
283
tomhudson@google.com01224d52011-11-28 18:22:01 +0000284 uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000285 *dst++ = SkToU8(tmp * scale >> 24);
286
287 prev_x += 1;
288 next_x += 1;
289 }
290
291 for (; x < dw; x++) {
292 SkASSERT(prev_x >= 0);
293 SkASSERT(next_x > sw);
294
295 int px = prev_x;
296 int nx = sw;
297
298 uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
299 *dst++ = SkToU8(tmp * scale >> 24);
300
301 prev_x += 1;
302 next_x += 1;
303 }
304
reed@android.com8a1c16f2008-12-17 15:59:43 +0000305 prev_y += 1;
306 next_y += 1;
307 }
308}
309
reed@google.com03016a32011-08-12 14:59:59 +0000310/**
tomhudson@google.com8caac642011-11-22 15:58:06 +0000311 * This is the path for apply_kernel_interp() to be taken when the kernel
312 * is wider than the source image.
reed@android.com45607672009-09-21 00:27:08 +0000313 */
tomhudson@google.com8caac642011-11-22 15:58:06 +0000314static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
reed@android.com45607672009-09-21 00:27:08 +0000315 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
tomhudson@google.com8caac642011-11-22 15:58:06 +0000316 SkASSERT(2*rx > sw);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000317
318 int inner_weight = 255 - outer_weight;
319
320 // round these guys up if they're bigger than 127
321 outer_weight += outer_weight >> 7;
322 inner_weight += inner_weight >> 7;
323
324 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
325 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
326
reed@android.com45607672009-09-21 00:27:08 +0000327 int sumStride = sw + 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000328
329 int dw = sw + 2*rx;
330 int dh = sh + 2*ry;
331
reed@android.com45607672009-09-21 00:27:08 +0000332 int prev_y = -2*ry;
333 int next_y = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000334
reed@android.com45607672009-09-21 00:27:08 +0000335 for (int y = 0; y < dh; y++) {
336 int py = SkClampPos(prev_y) * sumStride;
337 int ny = SkFastMin32(next_y, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000338
reed@android.com45607672009-09-21 00:27:08 +0000339 int ipy = SkClampPos(prev_y + 1) * sumStride;
340 int iny = SkClampMax(next_y - 1, sh) * sumStride;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000341
reed@android.com45607672009-09-21 00:27:08 +0000342 int prev_x = -2*rx;
343 int next_x = 1;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000344
reed@android.com45607672009-09-21 00:27:08 +0000345 for (int x = 0; x < dw; x++) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000346 int px = SkClampPos(prev_x);
347 int nx = SkFastMin32(next_x, sw);
348
349 int ipx = SkClampPos(prev_x + 1);
350 int inx = SkClampMax(next_x - 1, sw);
351
tomhudson@google.com8caac642011-11-22 15:58:06 +0000352 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
353 - sum[nx+py] - sum[px+ny];
354 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
355 - sum[inx+ipy] - sum[ipx+iny];
356 *dst++ = SkToU8((outer_sum * outer_scale
357 + inner_sum * inner_scale) >> 24);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000358
359 prev_x += 1;
360 next_x += 1;
361 }
362 prev_y += 1;
363 next_y += 1;
364 }
365}
366
tomhudson@google.com8caac642011-11-22 15:58:06 +0000367/**
368 * sw and sh are the width and height of the src. Since the sum buffer
369 * matches that, but has an extra row and col at the beginning (with zeros),
370 * we can just use sw and sh as our "max" values for pinning coordinates
371 * when sampling into sum[][]
372 *
373 * The inner loop is conceptually simple; we break it into several variants
374 * to improve performance. Here's the original version:
375 for (int x = 0; x < dw; x++) {
376 int px = SkClampPos(prev_x);
377 int nx = SkFastMin32(next_x, sw);
378
379 int ipx = SkClampPos(prev_x + 1);
380 int inx = SkClampMax(next_x - 1, sw);
381
382 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
383 - sum[nx+py] - sum[px+ny];
384 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
385 - sum[inx+ipy] - sum[ipx+iny];
386 *dst++ = SkToU8((outer_sum * outer_scale
387 + inner_sum * inner_scale) >> 24);
388
389 prev_x += 1;
390 next_x += 1;
391 }
tomhudson@google.com01224d52011-11-28 18:22:01 +0000392 * The sections are:
393 * left-hand section, where prev_x is clamped to 0
394 * center section, where neither prev_x nor next_x is clamped
395 * right-hand section, where next_x is clamped to sw
396 * On some operating systems, the center section is unrolled for additional
397 * speedup.
tomhudson@google.com8caac642011-11-22 15:58:06 +0000398*/
399static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
400 const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {
401 SkASSERT(rx > 0 && ry > 0);
402 SkASSERT(outer_weight <= 255);
403
404 if (2*rx > sw) {
405 kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);
406 return;
407 }
408
409 int inner_weight = 255 - outer_weight;
410
411 // round these guys up if they're bigger than 127
412 outer_weight += outer_weight >> 7;
413 inner_weight += inner_weight >> 7;
414
415 uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));
416 uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));
417
418 int sumStride = sw + 1;
419
420 int dw = sw + 2*rx;
421 int dh = sh + 2*ry;
422
423 int prev_y = -2*ry;
424 int next_y = 1;
425
426 SkASSERT(2*rx <= dw - 2*rx);
427
428 for (int y = 0; y < dh; y++) {
429 int py = SkClampPos(prev_y) * sumStride;
430 int ny = SkFastMin32(next_y, sh) * sumStride;
431
432 int ipy = SkClampPos(prev_y + 1) * sumStride;
433 int iny = SkClampMax(next_y - 1, sh) * sumStride;
434
435 int prev_x = -2*rx;
436 int next_x = 1;
437 int x = 0;
438
439 for (; x < 2*rx; x++) {
440 SkASSERT(prev_x < 0);
441 SkASSERT(next_x <= sw);
442
443 int px = 0;
444 int nx = next_x;
445
446 int ipx = 0;
447 int inx = next_x - 1;
448
449 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
450 - sum[nx+py] - sum[px+ny];
451 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
452 - sum[inx+ipy] - sum[ipx+iny];
453 *dst++ = SkToU8((outer_sum * outer_scale
454 + inner_sum * inner_scale) >> 24);
455
456 prev_x += 1;
457 next_x += 1;
458 }
459
tomhudson@google.com01224d52011-11-28 18:22:01 +0000460 int i0 = prev_x + py;
461 int i1 = next_x + ny;
462 int i2 = next_x + py;
463 int i3 = prev_x + ny;
464 int i4 = prev_x + 1 + ipy;
465 int i5 = next_x - 1 + iny;
466 int i6 = next_x - 1 + ipy;
467 int i7 = prev_x + 1 + iny;
468
469#if UNROLL_KERNEL_LOOP
470 for (; x < dw - 2*rx - 4; x += 4) {
471 SkASSERT(prev_x >= 0);
472 SkASSERT(next_x <= sw);
473
474 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
475 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
476 *dst++ = SkToU8((outer_sum * outer_scale
477 + inner_sum * inner_scale) >> 24);
478 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
479 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
480 *dst++ = SkToU8((outer_sum * outer_scale
481 + inner_sum * inner_scale) >> 24);
482 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
483 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
484 *dst++ = SkToU8((outer_sum * outer_scale
485 + inner_sum * inner_scale) >> 24);
486 outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
487 inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
488 *dst++ = SkToU8((outer_sum * outer_scale
489 + inner_sum * inner_scale) >> 24);
490
491 prev_x += 4;
492 next_x += 4;
493 }
494#endif
495
tomhudson@google.com8caac642011-11-22 15:58:06 +0000496 for (; x < dw - 2*rx; x++) {
497 SkASSERT(prev_x >= 0);
498 SkASSERT(next_x <= sw);
499
tomhudson@google.com01224d52011-11-28 18:22:01 +0000500 uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
501 uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
tomhudson@google.com8caac642011-11-22 15:58:06 +0000502 *dst++ = SkToU8((outer_sum * outer_scale
503 + inner_sum * inner_scale) >> 24);
504
505 prev_x += 1;
506 next_x += 1;
507 }
508
509 for (; x < dw; x++) {
510 SkASSERT(prev_x >= 0);
511 SkASSERT(next_x > sw);
512
513 int px = prev_x;
514 int nx = sw;
515
516 int ipx = prev_x + 1;
517 int inx = sw;
518
519 uint32_t outer_sum = sum[px+py] + sum[nx+ny]
520 - sum[nx+py] - sum[px+ny];
521 uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]
522 - sum[inx+ipy] - sum[ipx+iny];
523 *dst++ = SkToU8((outer_sum * outer_scale
524 + inner_sum * inner_scale) >> 24);
525
526 prev_x += 1;
527 next_x += 1;
528 }
529
530 prev_y += 1;
531 next_y += 1;
532 }
533}
534
reed@android.com8a1c16f2008-12-17 15:59:43 +0000535#include "SkColorPriv.h"
536
reed@android.com0e3c6642009-09-18 13:41:56 +0000537static void merge_src_with_blur(uint8_t dst[], int dstRB,
538 const uint8_t src[], int srcRB,
539 const uint8_t blur[], int blurRB,
540 int sw, int sh) {
541 dstRB -= sw;
542 srcRB -= sw;
543 blurRB -= sw;
544 while (--sh >= 0) {
545 for (int x = sw - 1; x >= 0; --x) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000546 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
547 dst += 1;
548 src += 1;
549 blur += 1;
550 }
reed@android.com0e3c6642009-09-18 13:41:56 +0000551 dst += dstRB;
552 src += srcRB;
553 blur += blurRB;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000554 }
555}
556
557static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
reed@android.com0e3c6642009-09-18 13:41:56 +0000558 const uint8_t src[], int srcRowBytes,
559 int sw, int sh,
reed@android.com45607672009-09-21 00:27:08 +0000560 SkBlurMask::Style style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000561 int x;
reed@android.com0e3c6642009-09-18 13:41:56 +0000562 while (--sh >= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000563 switch (style) {
564 case SkBlurMask::kSolid_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000565 for (x = sw - 1; x >= 0; --x) {
566 int s = *src;
567 int d = *dst;
568 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
reed@android.com8a1c16f2008-12-17 15:59:43 +0000569 dst += 1;
570 src += 1;
571 }
572 break;
573 case SkBlurMask::kOuter_Style:
reed@android.com0e3c6642009-09-18 13:41:56 +0000574 for (x = sw - 1; x >= 0; --x) {
575 if (*src) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000576 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
reed@android.com0e3c6642009-09-18 13:41:56 +0000577 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000578 dst += 1;
579 src += 1;
580 }
581 break;
582 default:
tomhudson@google.com0c00f212011-12-28 14:59:50 +0000583 SkDEBUGFAIL("Unexpected blur style here");
reed@android.com8a1c16f2008-12-17 15:59:43 +0000584 break;
585 }
586 dst += dstRowBytes - sw;
reed@android.com0e3c6642009-09-18 13:41:56 +0000587 src += srcRowBytes - sw;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000588 }
589}
590
reed@google.com03016a32011-08-12 14:59:59 +0000591///////////////////////////////////////////////////////////////////////////////
reed@android.com8a1c16f2008-12-17 15:59:43 +0000592
593// we use a local funciton to wrap the class static method to work around
594// a bug in gcc98
595void SkMask_FreeImage(uint8_t* image);
reed@google.com03016a32011-08-12 14:59:59 +0000596void SkMask_FreeImage(uint8_t* image) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000597 SkMask::FreeImage(image);
598}
599
600bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
bungeman@google.com5af16f82011-09-02 15:06:44 +0000601 SkScalar radius, Style style, Quality quality,
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000602 SkIPoint* margin, bool separable)
bungeman@google.com5af16f82011-09-02 15:06:44 +0000603{
reed@google.com03016a32011-08-12 14:59:59 +0000604 if (src.fFormat != SkMask::kA8_Format) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000605 return false;
reed@google.com03016a32011-08-12 14:59:59 +0000606 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000607
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000608 // Force high quality off for small radii (performance)
609 if (radius < SkIntToScalar(3)) quality = kLow_Quality;
610
611 // highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur
612 int passCount = (quality == kHigh_Quality) ? 3 : 1;
613 SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));
614
615 int rx = SkScalarCeil(passRadius);
616 int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000617
618 SkASSERT(rx >= 0);
619 SkASSERT((unsigned)outer_weight <= 255);
reed@android.com0e3c6642009-09-18 13:41:56 +0000620 if (rx <= 0) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000621 return false;
reed@android.com0e3c6642009-09-18 13:41:56 +0000622 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000623
624 int ry = rx; // only do square blur for now
625
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000626 int padx = passCount * rx;
627 int pady = passCount * ry;
bungeman@google.com5af16f82011-09-02 15:06:44 +0000628 if (margin) {
629 margin->set(padx, pady);
630 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000631 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
632 src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
reed@android.com49f0ff22009-03-19 21:52:42 +0000633 dst->fRowBytes = dst->fBounds.width();
reed@android.com8a1c16f2008-12-17 15:59:43 +0000634 dst->fFormat = SkMask::kA8_Format;
635 dst->fImage = NULL;
636
reed@android.com0e3c6642009-09-18 13:41:56 +0000637 if (src.fImage) {
reed@android.com543ed932009-04-24 12:43:40 +0000638 size_t dstSize = dst->computeImageSize();
639 if (0 == dstSize) {
640 return false; // too big to allocate, abort
641 }
642
reed@android.com8a1c16f2008-12-17 15:59:43 +0000643 int sw = src.fBounds.width();
644 int sh = src.fBounds.height();
645 const uint8_t* sp = src.fImage;
reed@android.com543ed932009-04-24 12:43:40 +0000646 uint8_t* dp = SkMask::AllocImage(dstSize);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000647
648 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
649
650 // build the blurry destination
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000651 if (separable) {
652 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
653 uint8_t* tp = tmpBuffer.get();
654 int w = sw, h = sh;
655
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000656 if (quality == kHigh_Quality) {
senorblanco@chromium.org908276b2012-11-15 20:27:35 +0000657 // Do three X blurs, with a transpose on the final one.
658 w = boxBlur(sp, src.fRowBytes, tp, rx, w, h, false);
659 w = boxBlur(tp, w, dp, rx, w, h, false);
660 w = boxBlur(dp, w, tp, rx, w, h, true);
661 // Do three Y blurs, with a transpose on the final one.
662 h = boxBlur(tp, h, dp, ry, h, w, false);
663 h = boxBlur(dp, h, tp, ry, h, w, false);
664 h = boxBlur(tp, h, dp, ry, h, w, true);
665 } else {
666 w = boxBlur(sp, src.fRowBytes, tp, rx, w, h, true);
667 h = boxBlur(tp, h, dp, ry, h, w, true);
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000668 }
669 } else {
reed@google.com03016a32011-08-12 14:59:59 +0000670 const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;
671 const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;
672 SkAutoTMalloc<uint32_t> storage(storageW * storageH);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000673 uint32_t* sumBuffer = storage.get();
674
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000675 //pass1: sp is source, dp is destination
reed@android.com8a1c16f2008-12-17 15:59:43 +0000676 build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);
reed@google.com03016a32011-08-12 14:59:59 +0000677 if (outer_weight == 255) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000678 apply_kernel(dp, rx, ry, sumBuffer, sw, sh);
reed@google.com03016a32011-08-12 14:59:59 +0000679 } else {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000680 apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);
reed@google.com03016a32011-08-12 14:59:59 +0000681 }
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000682
reed@google.com03016a32011-08-12 14:59:59 +0000683 if (quality == kHigh_Quality) {
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000684 //pass2: dp is source, tmpBuffer is destination
685 int tmp_sw = sw + 2 * rx;
686 int tmp_sh = sh + 2 * ry;
687 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);
688 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);
689 if (outer_weight == 255)
690 apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);
691 else
reed@google.com03016a32011-08-12 14:59:59 +0000692 apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,
693 tmp_sw, tmp_sh, outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000694
695 //pass3: tmpBuffer is source, dp is destination
696 tmp_sw += 2 * rx;
697 tmp_sh += 2 * ry;
698 build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);
699 if (outer_weight == 255)
700 apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);
701 else
reed@google.com03016a32011-08-12 14:59:59 +0000702 apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,
703 outer_weight);
senorblanco@chromium.org4868e6b2011-02-18 19:03:01 +0000704 }
reed@android.com8a1c16f2008-12-17 15:59:43 +0000705 }
706
707 dst->fImage = dp;
708 // if need be, alloc the "real" dst (same size as src) and copy/merge
709 // the blur into it (applying the src)
reed@android.com0e3c6642009-09-18 13:41:56 +0000710 if (style == kInner_Style) {
711 // now we allocate the "real" dst, mirror the size of src
reed@android.com543ed932009-04-24 12:43:40 +0000712 size_t srcSize = src.computeImageSize();
713 if (0 == srcSize) {
714 return false; // too big to allocate, abort
715 }
716 dst->fImage = SkMask::AllocImage(srcSize);
reed@android.com0e3c6642009-09-18 13:41:56 +0000717 merge_src_with_blur(dst->fImage, src.fRowBytes,
718 sp, src.fRowBytes,
reed@google.com03016a32011-08-12 14:59:59 +0000719 dp + passCount * (rx + ry * dst->fRowBytes),
720 dst->fRowBytes, sw, sh);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000721 SkMask::FreeImage(dp);
reed@android.com0e3c6642009-09-18 13:41:56 +0000722 } else if (style != kNormal_Style) {
reed@google.com03016a32011-08-12 14:59:59 +0000723 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
724 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
reed@android.com8a1c16f2008-12-17 15:59:43 +0000725 }
726 (void)autoCall.detach();
727 }
728
reed@android.com0e3c6642009-09-18 13:41:56 +0000729 if (style == kInner_Style) {
reed@android.com8a1c16f2008-12-17 15:59:43 +0000730 dst->fBounds = src.fBounds; // restore trimmed bounds
reed@android.com0e3c6642009-09-18 13:41:56 +0000731 dst->fRowBytes = src.fRowBytes;
reed@android.com8a1c16f2008-12-17 15:59:43 +0000732 }
733
reed@android.com8a1c16f2008-12-17 15:59:43 +0000734 return true;
735}
736
senorblanco@chromium.org71f0f342012-11-13 20:35:21 +0000737bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,
738 SkScalar radius, Style style, Quality quality,
739 SkIPoint* margin)
740{
741 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);
742}
743
744bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
745 SkScalar radius, Style style, Quality quality,
746 SkIPoint* margin)
747{
748 return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);
749}