Blame - src/effects/SkBlurMask.cpp - platform/external/skia

2008-12-17 15:59:43 +0000

[diff] [blame]

9

10

#include "SkBlurMask.h"

tomhudson@google.com

889bd8b

2011-09-27 17:38:17 +0000

[diff] [blame]

11

#include "SkMath.h"

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

12

#include "SkTemplates.h"

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

13

#include "SkEndian.h"

14

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

15

/**

16

* This function performs a box blur in X, of the given radius. If the

skia.committer@gmail.com

884e60b

2012-11-16 02:01:17 +0000

[diff] [blame]

17

* "transpose" parameter is true, it will transpose the pixels on write,

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

18

* such that X and Y are swapped. Reads are always performed from contiguous

19

* memory in X, for speed. The destination buffer (dst) must be at least

20

* (width + radius * 2) * height bytes in size.

21

*/

22

static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

23

int leftRadius, int rightRadius, int width, int height,

24

bool transpose)

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

25

{

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

26

int kernelSize = leftRadius + rightRadius + 1;

27

int border = SkMin32(width, leftRadius + rightRadius);

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

28

uint32_t scale = (1 << 24) / kernelSize;

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

29

int new_width = width + SkMax32(leftRadius, rightRadius) * 2;

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

30

int dst_x_stride = transpose ? height : 1;

31

int dst_y_stride = transpose ? 1 : new_width;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

32

for (int y = 0; y < height; ++y) {

33

int sum = 0;

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

34

uint8_t* dptr = dst + y * dst_y_stride;

35

const uint8_t* right = src + y * src_y_stride;

36

const uint8_t* left = right;

senorblanco@chromium.org

336b4da

2012-11-20 17:09:40 +0000

[diff] [blame]

37

for (int x = 0; x < rightRadius - leftRadius; x++) {

38

*dptr = 0;

39

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

40

}

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

41

for (int x = 0; x < border; ++x) {

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

42

sum += *right++;

43

*dptr = (sum * scale) >> 24;

44

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

45

}

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

46

for (int x = width; x < leftRadius + rightRadius; ++x) {

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

47

*dptr = (sum * scale) >> 24;

48

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

49

}

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

50

for (int x = leftRadius + rightRadius; x < width; ++x) {

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

51

sum += *right++;

52

*dptr = (sum * scale) >> 24;

53

sum -= *left++;

54

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

55

}

56

for (int x = 0; x < border; ++x) {

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

57

*dptr = (sum * scale) >> 24;

58

sum -= *left++;

59

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

60

}

senorblanco@chromium.org

336b4da

2012-11-20 17:09:40 +0000

[diff] [blame]

61

for (int x = 0; x < leftRadius - rightRadius; x++) {

62

*dptr = 0;

63

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

64

}

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

65

SkASSERT(sum == 0);

66

}

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

67

return new_width;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

68

}

69

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

70

/**
 *  Splits a fractional per-pass radius into two integer radii.
 *  *hiRadius is always the ceiling of passRadius. *loRadius equals *hiRadius
 *  unless the ceiling overshoots passRadius by more than one half, in which
 *  case it is one less.
 */
static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
{
    const int ceilRadius = SkScalarCeil(passRadius);
    const bool overshootsByMoreThanHalf =
        SkIntToScalar(ceilRadius) - passRadius > SkFloatToScalar(0.5f);

    *hiRadius = ceilRadius;
    *loRadius = overshootsByMoreThanHalf ? ceilRadius - 1 : ceilRadius;
}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

78

// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,

79

// breakeven on Mac, and ~15% slowdown on Linux.

80

// Reading a word at a time when building the sum buffer seems to give

81

// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.

tomhudson@google.com

054ff1e

2012-01-11 19:29:08 +0000

[diff] [blame]

82

#if defined(SK_BUILD_FOR_WIN32)

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

83

#define UNROLL_KERNEL_LOOP 1

84

#endif

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

85

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

86

/** The sum buffer is an array of u32 to hold the accumulated sum of all of the

87

src values at their position, plus all values above and to the left.

88

When we sample into this buffer, we need an initial row and column of 0s,

89

so we have an index correspondence as follows:

rmistry@google.com

fbfcd56

2012-08-23 18:09:54 +0000

[diff] [blame]

90

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

91

src[i, j] == sum[i+1, j+1]

92

sum[0, j] == sum[i, 0] == 0

rmistry@google.com

fbfcd56

2012-08-23 18:09:54 +0000

[diff] [blame]

93

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

94

We assume that the sum buffer's stride == its width

95

*/

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

96

static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,

97

const uint8_t src[], int srcRB) {

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

98

int sumW = srcW + 1;

99

100

SkASSERT(srcRB >= srcW);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

101

// mod srcRB so we can apply it after each row

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

102

srcRB -= srcW;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int x, y;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

106

// zero out the top row and column

107

memset(sum, 0, sumW * sizeof(sum[0]));

108

sum += sumW;

109

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

110

// special case first row

111

uint32_t X = 0;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

112

*sum++ = 0; // initialze the first column to 0

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

113

for (x = srcW - 1; x >= 0; --x) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

114

X = *src++ + X;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

115

*sum++ = X;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

src += srcRB;

// now do the rest of the rows

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

120

for (y = srcH - 1; y > 0; --y) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

121

uint32_t L = 0;

122

uint32_t C = 0;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

123

*sum++ = 0; // initialze the first column to 0

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

124

125

for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {

126

uint32_t T = sum[-sumW];

127

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

}

for (; x >= 4; x-=4) {

134

uint32_t T = sum[-sumW];

135

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

}

for (; x >= 0; --x) {

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

157

uint32_t T = sum[-sumW];

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

158

X = *src++ + L + T - C;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

159

*sum++ = X;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

L = X;

C = T;

}

src += srcRB;

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

167

/**

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

168

* This is the path for apply_kernel() to be taken when the kernel

169

* is wider than the source image.

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

170

*/

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

171

static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],

int sw, int sh) {

SkASSERT(2*rx > sw);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

175

uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

176

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

177

int sumStride = sw + 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int dw = sw + 2*rx;

int dh = sh + 2*ry;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

182

int prev_y = -2*ry;

183

int next_y = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

184

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

185

for (int y = 0; y < dh; y++) {

186

int py = SkClampPos(prev_y) * sumStride;

187

int ny = SkFastMin32(next_y, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

188

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

189

int prev_x = -2*rx;

190

int next_x = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

191

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

192

for (int x = 0; x < dw; x++) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

193

int px = SkClampPos(prev_x);

194

int nx = SkFastMin32(next_x, sw);

195

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

196

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

197

*dst++ = SkToU8(tmp * scale >> 24);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

prev_y += 1;

next_y += 1;

}

}

/**

* sw and sh are the width and height of the src. Since the sum buffer

209

* matches that, but has an extra row and col at the beginning (with zeros),

210

* we can just use sw and sh as our "max" values for pinning coordinates

211

* when sampling into sum[][]

212

*

213

* The inner loop is conceptually simple; we break it into several sections

214

* to improve performance. Here's the original version:

215

for (int x = 0; x < dw; x++) {

216

int px = SkClampPos(prev_x);

217

int nx = SkFastMin32(next_x, sw);

218

219

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

220

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

225

* The sections are:

226

* left-hand section, where prev_x is clamped to 0

227

* center section, where neither prev_x nor next_x is clamped

228

* right-hand section, where next_x is clamped to sw

229

* On some operating systems, the center section is unrolled for additional

230

* speedup.

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

231

*/

232

static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],

233

int sw, int sh) {

234

if (2*rx > sw) {

235

kernel_clamped(dst, rx, ry, sum, sw, sh);

return;

}

uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

240

241

int sumStride = sw + 1;

int dw = sw + 2*rx;

int dh = sh + 2*ry;

int prev_y = -2*ry;

int next_y = 1;

SkASSERT(2*rx <= dw - 2*rx);

250

251

for (int y = 0; y < dh; y++) {

252

int py = SkClampPos(prev_y) * sumStride;

253

int ny = SkFastMin32(next_y, sh) * sumStride;

int prev_x = -2*rx;

int next_x = 1;

int x = 0;

for (; x < 2*rx; x++) {

260

SkASSERT(prev_x <= 0);

261

SkASSERT(next_x <= sw);

int px = 0;

int nx = next_x;

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

267

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

273

int i0 = prev_x + py;

274

int i1 = next_x + ny;

275

int i2 = next_x + py;

276

int i3 = prev_x + ny;

277

278

#if UNROLL_KERNEL_LOOP

279

for (; x < dw - 2*rx - 4; x += 4) {

280

SkASSERT(prev_x >= 0);

281

SkASSERT(next_x <= sw);

282

283

uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

284

*dst++ = SkToU8(tmp * scale >> 24);

285

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

286

*dst++ = SkToU8(tmp * scale >> 24);

287

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

288

*dst++ = SkToU8(tmp * scale >> 24);

289

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

290

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 4;

next_x += 4;

}

#endif

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

297

for (; x < dw - 2*rx; x++) {

298

SkASSERT(prev_x >= 0);

299

SkASSERT(next_x <= sw);

300

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

301

uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

302

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

for (; x < dw; x++) {

309

SkASSERT(prev_x >= 0);

310

SkASSERT(next_x > sw);

int px = prev_x;

int nx = sw;

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

316

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_y += 1;

next_y += 1;

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

327

/**

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

328

* This is the path for apply_kernel_interp() to be taken when the kernel

329

* is wider than the source image.

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

330

*/

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

331

static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

332

const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

333

SkASSERT(2*rx > sw);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

334

335

int inner_weight = 255 - outer_weight;

336

337

// round these guys up if they're bigger than 127

338

outer_weight += outer_weight >> 7;

339

inner_weight += inner_weight >> 7;

340

341

uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));

342

uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

343

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

344

int sumStride = sw + 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int dw = sw + 2*rx;

int dh = sh + 2*ry;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

349

int prev_y = -2*ry;

350

int next_y = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

351

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

352

for (int y = 0; y < dh; y++) {

353

int py = SkClampPos(prev_y) * sumStride;

354

int ny = SkFastMin32(next_y, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

355

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

356

int ipy = SkClampPos(prev_y + 1) * sumStride;

357

int iny = SkClampMax(next_y - 1, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

358

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

359

int prev_x = -2*rx;

360

int next_x = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

361

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

362

for (int x = 0; x < dw; x++) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

363

int px = SkClampPos(prev_x);

364

int nx = SkFastMin32(next_x, sw);

365

366

int ipx = SkClampPos(prev_x + 1);

367

int inx = SkClampMax(next_x - 1, sw);

368

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

369

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

370

- sum[nx+py] - sum[px+ny];

371

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

372

- sum[inx+ipy] - sum[ipx+iny];

373

*dst++ = SkToU8((outer_sum * outer_scale

374

+ inner_sum * inner_scale) >> 24);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_x += 1;

next_x += 1;

}

prev_y += 1;

next_y += 1;

}

}

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

384

/**

385

* sw and sh are the width and height of the src. Since the sum buffer

386

* matches that, but has an extra row and col at the beginning (with zeros),

387

* we can just use sw and sh as our "max" values for pinning coordinates

388

* when sampling into sum[][]

389

*

390

* The inner loop is conceptually simple; we break it into several variants

391

* to improve performance. Here's the original version:

392

for (int x = 0; x < dw; x++) {

393

int px = SkClampPos(prev_x);

394

int nx = SkFastMin32(next_x, sw);

395

396

int ipx = SkClampPos(prev_x + 1);

397

int inx = SkClampMax(next_x - 1, sw);

398

399

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

400

- sum[nx+py] - sum[px+ny];

401

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

402

- sum[inx+ipy] - sum[ipx+iny];

403

*dst++ = SkToU8((outer_sum * outer_scale

404

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

409

* The sections are:

410

* left-hand section, where prev_x is clamped to 0

411

* center section, where neither prev_x nor next_x is clamped

412

* right-hand section, where next_x is clamped to sw

413

* On some operating systems, the center section is unrolled for additional

414

* speedup.

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

415

*/

416

static void apply_kernel_interp(uint8_t dst[], int rx, int ry,

417

const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {

418

SkASSERT(rx > 0 && ry > 0);

419

SkASSERT(outer_weight <= 255);

420

421

if (2*rx > sw) {

422

kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);

return;

}

int inner_weight = 255 - outer_weight;

427

428

// round these guys up if they're bigger than 127

429

outer_weight += outer_weight >> 7;

430

inner_weight += inner_weight >> 7;

431

432

uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));

433

uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

434

435

int sumStride = sw + 1;

int dw = sw + 2*rx;

int dh = sh + 2*ry;

int prev_y = -2*ry;

int next_y = 1;

SkASSERT(2*rx <= dw - 2*rx);

444

445

for (int y = 0; y < dh; y++) {

446

int py = SkClampPos(prev_y) * sumStride;

447

int ny = SkFastMin32(next_y, sh) * sumStride;

448

449

int ipy = SkClampPos(prev_y + 1) * sumStride;

450

int iny = SkClampMax(next_y - 1, sh) * sumStride;

int prev_x = -2*rx;

int next_x = 1;

int x = 0;

for (; x < 2*rx; x++) {

457

SkASSERT(prev_x < 0);

458

SkASSERT(next_x <= sw);

int px = 0;

int nx = next_x;

int ipx = 0;

int inx = next_x - 1;

465

466

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

467

- sum[nx+py] - sum[px+ny];

468

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

469

- sum[inx+ipy] - sum[ipx+iny];

470

*dst++ = SkToU8((outer_sum * outer_scale

471

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

477

int i0 = prev_x + py;

478

int i1 = next_x + ny;

479

int i2 = next_x + py;

480

int i3 = prev_x + ny;

481

int i4 = prev_x + 1 + ipy;

482

int i5 = next_x - 1 + iny;

483

int i6 = next_x - 1 + ipy;

484

int i7 = prev_x + 1 + iny;

485

486

#if UNROLL_KERNEL_LOOP

487

for (; x < dw - 2*rx - 4; x += 4) {

488

SkASSERT(prev_x >= 0);

489

SkASSERT(next_x <= sw);

490

491

uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

492

uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

493

*dst++ = SkToU8((outer_sum * outer_scale

494

+ inner_sum * inner_scale) >> 24);

495

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

496

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

497

*dst++ = SkToU8((outer_sum * outer_scale

498

+ inner_sum * inner_scale) >> 24);

499

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

500

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

501

*dst++ = SkToU8((outer_sum * outer_scale

502

+ inner_sum * inner_scale) >> 24);

503

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

504

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

505

*dst++ = SkToU8((outer_sum * outer_scale

506

+ inner_sum * inner_scale) >> 24);

prev_x += 4;

next_x += 4;

}

#endif

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

513

for (; x < dw - 2*rx; x++) {

514

SkASSERT(prev_x >= 0);

515

SkASSERT(next_x <= sw);

516

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

517

uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

518

uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

519

*dst++ = SkToU8((outer_sum * outer_scale

520

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

for (; x < dw; x++) {

527

SkASSERT(prev_x >= 0);

528

SkASSERT(next_x > sw);

int px = prev_x;

int nx = sw;

int ipx = prev_x + 1;

534

int inx = sw;

535

536

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

537

- sum[nx+py] - sum[px+ny];

538

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

539

- sum[inx+ipy] - sum[ipx+iny];

540

*dst++ = SkToU8((outer_sum * outer_scale

541

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

prev_y += 1;

next_y += 1;

}

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

552

#include "SkColorPriv.h"

553

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

554

static void merge_src_with_blur(uint8_t dst[], int dstRB,

555

const uint8_t src[], int srcRB,

556

const uint8_t blur[], int blurRB,

int sw, int sh) {

dstRB -= sw;

srcRB -= sw;

blurRB -= sw;

while (--sh >= 0) {

for (int x = sw - 1; x >= 0; --x) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

563

*dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));

dst += 1;

src += 1;

blur += 1;

}

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

568

dst += dstRB;

569

src += srcRB;

570

blur += blurRB;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

}

static void clamp_with_orig(uint8_t dst[], int dstRowBytes,

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

575

const uint8_t src[], int srcRowBytes,

576

int sw, int sh,

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

577

SkBlurMask::Style style) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

578

int x;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

579

while (--sh >= 0) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

580

switch (style) {

581

case SkBlurMask::kSolid_Style:

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

582

for (x = sw - 1; x >= 0; --x) {

583

int s = *src;

584

int d = *dst;

585

*dst = SkToU8(s + d - SkMulDiv255Round(s, d));

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

dst += 1;

src += 1;

}

break;

case SkBlurMask::kOuter_Style:

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

591

for (x = sw - 1; x >= 0; --x) {

592

if (*src) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

593

*dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

594

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

dst += 1;

src += 1;

}

break;

default:

tomhudson@google.com

0c00f21

2011-12-28 14:59:50 +0000

[diff] [blame]

600

SkDEBUGFAIL("Unexpected blur style here");

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

601

break;

602

}

603

dst += dstRowBytes - sw;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

604

src += srcRowBytes - sw;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

608

///////////////////////////////////////////////////////////////////////////////

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

609

610

// We use a local function to wrap the class static method to work around

611

// a bug in gcc98. The wrapper just forwards to SkMask::FreeImage().

612

// Declared ahead of its definition.
void SkMask_FreeImage(uint8_t* image);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

613

void SkMask_FreeImage(uint8_t* image) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

614

SkMask::FreeImage(image);

615

}

616

617

bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

618

SkScalar radius, Style style, Quality quality,

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

619

SkIPoint* margin, bool separable)

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

620

{

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

621

if (src.fFormat != SkMask::kA8_Format) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

622

return false;

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

623

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

624

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

625

// Force high quality off for small radii (performance)

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

626

if (radius < SkIntToScalar(3) && !separable) quality = kLow_Quality;

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

627

628

// highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur

629

int passCount = (quality == kHigh_Quality) ? 3 : 1;

630

SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));

631

632

int rx = SkScalarCeil(passRadius);

633

int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

634

635

SkASSERT(rx >= 0);

636

SkASSERT((unsigned)outer_weight <= 255);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

637

if (rx <= 0) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

638

return false;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

639

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

640

641

int ry = rx; // only do square blur for now

642

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

643

int padx = passCount * rx;

644

int pady = passCount * ry;

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

645

if (margin) {

646

margin->set(padx, pady);

647

}

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

648

dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,

649

src.fBounds.fRight + padx, src.fBounds.fBottom + pady);

reed@android.com

49f0ff2

2009-03-19 21:52:42 +0000

[diff] [blame]

650

dst->fRowBytes = dst->fBounds.width();

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

651

dst->fFormat = SkMask::kA8_Format;

652

dst->fImage = NULL;

653

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

654

if (src.fImage) {

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

655

size_t dstSize = dst->computeImageSize();

656

if (0 == dstSize) {

657

return false; // too big to allocate, abort

658

}

659

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

660

int sw = src.fBounds.width();

661

int sh = src.fBounds.height();

662

const uint8_t* sp = src.fImage;

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

663

uint8_t* dp = SkMask::AllocImage(dstSize);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

664

665

SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);

666

667

// build the blurry destination

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

668

if (separable) {

669

SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);

670

uint8_t* tp = tmpBuffer.get();

671

int w = sw, h = sh;

672

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

673

if (quality == kHigh_Quality) {

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

674

int loRadius, hiRadius;

675

get_adjusted_radii(passRadius, &loRadius, &hiRadius);

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

676

// Do three X blurs, with a transpose on the final one.

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

677

w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);

678

w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);

679

w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

680

// Do three Y blurs, with a transpose on the final one.

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

681

h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);

682

h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);

683

h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

684

} else {

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

685

w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);

686

h = boxBlur(tp, h, dp, ry, ry, h, w, true);

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

687

}

688

} else {

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

689

const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;

690

const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;

691

SkAutoTMalloc<uint32_t> storage(storageW * storageH);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

692

uint32_t* sumBuffer = storage.get();

693

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

694

//pass1: sp is source, dp is destination

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

695

build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

696

if (outer_weight == 255) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

697

apply_kernel(dp, rx, ry, sumBuffer, sw, sh);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

698

} else {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

699

apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

700

}

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

701

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

702

if (quality == kHigh_Quality) {

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

703

//pass2: dp is source, tmpBuffer is destination

704

int tmp_sw = sw + 2 * rx;

705

int tmp_sh = sh + 2 * ry;

706

SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);

707

build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);

708

if (outer_weight == 255)

709

apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);

710

else

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

711

apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,

712

tmp_sw, tmp_sh, outer_weight);

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

713

714

//pass3: tmpBuffer is source, dp is destination

715

tmp_sw += 2 * rx;

716

tmp_sh += 2 * ry;

717

build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);

718

if (outer_weight == 255)

719

apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);

720

else

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

721

apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,

722

outer_weight);

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

723

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

dst->fImage = dp;

// if need be, alloc the "real" dst (same size as src) and copy/merge

728

// the blur into it (applying the src)

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

729

if (style == kInner_Style) {

730

// now we allocate the "real" dst, mirror the size of src

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

731

size_t srcSize = src.computeImageSize();

732

if (0 == srcSize) {

733

return false; // too big to allocate, abort

734

}

735

dst->fImage = SkMask::AllocImage(srcSize);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

736

merge_src_with_blur(dst->fImage, src.fRowBytes,

737

sp, src.fRowBytes,

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

738

dp + passCount * (rx + ry * dst->fRowBytes),

739

dst->fRowBytes, sw, sh);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

740

SkMask::FreeImage(dp);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

741

} else if (style != kNormal_Style) {

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

742

clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),

743

dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

744

}

745

(void)autoCall.detach();

746

}

747

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

748

if (style == kInner_Style) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

749

dst->fBounds = src.fBounds; // restore trimmed bounds

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

750

dst->fRowBytes = src.fRowBytes;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

751

}

752

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

753

return true;

754

}

755

senorblanco@chromium.org