Blame - src/effects/SkBlurMask.cpp - platform/external/skia

2008-12-17 15:59:43 +0000

[diff] [blame]

9

10

#include "SkBlurMask.h"

tomhudson@google.com

889bd8b

2011-09-27 17:38:17 +0000

[diff] [blame]

11

#include "SkMath.h"

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

12

#include "SkTemplates.h"

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

13

#include "SkEndian.h"

14

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

15

/**

16

* This function performs a box blur in X, of the given radius. If the

skia.committer@gmail.com

884e60b

2012-11-16 02:01:17 +0000

[diff] [blame]

17

* "transpose" parameter is true, it will transpose the pixels on write,

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

18

* such that X and Y are swapped. Reads are always performed from contiguous

19

* memory in X, for speed. The destination buffer (dst) must be at least

20

* (width + radius * 2) * height bytes in size.

21

*/

22

static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

23

int leftRadius, int rightRadius, int width, int height,

24

bool transpose)

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

25

{

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

26

int kernelSize = leftRadius + rightRadius + 1;

27

int border = SkMin32(width, leftRadius + rightRadius);

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

28

uint32_t scale = (1 << 24) / kernelSize;

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

29

int new_width = width + SkMax32(leftRadius, rightRadius) * 2;

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

30

int dst_x_stride = transpose ? height : 1;

31

int dst_y_stride = transpose ? 1 : new_width;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

32

for (int y = 0; y < height; ++y) {

33

int sum = 0;

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

34

uint8_t* dptr = dst + y * dst_y_stride;

35

const uint8_t* right = src + y * src_y_stride;

36

const uint8_t* left = right;

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

37

for (int x = 0; x < leftRadius - rightRadius; x++) {

38

*dptr++ = 0;

39

}

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

40

for (int x = 0; x < border; ++x) {

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

41

sum += *right++;

42

*dptr = (sum * scale) >> 24;

43

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

44

}

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

45

for (int x = width; x < leftRadius + rightRadius; ++x) {

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

46

*dptr = (sum * scale) >> 24;

47

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

48

}

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

49

for (int x = leftRadius + rightRadius; x < width; ++x) {

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

50

sum += *right++;

51

*dptr = (sum * scale) >> 24;

52

sum -= *left++;

53

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

54

}

55

for (int x = 0; x < border; ++x) {

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

56

*dptr = (sum * scale) >> 24;

57

sum -= *left++;

58

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

59

}

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

60

for (int x = 0; x < rightRadius - leftRadius; x++) {

61

*dptr++ = 0;

62

}

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

63

SkASSERT(sum == 0);

64

}

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

65

return new_width;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

66

}

67

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

68

/**
 * Splits a fractional per-pass radius into a pair of integer radii.
 *
 * hiRadius is always the ceiling of passRadius. loRadius equals hiRadius,
 * unless the ceiling overshoots passRadius by more than one half, in which
 * case loRadius is one less.
 */
static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
{
    const int ceilRadius = SkScalarCeil(passRadius);
    const bool overshoots =
            SkIntToScalar(ceilRadius) - passRadius > SkFloatToScalar(0.5f);

    *hiRadius = ceilRadius;
    *loRadius = overshoots ? ceilRadius - 1 : ceilRadius;
}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

76

// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,
// breakeven on Mac, and ~15% slowdown on Linux.
// Reading a word at a time when building the sum buffer seems to give
// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.
//
// UNROLL_KERNEL_LOOP is always defined (as 0 or 1), so the
// "#if UNROLL_KERNEL_LOOP" tests below never evaluate an undefined macro.
#if defined(SK_BUILD_FOR_WIN32)
    #define UNROLL_KERNEL_LOOP 1
#elif !defined(UNROLL_KERNEL_LOOP)
    #define UNROLL_KERNEL_LOOP 0
#endif

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

83

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

84

/** The sum buffer is an array of u32 to hold the accumulated sum of all of the

85

src values at their position, plus all values above and to the left.

86

When we sample into this buffer, we need an initial row and column of 0s,

87

so we have an index correspondence as follows:

rmistry@google.com

fbfcd56

2012-08-23 18:09:54 +0000

[diff] [blame]

88

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

89

src[i, j] == sum[i+1, j+1]

90

sum[0, j] == sum[i, 0] == 0

rmistry@google.com

fbfcd56

2012-08-23 18:09:54 +0000

[diff] [blame]

91

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

92

We assume that the sum buffer's stride == its width

93

*/

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

94

static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,

95

const uint8_t src[], int srcRB) {

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

96

int sumW = srcW + 1;

97

98

SkASSERT(srcRB >= srcW);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

99

// mod srcRB so we can apply it after each row

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

100

srcRB -= srcW;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int x, y;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

104

// zero out the top row and column

105

memset(sum, 0, sumW * sizeof(sum[0]));

106

sum += sumW;

107

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

108

// special case first row

109

uint32_t X = 0;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

110

*sum++ = 0; // initialze the first column to 0

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

111

for (x = srcW - 1; x >= 0; --x) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

112

X = *src++ + X;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

113

*sum++ = X;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

src += srcRB;

// now do the rest of the rows

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

118

for (y = srcH - 1; y > 0; --y) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

119

uint32_t L = 0;

120

uint32_t C = 0;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

121

*sum++ = 0; // initialze the first column to 0

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

122

123

for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {

124

uint32_t T = sum[-sumW];

125

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

}

for (; x >= 4; x-=4) {

132

uint32_t T = sum[-sumW];

133

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

}

for (; x >= 0; --x) {

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

155

uint32_t T = sum[-sumW];

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

156

X = *src++ + L + T - C;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

157

*sum++ = X;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

L = X;

C = T;

}

src += srcRB;

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

165

/**

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

166

* This is the path for apply_kernel() to be taken when the kernel

167

* is wider than the source image.

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

168

*/

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

169

static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],

int sw, int sh) {

SkASSERT(2*rx > sw);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

173

uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

174

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

175

int sumStride = sw + 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int dw = sw + 2*rx;

int dh = sh + 2*ry;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

180

int prev_y = -2*ry;

181

int next_y = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

182

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

183

for (int y = 0; y < dh; y++) {

184

int py = SkClampPos(prev_y) * sumStride;

185

int ny = SkFastMin32(next_y, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

186

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

187

int prev_x = -2*rx;

188

int next_x = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

189

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

190

for (int x = 0; x < dw; x++) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

191

int px = SkClampPos(prev_x);

192

int nx = SkFastMin32(next_x, sw);

193

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

194

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

195

*dst++ = SkToU8(tmp * scale >> 24);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

prev_y += 1;

next_y += 1;

}

}

/**

* sw and sh are the width and height of the src. Since the sum buffer

207

* matches that, but has an extra row and col at the beginning (with zeros),

208

* we can just use sw and sh as our "max" values for pinning coordinates

209

* when sampling into sum[][]

210

*

211

* The inner loop is conceptually simple; we break it into several sections

212

* to improve performance. Here's the original version:

213

for (int x = 0; x < dw; x++) {

214

int px = SkClampPos(prev_x);

215

int nx = SkFastMin32(next_x, sw);

216

217

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

218

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

223

* The sections are:

224

* left-hand section, where prev_x is clamped to 0

225

* center section, where neither prev_x nor next_x is clamped

226

* right-hand section, where next_x is clamped to sw

227

* On some operating systems, the center section is unrolled for additional

228

* speedup.

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

229

*/

230

static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],

231

int sw, int sh) {

232

if (2*rx > sw) {

233

kernel_clamped(dst, rx, ry, sum, sw, sh);

return;

}

uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

238

239

int sumStride = sw + 1;

int dw = sw + 2*rx;

int dh = sh + 2*ry;

int prev_y = -2*ry;

int next_y = 1;

SkASSERT(2*rx <= dw - 2*rx);

248

249

for (int y = 0; y < dh; y++) {

250

int py = SkClampPos(prev_y) * sumStride;

251

int ny = SkFastMin32(next_y, sh) * sumStride;

int prev_x = -2*rx;

int next_x = 1;

int x = 0;

for (; x < 2*rx; x++) {

258

SkASSERT(prev_x <= 0);

259

SkASSERT(next_x <= sw);

int px = 0;

int nx = next_x;

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

265

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

271

int i0 = prev_x + py;

272

int i1 = next_x + ny;

273

int i2 = next_x + py;

274

int i3 = prev_x + ny;

275

276

#if UNROLL_KERNEL_LOOP

277

for (; x < dw - 2*rx - 4; x += 4) {

278

SkASSERT(prev_x >= 0);

279

SkASSERT(next_x <= sw);

280

281

uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

282

*dst++ = SkToU8(tmp * scale >> 24);

283

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

284

*dst++ = SkToU8(tmp * scale >> 24);

285

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

286

*dst++ = SkToU8(tmp * scale >> 24);

287

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

288

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 4;

next_x += 4;

}

#endif

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

295

for (; x < dw - 2*rx; x++) {

296

SkASSERT(prev_x >= 0);

297

SkASSERT(next_x <= sw);

298

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

299

uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

300

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

for (; x < dw; x++) {

307

SkASSERT(prev_x >= 0);

308

SkASSERT(next_x > sw);

int px = prev_x;

int nx = sw;

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

314

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_y += 1;

next_y += 1;

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

325

/**

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

326

* This is the path for apply_kernel_interp() to be taken when the kernel

327

* is wider than the source image.

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

328

*/

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

329

static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

330

const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

331

SkASSERT(2*rx > sw);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

332

333

int inner_weight = 255 - outer_weight;

334

335

// round these guys up if they're bigger than 127

336

outer_weight += outer_weight >> 7;

337

inner_weight += inner_weight >> 7;

338

339

uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));

340

uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

341

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

342

int sumStride = sw + 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int dw = sw + 2*rx;

int dh = sh + 2*ry;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

347

int prev_y = -2*ry;

348

int next_y = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

349

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

350

for (int y = 0; y < dh; y++) {

351

int py = SkClampPos(prev_y) * sumStride;

352

int ny = SkFastMin32(next_y, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

353

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

354

int ipy = SkClampPos(prev_y + 1) * sumStride;

355

int iny = SkClampMax(next_y - 1, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

356

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

357

int prev_x = -2*rx;

358

int next_x = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

359

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

360

for (int x = 0; x < dw; x++) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

361

int px = SkClampPos(prev_x);

362

int nx = SkFastMin32(next_x, sw);

363

364

int ipx = SkClampPos(prev_x + 1);

365

int inx = SkClampMax(next_x - 1, sw);

366

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

367

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

368

- sum[nx+py] - sum[px+ny];

369

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

370

- sum[inx+ipy] - sum[ipx+iny];

371

*dst++ = SkToU8((outer_sum * outer_scale

372

+ inner_sum * inner_scale) >> 24);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_x += 1;

next_x += 1;

}

prev_y += 1;

next_y += 1;

}

}

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

382

/**

383

* sw and sh are the width and height of the src. Since the sum buffer

384

* matches that, but has an extra row and col at the beginning (with zeros),

385

* we can just use sw and sh as our "max" values for pinning coordinates

386

* when sampling into sum[][]

387

*

388

* The inner loop is conceptually simple; we break it into several variants

389

* to improve performance. Here's the original version:

390

for (int x = 0; x < dw; x++) {

391

int px = SkClampPos(prev_x);

392

int nx = SkFastMin32(next_x, sw);

393

394

int ipx = SkClampPos(prev_x + 1);

395

int inx = SkClampMax(next_x - 1, sw);

396

397

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

398

- sum[nx+py] - sum[px+ny];

399

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

400

- sum[inx+ipy] - sum[ipx+iny];

401

*dst++ = SkToU8((outer_sum * outer_scale

402

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

407

* The sections are:

408

* left-hand section, where prev_x is clamped to 0

409

* center section, where neither prev_x nor next_x is clamped

410

* right-hand section, where next_x is clamped to sw

411

* On some operating systems, the center section is unrolled for additional

412

* speedup.

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

413

*/

414

static void apply_kernel_interp(uint8_t dst[], int rx, int ry,

415

const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {

416

SkASSERT(rx > 0 && ry > 0);

417

SkASSERT(outer_weight <= 255);

418

419

if (2*rx > sw) {

420

kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);

return;

}

int inner_weight = 255 - outer_weight;

425

426

// round these guys up if they're bigger than 127

427

outer_weight += outer_weight >> 7;

428

inner_weight += inner_weight >> 7;

429

430

uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));

431

uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

432

433

int sumStride = sw + 1;

int dw = sw + 2*rx;

int dh = sh + 2*ry;

int prev_y = -2*ry;

int next_y = 1;

SkASSERT(2*rx <= dw - 2*rx);

442

443

for (int y = 0; y < dh; y++) {

444

int py = SkClampPos(prev_y) * sumStride;

445

int ny = SkFastMin32(next_y, sh) * sumStride;

446

447

int ipy = SkClampPos(prev_y + 1) * sumStride;

448

int iny = SkClampMax(next_y - 1, sh) * sumStride;

int prev_x = -2*rx;

int next_x = 1;

int x = 0;

for (; x < 2*rx; x++) {

455

SkASSERT(prev_x < 0);

456

SkASSERT(next_x <= sw);

int px = 0;

int nx = next_x;

int ipx = 0;

int inx = next_x - 1;

463

464

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

465

- sum[nx+py] - sum[px+ny];

466

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

467

- sum[inx+ipy] - sum[ipx+iny];

468

*dst++ = SkToU8((outer_sum * outer_scale

469

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

475

int i0 = prev_x + py;

476

int i1 = next_x + ny;

477

int i2 = next_x + py;

478

int i3 = prev_x + ny;

479

int i4 = prev_x + 1 + ipy;

480

int i5 = next_x - 1 + iny;

481

int i6 = next_x - 1 + ipy;

482

int i7 = prev_x + 1 + iny;

483

484

#if UNROLL_KERNEL_LOOP

485

for (; x < dw - 2*rx - 4; x += 4) {

486

SkASSERT(prev_x >= 0);

487

SkASSERT(next_x <= sw);

488

489

uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

490

uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

491

*dst++ = SkToU8((outer_sum * outer_scale

492

+ inner_sum * inner_scale) >> 24);

493

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

494

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

495

*dst++ = SkToU8((outer_sum * outer_scale

496

+ inner_sum * inner_scale) >> 24);

497

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

498

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

499

*dst++ = SkToU8((outer_sum * outer_scale

500

+ inner_sum * inner_scale) >> 24);

501

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

502

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

503

*dst++ = SkToU8((outer_sum * outer_scale

504

+ inner_sum * inner_scale) >> 24);

prev_x += 4;

next_x += 4;

}

#endif

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

511

for (; x < dw - 2*rx; x++) {

512

SkASSERT(prev_x >= 0);

513

SkASSERT(next_x <= sw);

514

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

515

uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

516

uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

517

*dst++ = SkToU8((outer_sum * outer_scale

518

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

for (; x < dw; x++) {

525

SkASSERT(prev_x >= 0);

526

SkASSERT(next_x > sw);

int px = prev_x;

int nx = sw;

int ipx = prev_x + 1;

532

int inx = sw;

533

534

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

535

- sum[nx+py] - sum[px+ny];

536

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

537

- sum[inx+ipy] - sum[ipx+iny];

538

*dst++ = SkToU8((outer_sum * outer_scale

539

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

prev_y += 1;

next_y += 1;

}

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

550

#include "SkColorPriv.h"

551

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

552

static void merge_src_with_blur(uint8_t dst[], int dstRB,

553

const uint8_t src[], int srcRB,

554

const uint8_t blur[], int blurRB,

int sw, int sh) {

dstRB -= sw;

srcRB -= sw;

blurRB -= sw;

while (--sh >= 0) {

for (int x = sw - 1; x >= 0; --x) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

561

*dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));

dst += 1;

src += 1;

blur += 1;

}

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

566

dst += dstRB;

567

src += srcRB;

568

blur += blurRB;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

}

static void clamp_with_orig(uint8_t dst[], int dstRowBytes,

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

573

const uint8_t src[], int srcRowBytes,

574

int sw, int sh,

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

575

SkBlurMask::Style style) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

576

int x;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

577

while (--sh >= 0) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

578

switch (style) {

579

case SkBlurMask::kSolid_Style:

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

580

for (x = sw - 1; x >= 0; --x) {

581

int s = *src;

582

int d = *dst;

583

*dst = SkToU8(s + d - SkMulDiv255Round(s, d));

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

dst += 1;

src += 1;

}

break;

case SkBlurMask::kOuter_Style:

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

589

for (x = sw - 1; x >= 0; --x) {

590

if (*src) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

591

*dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

592

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

dst += 1;

src += 1;

}

break;

default:

tomhudson@google.com

0c00f21

2011-12-28 14:59:50 +0000

[diff] [blame]

598

SkDEBUGFAIL("Unexpected blur style here");

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

599

break;

600

}

601

dst += dstRowBytes - sw;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

602

src += srcRowBytes - sw;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

606

///////////////////////////////////////////////////////////////////////////////

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

607

608

// we use a local funciton to wrap the class static method to work around

609

// a bug in gcc98

610

void SkMask_FreeImage(uint8_t* image);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

611

void SkMask_FreeImage(uint8_t* image) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

612

SkMask::FreeImage(image);

613

}

614

615

bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

616

SkScalar radius, Style style, Quality quality,

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

617

SkIPoint* margin, bool separable)

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

618

{

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

619

if (src.fFormat != SkMask::kA8_Format) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

620

return false;

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

621

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

622

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

623

// Force high quality off for small radii (performance)

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

624

if (radius < SkIntToScalar(3) && !separable) quality = kLow_Quality;

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

625

626

// highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur

627

int passCount = (quality == kHigh_Quality) ? 3 : 1;

628

SkScalar passRadius = SkScalarDiv(radius, SkScalarSqrt(SkIntToScalar(passCount)));

629

630

int rx = SkScalarCeil(passRadius);

631

int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

632

633

SkASSERT(rx >= 0);

634

SkASSERT((unsigned)outer_weight <= 255);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

635

if (rx <= 0) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

636

return false;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

637

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

638

639

int ry = rx; // only do square blur for now

640

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

641

int padx = passCount * rx;

642

int pady = passCount * ry;

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

643

if (margin) {

644

margin->set(padx, pady);

645

}

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

646

dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,

647

src.fBounds.fRight + padx, src.fBounds.fBottom + pady);

reed@android.com

49f0ff2

2009-03-19 21:52:42 +0000

[diff] [blame]

648

dst->fRowBytes = dst->fBounds.width();

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

649

dst->fFormat = SkMask::kA8_Format;

650

dst->fImage = NULL;

651

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

652

if (src.fImage) {

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

653

size_t dstSize = dst->computeImageSize();

654

if (0 == dstSize) {

655

return false; // too big to allocate, abort

656

}

657

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

658

int sw = src.fBounds.width();

659

int sh = src.fBounds.height();

660

const uint8_t* sp = src.fImage;

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

661

uint8_t* dp = SkMask::AllocImage(dstSize);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

662

663

SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);

664

665

// build the blurry destination

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

666

if (separable) {

667

SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);

668

uint8_t* tp = tmpBuffer.get();

669

int w = sw, h = sh;

670

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

671

if (quality == kHigh_Quality) {

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

672

int loRadius, hiRadius;

673

get_adjusted_radii(passRadius, &loRadius, &hiRadius);

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

674

// Do three X blurs, with a transpose on the final one.

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

675

w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);

676

w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);

677

w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

678

// Do three Y blurs, with a transpose on the final one.

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

679

h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);

680

h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);

681

h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

682

} else {

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

683

w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);

684

h = boxBlur(tp, h, dp, ry, ry, h, w, true);

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

685

}

686

} else {

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

687

const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;

688

const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;

689

SkAutoTMalloc<uint32_t> storage(storageW * storageH);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

690

uint32_t* sumBuffer = storage.get();

691

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

692

//pass1: sp is source, dp is destination

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

693

build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

694

if (outer_weight == 255) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

695

apply_kernel(dp, rx, ry, sumBuffer, sw, sh);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

696

} else {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

697

apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

698

}

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

699

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

700

if (quality == kHigh_Quality) {

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

701

//pass2: dp is source, tmpBuffer is destination

702

int tmp_sw = sw + 2 * rx;

703

int tmp_sh = sh + 2 * ry;

704

SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);

705

build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);

706

if (outer_weight == 255)

707

apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);

708

else

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

709

apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,

710

tmp_sw, tmp_sh, outer_weight);

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

711

712

//pass3: tmpBuffer is source, dp is destination

713

tmp_sw += 2 * rx;

714

tmp_sh += 2 * ry;

715

build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);

716

if (outer_weight == 255)

717

apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);

718

else

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

719

apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,

720

outer_weight);

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

721

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

dst->fImage = dp;

// if need be, alloc the "real" dst (same size as src) and copy/merge

726

// the blur into it (applying the src)

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

727

if (style == kInner_Style) {

728

// now we allocate the "real" dst, mirror the size of src

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

729

size_t srcSize = src.computeImageSize();

730

if (0 == srcSize) {

731

return false; // too big to allocate, abort

732

}

733

dst->fImage = SkMask::AllocImage(srcSize);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

734

merge_src_with_blur(dst->fImage, src.fRowBytes,

735

sp, src.fRowBytes,

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

736

dp + passCount * (rx + ry * dst->fRowBytes),

737

dst->fRowBytes, sw, sh);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

738

SkMask::FreeImage(dp);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

739

} else if (style != kNormal_Style) {

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

740

clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),

741

dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

742

}

743

(void)autoCall.detach();

744

}

745

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

746

if (style == kInner_Style) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

747

dst->fBounds = src.fBounds; // restore trimmed bounds

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

748

dst->fRowBytes = src.fRowBytes;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

749

}

750

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

751

return true;

752

}

753

senorblanco@chromium.org