Blame - src/effects/SkBlurMask.cpp - platform/external/skia

2008-12-17 15:59:43 +0000

[diff] [blame]

9

10

#include "SkBlurMask.h"

tomhudson@google.com

889bd8b

2011-09-27 17:38:17 +0000

[diff] [blame]

11

#include "SkMath.h"

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

12

#include "SkTemplates.h"

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

13

#include "SkEndian.h"

14

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

15

// scale factor for the blur radius to match the behavior of all the existing blur

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

16

// code (both on the CPU and the GPU). This magic constant is 1/sqrt(3).

17

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

18

// TODO: get rid of this fudge factor and move any required fudging up into

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

19

// the calling library

20

21

#define kBlurRadiusFudgeFactor SkFloatToScalar( .57735f )

22

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

23

#define UNROLL_SEPARABLE_LOOPS

24

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

25

#define SK_DISABLE_BLUR_ROUNDING

26

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

27

/**

28

* This function performs a box blur in X, of the given radius. If the

skia.committer@gmail.com

884e60b

2012-11-16 02:01:17 +0000

[diff] [blame]

29

* "transpose" parameter is true, it will transpose the pixels on write,

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

30

* such that X and Y are swapped. Reads are always performed from contiguous

31

* memory in X, for speed. The destination buffer (dst) must be at least

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

32

* (width + 2 * max(leftRadius, rightRadius)) * height bytes in size.

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

33

*

34

* This is what the inner loop looks like before unrolling, and with the two

35

* cases broken out separately (width < diameter, width >= diameter):

36

*

37

* if (width < diameter) {

38

* for (int x = 0; x < width; ++x) {

39

* sum += *right++;

40

* *dptr = (sum * scale + half) >> 24;

41

* dptr += dst_x_stride;

42

* }

43

* for (int x = width; x < diameter; ++x) {

44

* *dptr = (sum * scale + half) >> 24;

45

* dptr += dst_x_stride;

46

* }

47

* for (int x = 0; x < width; ++x) {

48

* *dptr = (sum * scale + half) >> 24;

49

* sum -= *left++;

50

* dptr += dst_x_stride;

51

* }

52

* } else {

53

* for (int x = 0; x < diameter; ++x) {

54

* sum += *right++;

55

* *dptr = (sum * scale + half) >> 24;

56

* dptr += dst_x_stride;

57

* }

58

* for (int x = diameter; x < width; ++x) {

59

* sum += *right++;

60

* *dptr = (sum * scale + half) >> 24;

61

* sum -= *left++;

62

* dptr += dst_x_stride;

63

* }

64

* for (int x = 0; x < diameter; ++x) {

65

* *dptr = (sum * scale + half) >> 24;

66

* sum -= *left++;

67

* dptr += dst_x_stride;

68

* }

69

* }

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

70

*/

71

static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

72

int leftRadius, int rightRadius, int width, int height,

73

bool transpose)

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

74

{

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

75

int diameter = leftRadius + rightRadius;

76

int kernelSize = diameter + 1;

77

int border = SkMin32(width, diameter);

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

78

uint32_t scale = (1 << 24) / kernelSize;

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

79

int new_width = width + SkMax32(leftRadius, rightRadius) * 2;

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

80

int dst_x_stride = transpose ? height : 1;

81

int dst_y_stride = transpose ? 1 : new_width;

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

82

#ifndef SK_DISABLE_BLUR_ROUNDING

83

uint32_t half = 1 << 23;

84

#else

85

uint32_t half = 0;

86

#endif

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

87

for (int y = 0; y < height; ++y) {

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

88

uint32_t sum = 0;

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

89

uint8_t* dptr = dst + y * dst_y_stride;

90

const uint8_t* right = src + y * src_y_stride;

91

const uint8_t* left = right;

senorblanco@chromium.org

336b4da

2012-11-20 17:09:40 +0000

[diff] [blame]

92

for (int x = 0; x < rightRadius - leftRadius; x++) {

93

*dptr = 0;

94

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

95

}

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

96

#define LEFT_BORDER_ITER \

97

sum += *right++; \

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

98

*dptr = (sum * scale + half) >> 24; \

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

99

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

100

101

int x = 0;

102

#ifdef UNROLL_SEPARABLE_LOOPS

103

for (; x < border - 16; x += 16) {

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

120

}

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

121

#endif

122

for (; x < border; ++x) {

123

LEFT_BORDER_ITER

124

}

125

#undef LEFT_BORDER_ITER

126

#define TRIVIAL_ITER \

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

127

*dptr = (sum * scale + half) >> 24; \

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

128

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

129

x = width;

130

#ifdef UNROLL_SEPARABLE_LOOPS

131

for (; x < diameter - 16; x += 16) {

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

TRIVIAL_ITER

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

148

}

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

149

#endif

150

for (; x < diameter; ++x) {

TRIVIAL_ITER

}

#undef TRIVIAL_ITER

#define CENTER_ITER \

155

sum += *right++; \

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

156

*dptr = (sum * scale + half) >> 24; \

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

157

sum -= *left++; \

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

158

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

159

160

x = diameter;

161

#ifdef UNROLL_SEPARABLE_LOOPS

162

for (; x < width - 16; x += 16) {

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

179

}

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

180

#endif

181

for (; x < width; ++x) {

CENTER_ITER

}

#undef CENTER_ITER

#define RIGHT_BORDER_ITER \

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

186

*dptr = (sum * scale + half) >> 24; \

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

187

sum -= *left++; \

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

188

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

189

190

x = 0;

191

#ifdef UNROLL_SEPARABLE_LOOPS

192

for (; x < border - 16; x += 16) {

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

209

}

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

210

#endif

211

for (; x < border; ++x) {

212

RIGHT_BORDER_ITER

213

}

214

#undef RIGHT_BORDER_ITER

senorblanco@chromium.org

336b4da

2012-11-20 17:09:40 +0000

[diff] [blame]

215

for (int x = 0; x < leftRadius - rightRadius; x++) {

216

*dptr = 0;

217

dptr += dst_x_stride;

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

218

}

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

219

SkASSERT(sum == 0);

220

}

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

221

return new_width;

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

222

}

223

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

224

/**

225

* This variant of the box blur handles blurring of non-integer radii. It

226

* keeps two running sums: an outer sum for the rounded-up kernel radius, and

227

* an inner sum for the rounded-down kernel radius. For each pixel, it linearly

228

* interpolates between them. In float this would be:

229

* outer_weight * outer_sum / kernelSize +

230

* (1.0 - outer_weight) * innerSum / (kernelSize - 2)

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

231

*

232

* This is what the inner loop looks like before unrolling, and with the two

233

* cases broken out separately (width < diameter, width >= diameter):

234

*

235

* if (width < diameter) {

236

* for (int x = 0; x < width; x++) {

237

* inner_sum = outer_sum;

238

* outer_sum += *right++;

239

* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;

240

* dptr += dst_x_stride;

241

* }

242

* for (int x = width; x < diameter; ++x) {

243

* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;

244

* dptr += dst_x_stride;

245

* }

246

* for (int x = 0; x < width; x++) {

247

* inner_sum = outer_sum - *left++;

248

* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;

249

* dptr += dst_x_stride;

250

* outer_sum = inner_sum;

251

* }

252

* } else {

253

* for (int x = 0; x < diameter; x++) {

254

* inner_sum = outer_sum;

255

* outer_sum += *right++;

256

* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;

257

* dptr += dst_x_stride;

258

* }

259

* for (int x = diameter; x < width; ++x) {

260

* inner_sum = outer_sum - *left;

261

* outer_sum += *right++;

262

* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;

263

* dptr += dst_x_stride;

264

* outer_sum -= *left++;

265

* }

266

* for (int x = 0; x < diameter; x++) {

267

* inner_sum = outer_sum - *left++;

268

* *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;

269

* dptr += dst_x_stride;

270

* outer_sum = inner_sum;

* }

* }

* }

* return new_width;

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

275

*/

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

276

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

277

static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,

278

int radius, int width, int height,

279

bool transpose, uint8_t outer_weight)

280

{

281

int diameter = radius * 2;

282

int kernelSize = diameter + 1;

283

int border = SkMin32(width, diameter);

284

int inner_weight = 255 - outer_weight;

285

outer_weight += outer_weight >> 7;

286

inner_weight += inner_weight >> 7;

287

uint32_t outer_scale = (outer_weight << 16) / kernelSize;

288

uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

289

#ifndef SK_DISABLE_BLUR_ROUNDING

290

uint32_t half = 1 << 23;

291

#else

292

uint32_t half = 0;

293

#endif

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

294

int new_width = width + diameter;

295

int dst_x_stride = transpose ? height : 1;

296

int dst_y_stride = transpose ? 1 : new_width;

297

for (int y = 0; y < height; ++y) {

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

298

uint32_t outer_sum = 0, inner_sum = 0;

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

299

uint8_t* dptr = dst + y * dst_y_stride;

300

const uint8_t* right = src + y * src_y_stride;

301

const uint8_t* left = right;

302

int x = 0;

303

304

#define LEFT_BORDER_ITER \

305

inner_sum = outer_sum; \

306

outer_sum += *right++; \

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

307

*dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

308

dptr += dst_x_stride;

309

310

#ifdef UNROLL_SEPARABLE_LOOPS

311

for (;x < border - 16; x += 16) {

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

LEFT_BORDER_ITER

}

#endif

for (;x < border; x++) {

332

LEFT_BORDER_ITER

333

}

334

#undef LEFT_BORDER_ITER

335

for (int x = width; x < diameter; ++x) {

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

336

*dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

337

dptr += dst_x_stride;

}

x = diameter;

#define CENTER_ITER \

342

inner_sum = outer_sum - *left; \

343

outer_sum += *right++; \

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

344

*dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

345

dptr += dst_x_stride; \

346

outer_sum -= *left++;

347

348

#ifdef UNROLL_SEPARABLE_LOOPS

349

for (; x < width - 16; x += 16) {

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

CENTER_ITER

}

#endif

for (; x < width; ++x) {

CENTER_ITER

}

#undef CENTER_ITER

#define RIGHT_BORDER_ITER \

374

inner_sum = outer_sum - *left++; \

senorblanco@chromium.org

2013-02-19 16:09:10 +0000

[diff] [blame^]

375

*dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \

senorblanco@chromium.org

2012-11-27 22:57:41 +0000

[diff] [blame]

376

dptr += dst_x_stride; \

377

outer_sum = inner_sum;

378

379

x = 0;

380

#ifdef UNROLL_SEPARABLE_LOOPS

381

for (; x < border - 16; x += 16) {

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

RIGHT_BORDER_ITER

}

#endif

for (; x < border; x++) {

401

RIGHT_BORDER_ITER

402

}

403

#undef RIGHT_BORDER_ITER

404

SkASSERT(outer_sum == 0 && inner_sum == 0);

}

return new_width;

}

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

409

// Splits a fractional pass radius into a pair of integer radii.
//   *hiRadius is always the ceiling of passRadius.
//   *loRadius equals *hiRadius, unless the ceiling overshoots passRadius by
//   more than one half, in which case it is one less.
static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
{
    const int ceilRadius = SkScalarCeil(passRadius);
    const SkScalar overshoot = SkIntToScalar(ceilRadius) - passRadius;
    const bool shrinkLo = overshoot > SkFloatToScalar(0.5f);

    *hiRadius = ceilRadius;
    *loRadius = shrinkLo ? ceilRadius - 1 : ceilRadius;
}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

417

// Unrolling the integer blur kernel seems to give us a ~15% speedup on Windows,

418

// breakeven on Mac, and ~15% slowdown on Linux.

419

// Reading a word at a time when building the sum buffer seems to give

420

// us no appreciable speedup on Windows or Mac, and 2% slowdown on Linux.

tomhudson@google.com

054ff1e

2012-01-11 19:29:08 +0000

[diff] [blame]

421

#if defined(SK_BUILD_FOR_WIN32)

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

422

#define UNROLL_KERNEL_LOOP 1

423

#endif

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

424

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

425

/** The sum buffer is an array of u32 to hold the accumulated sum of all of the

426

src values at their position, plus all values above and to the left.

427

When we sample into this buffer, we need an initial row and column of 0s,

428

so we have an index correspondence as follows:

rmistry@google.com

fbfcd56

2012-08-23 18:09:54 +0000

[diff] [blame]

429

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

430

src[i, j] == sum[i+1, j+1]

431

sum[0, j] == sum[i, 0] == 0

rmistry@google.com

fbfcd56

2012-08-23 18:09:54 +0000

[diff] [blame]

432

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

433

We assume that the sum buffer's stride == its width

434

*/

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

435

static void build_sum_buffer(uint32_t sum[], int srcW, int srcH,

436

const uint8_t src[], int srcRB) {

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

437

int sumW = srcW + 1;

438

439

SkASSERT(srcRB >= srcW);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

440

// mod srcRB so we can apply it after each row

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

441

srcRB -= srcW;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int x, y;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

445

// zero out the top row and column

446

memset(sum, 0, sumW * sizeof(sum[0]));

447

sum += sumW;

448

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

449

// special case first row

450

uint32_t X = 0;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

451

*sum++ = 0; // initialze the first column to 0

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

452

for (x = srcW - 1; x >= 0; --x) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

453

X = *src++ + X;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

454

*sum++ = X;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

src += srcRB;

// now do the rest of the rows

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

459

for (y = srcH - 1; y > 0; --y) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

460

uint32_t L = 0;

461

uint32_t C = 0;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

462

*sum++ = 0; // initialze the first column to 0

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

463

464

for (x = srcW - 1; !SkIsAlign4((intptr_t) src) && x >= 0; x--) {

465

uint32_t T = sum[-sumW];

466

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

}

for (; x >= 4; x-=4) {

473

uint32_t T = sum[-sumW];

474

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

T = sum[-sumW];

X = *src++ + L + T - C;

*sum++ = X;

L = X;

C = T;

}

for (; x >= 0; --x) {

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

496

uint32_t T = sum[-sumW];

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

497

X = *src++ + L + T - C;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

498

*sum++ = X;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

L = X;

C = T;

}

src += srcRB;

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

506

/**

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

507

* This is the path for apply_kernel() to be taken when the kernel

508

* is wider than the source image.

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

509

*/

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

510

static void kernel_clamped(uint8_t dst[], int rx, int ry, const uint32_t sum[],

int sw, int sh) {

SkASSERT(2*rx > sw);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

514

uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

515

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

516

int sumStride = sw + 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int dw = sw + 2*rx;

int dh = sh + 2*ry;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

521

int prev_y = -2*ry;

522

int next_y = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

523

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

524

for (int y = 0; y < dh; y++) {

525

int py = SkClampPos(prev_y) * sumStride;

526

int ny = SkFastMin32(next_y, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

527

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

528

int prev_x = -2*rx;

529

int next_x = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

530

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

531

for (int x = 0; x < dw; x++) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

532

int px = SkClampPos(prev_x);

533

int nx = SkFastMin32(next_x, sw);

534

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

535

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

536

*dst++ = SkToU8(tmp * scale >> 24);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

prev_y += 1;

next_y += 1;

}

}

/**

* sw and sh are the width and height of the src. Since the sum buffer

548

* matches that, but has an extra row and col at the beginning (with zeros),

549

* we can just use sw and sh as our "max" values for pinning coordinates

550

* when sampling into sum[][]

551

*

552

* The inner loop is conceptually simple; we break it into several sections

553

* to improve performance. Here's the original version:

554

for (int x = 0; x < dw; x++) {

555

int px = SkClampPos(prev_x);

556

int nx = SkFastMin32(next_x, sw);

557

558

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

559

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

564

* The sections are:

565

* left-hand section, where prev_x is clamped to 0

566

* center section, where neither prev_x nor next_x is clamped

567

* right-hand section, where next_x is clamped to sw

568

* On some operating systems, the center section is unrolled for additional

569

* speedup.

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

570

*/

571

static void apply_kernel(uint8_t dst[], int rx, int ry, const uint32_t sum[],

572

int sw, int sh) {

573

if (2*rx > sw) {

574

kernel_clamped(dst, rx, ry, sum, sw, sh);

return;

}

uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

579

580

int sumStride = sw + 1;

int dw = sw + 2*rx;

int dh = sh + 2*ry;

int prev_y = -2*ry;

int next_y = 1;

SkASSERT(2*rx <= dw - 2*rx);

589

590

for (int y = 0; y < dh; y++) {

591

int py = SkClampPos(prev_y) * sumStride;

592

int ny = SkFastMin32(next_y, sh) * sumStride;

int prev_x = -2*rx;

int next_x = 1;

int x = 0;

for (; x < 2*rx; x++) {

599

SkASSERT(prev_x <= 0);

600

SkASSERT(next_x <= sw);

int px = 0;

int nx = next_x;

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

606

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

612

int i0 = prev_x + py;

613

int i1 = next_x + ny;

614

int i2 = next_x + py;

615

int i3 = prev_x + ny;

616

617

#if UNROLL_KERNEL_LOOP

618

for (; x < dw - 2*rx - 4; x += 4) {

619

SkASSERT(prev_x >= 0);

620

SkASSERT(next_x <= sw);

621

622

uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

623

*dst++ = SkToU8(tmp * scale >> 24);

624

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

625

*dst++ = SkToU8(tmp * scale >> 24);

626

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

627

*dst++ = SkToU8(tmp * scale >> 24);

628

tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

629

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 4;

next_x += 4;

}

#endif

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

636

for (; x < dw - 2*rx; x++) {

637

SkASSERT(prev_x >= 0);

638

SkASSERT(next_x <= sw);

639

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

640

uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

641

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

for (; x < dw; x++) {

648

SkASSERT(prev_x >= 0);

649

SkASSERT(next_x > sw);

int px = prev_x;

int nx = sw;

uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];

655

*dst++ = SkToU8(tmp * scale >> 24);

prev_x += 1;

next_x += 1;

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_y += 1;

next_y += 1;

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

666

/**

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

667

* This is the path for apply_kernel_interp() to be taken when the kernel

668

* is wider than the source image.

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

669

*/

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

670

static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

671

const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

672

SkASSERT(2*rx > sw);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

673

674

int inner_weight = 255 - outer_weight;

675

676

// round these guys up if they're bigger than 127

677

outer_weight += outer_weight >> 7;

678

inner_weight += inner_weight >> 7;

679

680

uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));

681

uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

682

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

683

int sumStride = sw + 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

int dw = sw + 2*rx;

int dh = sh + 2*ry;

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

688

int prev_y = -2*ry;

689

int next_y = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

690

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

691

for (int y = 0; y < dh; y++) {

692

int py = SkClampPos(prev_y) * sumStride;

693

int ny = SkFastMin32(next_y, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

694

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

695

int ipy = SkClampPos(prev_y + 1) * sumStride;

696

int iny = SkClampMax(next_y - 1, sh) * sumStride;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

697

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

698

int prev_x = -2*rx;

699

int next_x = 1;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

700

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

701

for (int x = 0; x < dw; x++) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

702

int px = SkClampPos(prev_x);

703

int nx = SkFastMin32(next_x, sw);

704

705

int ipx = SkClampPos(prev_x + 1);

706

int inx = SkClampMax(next_x - 1, sw);

707

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

708

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

709

- sum[nx+py] - sum[px+ny];

710

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

711

- sum[inx+ipy] - sum[ipx+iny];

712

*dst++ = SkToU8((outer_sum * outer_scale

713

+ inner_sum * inner_scale) >> 24);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

prev_x += 1;

next_x += 1;

}

prev_y += 1;

next_y += 1;

}

}

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

723

/**

724

* sw and sh are the width and height of the src. Since the sum buffer

725

* matches that, but has an extra row and col at the beginning (with zeros),

726

* we can just use sw and sh as our "max" values for pinning coordinates

727

* when sampling into sum[][]

728

*

729

* The inner loop is conceptually simple; we break it into several variants

730

* to improve performance. Here's the original version:

731

for (int x = 0; x < dw; x++) {

732

int px = SkClampPos(prev_x);

733

int nx = SkFastMin32(next_x, sw);

734

735

int ipx = SkClampPos(prev_x + 1);

736

int inx = SkClampMax(next_x - 1, sw);

737

738

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

739

- sum[nx+py] - sum[px+ny];

740

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

741

- sum[inx+ipy] - sum[ipx+iny];

742

*dst++ = SkToU8((outer_sum * outer_scale

743

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

748

* The sections are:

749

* left-hand section, where prev_x is clamped to 0

750

* center section, where neither prev_x nor next_x is clamped

751

* right-hand section, where next_x is clamped to sw

752

* On some operating systems, the center section is unrolled for additional

753

* speedup.

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

754

*/

755

static void apply_kernel_interp(uint8_t dst[], int rx, int ry,

756

const uint32_t sum[], int sw, int sh, U8CPU outer_weight) {

757

SkASSERT(rx > 0 && ry > 0);

758

SkASSERT(outer_weight <= 255);

759

760

if (2*rx > sw) {

761

kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outer_weight);

return;

}

int inner_weight = 255 - outer_weight;

766

767

// round these guys up if they're bigger than 127

768

outer_weight += outer_weight >> 7;

769

inner_weight += inner_weight >> 7;

770

771

uint32_t outer_scale = (outer_weight << 16) / ((2*rx + 1)*(2*ry + 1));

772

uint32_t inner_scale = (inner_weight << 16) / ((2*rx - 1)*(2*ry - 1));

773

774

int sumStride = sw + 1;

int dw = sw + 2*rx;

int dh = sh + 2*ry;

int prev_y = -2*ry;

int next_y = 1;

SkASSERT(2*rx <= dw - 2*rx);

783

784

for (int y = 0; y < dh; y++) {

785

int py = SkClampPos(prev_y) * sumStride;

786

int ny = SkFastMin32(next_y, sh) * sumStride;

787

788

int ipy = SkClampPos(prev_y + 1) * sumStride;

789

int iny = SkClampMax(next_y - 1, sh) * sumStride;

int prev_x = -2*rx;

int next_x = 1;

int x = 0;

for (; x < 2*rx; x++) {

796

SkASSERT(prev_x < 0);

797

SkASSERT(next_x <= sw);

int px = 0;

int nx = next_x;

int ipx = 0;

int inx = next_x - 1;

804

805

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

806

- sum[nx+py] - sum[px+ny];

807

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

808

- sum[inx+ipy] - sum[ipx+iny];

809

*dst++ = SkToU8((outer_sum * outer_scale

810

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

816

int i0 = prev_x + py;

817

int i1 = next_x + ny;

818

int i2 = next_x + py;

819

int i3 = prev_x + ny;

820

int i4 = prev_x + 1 + ipy;

821

int i5 = next_x - 1 + iny;

822

int i6 = next_x - 1 + ipy;

823

int i7 = prev_x + 1 + iny;

824

825

#if UNROLL_KERNEL_LOOP

826

for (; x < dw - 2*rx - 4; x += 4) {

827

SkASSERT(prev_x >= 0);

828

SkASSERT(next_x <= sw);

829

830

uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

831

uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

832

*dst++ = SkToU8((outer_sum * outer_scale

833

+ inner_sum * inner_scale) >> 24);

834

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

835

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

836

*dst++ = SkToU8((outer_sum * outer_scale

837

+ inner_sum * inner_scale) >> 24);

838

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

839

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

840

*dst++ = SkToU8((outer_sum * outer_scale

841

+ inner_sum * inner_scale) >> 24);

842

outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

843

inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

844

*dst++ = SkToU8((outer_sum * outer_scale

845

+ inner_sum * inner_scale) >> 24);

prev_x += 4;

next_x += 4;

}

#endif

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

852

for (; x < dw - 2*rx; x++) {

853

SkASSERT(prev_x >= 0);

854

SkASSERT(next_x <= sw);

855

tomhudson@google.com

2011-11-28 18:22:01 +0000

[diff] [blame]

856

uint32_t outer_sum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];

857

uint32_t inner_sum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];

tomhudson@google.com

2011-11-22 15:58:06 +0000

[diff] [blame]

858

*dst++ = SkToU8((outer_sum * outer_scale

859

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

for (; x < dw; x++) {

866

SkASSERT(prev_x >= 0);

867

SkASSERT(next_x > sw);

int px = prev_x;

int nx = sw;

int ipx = prev_x + 1;

873

int inx = sw;

874

875

uint32_t outer_sum = sum[px+py] + sum[nx+ny]

876

- sum[nx+py] - sum[px+ny];

877

uint32_t inner_sum = sum[ipx+ipy] + sum[inx+iny]

878

- sum[inx+ipy] - sum[ipx+iny];

879

*dst++ = SkToU8((outer_sum * outer_scale

880

+ inner_sum * inner_scale) >> 24);

prev_x += 1;

next_x += 1;

}

prev_y += 1;

next_y += 1;

}

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

891

#include "SkColorPriv.h"

892

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

893

static void merge_src_with_blur(uint8_t dst[], int dstRB,

894

const uint8_t src[], int srcRB,

895

const uint8_t blur[], int blurRB,

int sw, int sh) {

dstRB -= sw;

srcRB -= sw;

blurRB -= sw;

while (--sh >= 0) {

for (int x = sw - 1; x >= 0; --x) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

902

*dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));

dst += 1;

src += 1;

blur += 1;

}

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

907

dst += dstRB;

908

src += srcRB;

909

blur += blurRB;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

}

static void clamp_with_orig(uint8_t dst[], int dstRowBytes,

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

914

const uint8_t src[], int srcRowBytes,

915

int sw, int sh,

reed@android.com

2009-09-21 00:27:08 +0000

[diff] [blame]

916

SkBlurMask::Style style) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

917

int x;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

918

while (--sh >= 0) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

919

switch (style) {

920

case SkBlurMask::kSolid_Style:

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

921

for (x = sw - 1; x >= 0; --x) {

922

int s = *src;

923

int d = *dst;

924

*dst = SkToU8(s + d - SkMulDiv255Round(s, d));

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

dst += 1;

src += 1;

}

break;

case SkBlurMask::kOuter_Style:

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

930

for (x = sw - 1; x >= 0; --x) {

931

if (*src) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

932

*dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

933

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

dst += 1;

src += 1;

}

break;

default:

tomhudson@google.com

0c00f21

2011-12-28 14:59:50 +0000

[diff] [blame]

939

SkDEBUGFAIL("Unexpected blur style here");

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

940

break;

941

}

942

dst += dstRowBytes - sw;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

943

src += srcRowBytes - sw;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

}

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

947

///////////////////////////////////////////////////////////////////////////////

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

948

bsalomon@google.com

33cdbde

2013-01-11 20:54:44 +0000

[diff] [blame]

949

// we use a local function to wrap the class static method to work around

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

950

// a bug in gcc98

951

// Free-function forwarder to SkMask::FreeImage, declared first and then
// defined, so it can be named as a template argument.
void SkMask_FreeImage(uint8_t* image);

void SkMask_FreeImage(uint8_t* image)
{
    SkMask::FreeImage(image);
}

955

956

bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

957

SkScalar radius, Style style, Quality quality,

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

958

SkIPoint* margin, bool separable)

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

959

{

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

960

if (src.fFormat != SkMask::kA8_Format) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

961

return false;

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

962

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

963

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

964

// Force high quality off for small radii (performance)

senorblanco@chromium.org

2012-11-29 17:09:27 +0000

[diff] [blame]

965

if (radius < SkIntToScalar(3)) {

966

quality = kLow_Quality;

967

}

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

968

969

// highQuality: use three box blur passes as a cheap way to approximate a Gaussian blur

senorblanco@chromium.org

2012-11-29 17:09:27 +0000

[diff] [blame]

970

int passCount = (kHigh_Quality == quality) ? 3 : 1;

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

971

SkScalar passRadius = (kHigh_Quality == quality) ? SkScalarMul( radius, kBlurRadiusFudgeFactor): radius;

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

972

973

int rx = SkScalarCeil(passRadius);

974

int outer_weight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

975

976

SkASSERT(rx >= 0);

977

SkASSERT((unsigned)outer_weight <= 255);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

978

if (rx <= 0) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

979

return false;

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

980

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

981

982

int ry = rx; // only do square blur for now

983

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

984

int padx = passCount * rx;

985

int pady = passCount * ry;

bungeman@google.com

5af16f8

2011-09-02 15:06:44 +0000

[diff] [blame]

986

if (margin) {

987

margin->set(padx, pady);

988

}

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

989

dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,

990

src.fBounds.fRight + padx, src.fBounds.fBottom + pady);

reed@android.com

49f0ff2

2009-03-19 21:52:42 +0000

[diff] [blame]

991

dst->fRowBytes = dst->fBounds.width();

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

992

dst->fFormat = SkMask::kA8_Format;

993

dst->fImage = NULL;

994

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

995

if (src.fImage) {

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

996

size_t dstSize = dst->computeImageSize();

997

if (0 == dstSize) {

998

return false; // too big to allocate, abort

999

}

1000

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1001

int sw = src.fBounds.width();

1002

int sh = src.fBounds.height();

1003

const uint8_t* sp = src.fImage;

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

1004

uint8_t* dp = SkMask::AllocImage(dstSize);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1005

1006

SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);

1007

1008

// build the blurry destination

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

1009

if (separable) {

1010

SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);

1011

uint8_t* tp = tmpBuffer.get();

1012

int w = sw, h = sh;

1013

senorblanco@chromium.org

2012-11-29 17:09:27 +0000

[diff] [blame]

1014

if (outer_weight == 255) {

senorblanco@chromium.org

2012-11-16 17:22:33 +0000

[diff] [blame]

1015

int loRadius, hiRadius;

1016

get_adjusted_radii(passRadius, &loRadius, &hiRadius);

senorblanco@chromium.org

2012-11-29 17:09:27 +0000

[diff] [blame]

1017

if (kHigh_Quality == quality) {

1018

// Do three X blurs, with a transpose on the final one.

1019

w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);

1020

w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false);

1021

w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true);

1022

// Do three Y blurs, with a transpose on the final one.

1023

h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false);

1024

h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false);

1025

h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true);

1026

} else {

1027

w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);

1028

h = boxBlur(tp, h, dp, ry, ry, h, w, true);

1029

}

senorblanco@chromium.org

2012-11-15 20:27:35 +0000

[diff] [blame]

1030

} else {

senorblanco@chromium.org

2012-11-29 17:09:27 +0000

[diff] [blame]

1031

if (kHigh_Quality == quality) {

1032

// Do three X blurs, with a transpose on the final one.

1033

w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outer_weight);

1034

w = boxBlurInterp(tp, w, dp, rx, w, h, false, outer_weight);

1035

w = boxBlurInterp(dp, w, tp, rx, w, h, true, outer_weight);

1036

// Do three Y blurs, with a transpose on the final one.

1037

h = boxBlurInterp(tp, h, dp, ry, h, w, false, outer_weight);

1038

h = boxBlurInterp(dp, h, tp, ry, h, w, false, outer_weight);

1039

h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);

1040

} else {

1041

w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outer_weight);

1042

h = boxBlurInterp(tp, h, dp, ry, h, w, true, outer_weight);

1043

}

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

1044

}

1045

} else {

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

1046

const size_t storageW = sw + 2 * (passCount - 1) * rx + 1;

1047

const size_t storageH = sh + 2 * (passCount - 1) * ry + 1;

1048

SkAutoTMalloc<uint32_t> storage(storageW * storageH);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1049

uint32_t* sumBuffer = storage.get();

1050

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

1051

//pass1: sp is source, dp is destination

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1052

build_sum_buffer(sumBuffer, sw, sh, sp, src.fRowBytes);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

1053

if (outer_weight == 255) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1054

apply_kernel(dp, rx, ry, sumBuffer, sw, sh);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

1055

} else {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1056

apply_kernel_interp(dp, rx, ry, sumBuffer, sw, sh, outer_weight);

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

1057

}

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

1058

senorblanco@chromium.org

2012-11-29 17:09:27 +0000

[diff] [blame]

1059

if (kHigh_Quality == quality) {

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

1060

//pass2: dp is source, tmpBuffer is destination

1061

int tmp_sw = sw + 2 * rx;

1062

int tmp_sh = sh + 2 * ry;

1063

SkAutoTMalloc<uint8_t> tmpBuffer(dstSize);

1064

build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, dp, tmp_sw);

1065

if (outer_weight == 255)

1066

apply_kernel(tmpBuffer.get(), rx, ry, sumBuffer, tmp_sw, tmp_sh);

1067

else

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

1068

apply_kernel_interp(tmpBuffer.get(), rx, ry, sumBuffer,

1069

tmp_sw, tmp_sh, outer_weight);

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

1070

1071

//pass3: tmpBuffer is source, dp is destination

1072

tmp_sw += 2 * rx;

1073

tmp_sh += 2 * ry;

1074

build_sum_buffer(sumBuffer, tmp_sw, tmp_sh, tmpBuffer.get(), tmp_sw);

1075

if (outer_weight == 255)

1076

apply_kernel(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh);

1077

else

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

1078

apply_kernel_interp(dp, rx, ry, sumBuffer, tmp_sw, tmp_sh,

1079

outer_weight);

senorblanco@chromium.org

2011-02-18 19:03:01 +0000

[diff] [blame]

1080

}

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

}

dst->fImage = dp;

// if need be, alloc the "real" dst (same size as src) and copy/merge

1085

// the blur into it (applying the src)

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

1086

if (style == kInner_Style) {

1087

// now we allocate the "real" dst, mirror the size of src

reed@android.com

543ed93

2009-04-24 12:43:40 +0000

[diff] [blame]

1088

size_t srcSize = src.computeImageSize();

1089

if (0 == srcSize) {

1090

return false; // too big to allocate, abort

1091

}

1092

dst->fImage = SkMask::AllocImage(srcSize);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

1093

merge_src_with_blur(dst->fImage, src.fRowBytes,

1094

sp, src.fRowBytes,

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

1095

dp + passCount * (rx + ry * dst->fRowBytes),

1096

dst->fRowBytes, sw, sh);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1097

SkMask::FreeImage(dp);

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

1098

} else if (style != kNormal_Style) {

reed@google.com

2011-08-12 14:59:59 +0000

[diff] [blame]

1099

clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),

1100

dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1101

}

1102

(void)autoCall.detach();

1103

}

1104

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

1105

if (style == kInner_Style) {

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1106

dst->fBounds = src.fBounds; // restore trimmed bounds

reed@android.com

2009-09-18 13:41:56 +0000

[diff] [blame]

1107

dst->fRowBytes = src.fRowBytes;

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1108

}

1109

reed@android.com

2008-12-17 15:59:43 +0000

[diff] [blame]

1110

return true;

1111

}

1112

senorblanco@chromium.org

2012-11-13 20:35:21 +0000

[diff] [blame]

1113

bool SkBlurMask::BlurSeparable(SkMask* dst, const SkMask& src,

1114

SkScalar radius, Style style, Quality quality,

1115

SkIPoint* margin)

1116

{

1117

return SkBlurMask::Blur(dst, src, radius, style, quality, margin, true);

1118

}

1119

1120

bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,

1121

SkScalar radius, Style style, Quality quality,

1122

SkIPoint* margin)

1123

{

1124

return SkBlurMask::Blur(dst, src, radius, style, quality, margin, false);

1125

}

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1126

1127

/* Convolving a box with itself three times results in a piecewise

1128

quadratic function:

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1129

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1130

0 x <= -1.5

1131

9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5

1132

3/4 - x^2 -.5 < x <= .5

1133

9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5

1134

0 1.5 < x

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1135

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1136

To get the profile curve of the blurred step function at the rectangle

1137

edge, we evaluate the indefinite integral, which is piecewise cubic:

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1138

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1139

0 x <= -1.5

1140

9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3 -1.5 < x <= -0.5

1141

1/2 + 3/4 x - 1/3 x^3 -.5 < x <= .5

1142

7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3 .5 < x <= 1.5

1 1.5 < x

*/

// Evaluates the COMPLEMENT (1 - value) of the piecewise-cubic integral of the
// triple-box kernel: the result falls from 1 at x <= -1.5 to 0 at x >= 1.5,
// passing through 0.5 at x == 0.
static float gaussian_integral( float x ) {
    if ( x > 1.5f ) {
        return 0.0f;
    }
    if ( x < -1.5f ) {
        return 1.0f;
    }

    float x2 = x*x;
    float x3 = x2*x;

    // 0.5 < x <= 1.5 :  1 - (7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3)
    if ( x > 0.5f ) {
        return 0.5625f - ( x3 / 6.0f - 3.0f * x2 * 0.25f + 1.125f * x);
    }
    // -0.5 < x <= 0.5 :  1 - (1/2 + 3/4 x - 1/3 x^3)
    if ( x > -0.5f ) {
        return 0.5f - (0.75f * x - x3 / 3.0f);
    }
    // -1.5 <= x <= -0.5 :  1 - (9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3)
    return 0.4375f + (-x3 / 6.0f - 3.0f * x2 * 0.25f - 1.125f * x);
}

1165

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1166

/*

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1167

compute_profile allocates and fills in an array of unsigned

integer values between 0 and 255 for the profile signature of

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1169

a blurred half-plane with the given blur radius. Since we're

1170

going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))

1171

all the time, we actually fill in the profile pre-inverted

1172

(already done 255-x).

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1173

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1174

The function returns the size of the array allocated for the

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1175

profile. It's the responsibility of the caller to delete the

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1176

memory returned in profile_out.

1177

*/

1178

1179

// Builds the profile table for the given radius, stores the newly allocated
// array in *profile_out and returns its length (always at least 1).
static int compute_profile( SkScalar radius, unsigned int **profile_out ) {
    int size = SkScalarFloorToInt(radius * 3 + 1);
    // A radius below zero can floor to a non-positive size; entry 0 is
    // written unconditionally below, so never allocate fewer than one slot.
    if (size < 1) {
        size = 1;
    }
    int center = size >> 1;

    unsigned int *profile = SkNEW_ARRAY(unsigned int, size);

    float invr = 1.0f/radius;

    profile[0] = 255;
    for (int x = 1 ; x < size ; x++) {
        // position relative to the table centre, in units of the radius
        float scaled_x = ( center - x ) * invr;
        float gi = gaussian_integral( scaled_x );
        // store 255 minus the (truncated) scaled value
        profile[x] = 255 - (uint8_t) ( 255.f * gi );
    }

    *profile_out = profile;
    return size;
}

1197

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1198

// TODO MAYBE: Maintain a profile cache to avoid recomputing this for

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1199

// commonly used radii. Consider baking some of the most common blur radii

1200

// directly in as static data?

1201

1202

// Implementation adapted from Michael Herf's approach:

1203

// http://stereopsis.com/shadowrect/

1204

1205

bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,

1206

SkScalar provided_radius, Style style, Quality quality,

1207

SkIPoint *margin) {

1208

int profile_size;

1209

unsigned int *profile;

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1210

1211

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1212

float radius = SkScalarToFloat( SkScalarMul( provided_radius, kBlurRadiusFudgeFactor ) );

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1213

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1214

profile_size = compute_profile( radius, &profile );

bsalomon@google.com

33cdbde

2013-01-11 20:54:44 +0000

[diff] [blame]

1215

SkAutoTDeleteArray<unsigned int> ada(profile);

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1216

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1217

int pad = (int) (radius * 1.5f + 1);

1218

if (margin) {

1219

margin->set( pad, pad );

1220

}

jvanverth@google.com

2013-01-07 18:41:28 +0000

[diff] [blame]

1221

dst->fBounds = SkIRect::MakeWH(SkScalarFloorToInt(src.width()), SkScalarFloorToInt(src.height()));

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1222

dst->fBounds.outset(pad, pad);

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1223

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1224

dst->fRowBytes = dst->fBounds.width();

1225

dst->fFormat = SkMask::kA8_Format;

1226

dst->fImage = NULL;

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1227

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1228

size_t dstSize = dst->computeImageSize();

1229

if (0 == dstSize) {

1230

return false; // too big to allocate, abort

1231

}

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1232

jvanverth@google.com

2013-01-07 18:41:28 +0000

[diff] [blame]

1233

int sw = SkScalarFloorToInt(src.width());

1234

int sh = SkScalarFloorToInt(src.height());

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1235

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1236

uint8_t* dp = SkMask::AllocImage(dstSize);

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1237

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1238

dst->fImage = dp;

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1239

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1240

int dst_height = dst->fBounds.height();

1241

int dst_width = dst->fBounds.width();

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1242

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1243

// nearest odd number less than the profile size represents the center

1244

// of the (2x scaled) profile

1245

int center = ( profile_size & ~1 ) - 1;

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1246

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1247

int w = sw - center;

1248

int h = sh - center;

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1249

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1250

uint8_t *outptr = dp;

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1251

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1252

for (int y = 0 ; y < dst_height ; y++)

1253

{

1254

// time to fill in a scanline of the blurry rectangle.

1255

// to avoid floating point math, everything is multiplied by

1256

// 2 where needed. This keeps things nice and integer-oriented.

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1257

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1258

int dy = abs((y << 1) - dst_height) - h; // how far are we from the original edge?

1259

int oy = dy >> 1;

1260

if (oy < 0) oy = 0;

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1261

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1262

unsigned int profile_y = profile[oy];

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1263

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1264

for (int x = 0 ; x < (dst_width << 1) ; x += 2) {

1265

int dx = abs( x - dst_width ) - w;

1266

int ox = dx >> 1;

1267

if (ox < 0) ox = 0;

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1268

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1269

unsigned int maskval = SkMulDiv255Round(profile[ox], profile_y);

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1270

humper@google.com

2013-01-04 20:29:03 +0000

[diff] [blame]

1271

*(outptr++) = maskval;

1272

}

1273

}

skia.committer@gmail.com

2013-01-05 02:02:05 +0000

[diff] [blame]

1274

humper@google.com