Blame - src/core/SkConvolver.cpp - platform/external/skqp

2013-07-19 20:20:04 +0000

[diff] [blame]

1

2

// Use of this source code is governed by a BSD-style license that can be

3

// found in the LICENSE file.

4

5

#include "SkConvolver.h"

#include "SkSize.h"

#include "SkTypes.h"

namespace {

// Converts the argument to an 8-bit unsigned value by clamping it to the
// range 0-255.
inline unsigned char ClampTo8(int a) {
    // One unsigned comparison checks both bounds: a negative |a| converts to
    // a large unsigned value and therefore fails the test.
    if (static_cast<unsigned>(a) < 256) {
        return static_cast<unsigned char>(a);  // Avoid the extra check in the common case.
    }
    // Out of range: saturate towards whichever end was exceeded.
    return (a < 0) ? 0 : 255;
}

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

23

// Stores a list of rows in a circular buffer. The usage is you write into it

24

// by calling AdvanceRow. It will keep track of which row in the buffer it

25

// should use next, and the total number of rows added.

26

class CircularRowBuffer {

27

public:

28

// The number of pixels in each row is given in |sourceRowPixelWidth|.

29

// The maximum number of rows needed in the buffer is |maxYFilterSize|

30

// (we only need to store enough rows for the biggest filter).

31

//

32

// We use the |firstInputRow| to compute the coordinates of all of the

33

// following rows returned by Advance().

34

CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,

35

int firstInputRow)

36

: fRowByteWidth(destRowPixelWidth * 4),

37

fNumRows(maxYFilterSize),

38

fNextRow(0),

39

fNextRowCoordinate(firstInputRow) {

40

fBuffer.reset(fRowByteWidth * maxYFilterSize);

41

fRowAddresses.reset(fNumRows);

42

}

43

44

// Moves to the next row in the buffer, returning a pointer to the beginning

45

// of it.

46

unsigned char* advanceRow() {

47

unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];

48

fNextRowCoordinate++;

49

50

// Set the pointer to the next row to use, wrapping around if necessary.

51

fNextRow++;

52

if (fNextRow == fNumRows) {

fNextRow = 0;

}

return row;

}

// Returns a pointer to an "unrolled" array of rows. These rows will start

59

// at the y coordinate placed into |*firstRowIndex| and will continue in

60

// order for the maximum number of rows in this circular buffer.

61

//

62

// The |firstRowIndex_| may be negative. This means the circular buffer

63

// starts before the top of the image (it hasn't been filled yet).

64

unsigned char* const* GetRowAddresses(int* firstRowIndex) {

65

// Example for a 4-element circular buffer holding coords 6-9.

66

// Row 0 Coord 8

67

// Row 1 Coord 9

68

// Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10.

69

// Row 3 Coord 7

70

//

71

// The "next" row is also the first (lowest) coordinate. This computation

72

// may yield a negative value, but that's OK, the math will work out

73

// since the user of this buffer will compute the offset relative

74

// to the firstRowIndex and the negative rows will never be used.

75

*firstRowIndex = fNextRowCoordinate - fNumRows;

76

77

int curRow = fNextRow;

78

for (int i = 0; i < fNumRows; i++) {

79

fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];

80

81

// Advance to the next row, wrapping if necessary.

82

curRow++;

83

if (curRow == fNumRows) {

curRow = 0;

}

}

return &fRowAddresses[0];

}

private:

// The buffer storing the rows. They are packed, each one fRowByteWidth.

92

SkTArray<unsigned char> fBuffer;

93

94

// Number of bytes per row in the |buffer|.

95

int fRowByteWidth;

96

97

// The number of rows available in the buffer.

98

int fNumRows;

99

100

// The next row index we should write into. This wraps around as the

101

// circular buffer is used.

102

int fNextRow;

103

104

// The y coordinate of the |fNextRow|. This is incremented each time a

105

// new row is appended and does not wrap.

106

int fNextRowCoordinate;

107

108

// Buffer used by GetRowAddresses().

109

SkTArray<unsigned char*> fRowAddresses;

110

};

111

112

// Convolves horizontally along a single row. The row data is given in

113

// |srcData| and continues for the numValues() of the filter.

114

template<bool hasAlpha>

115

void ConvolveHorizontally(const unsigned char* srcData,

116

const SkConvolutionFilter1D& filter,

117

unsigned char* outRow) {

118

// Loop over each pixel on this row in the output image.

119

int numValues = filter.numValues();

120

for (int outX = 0; outX < numValues; outX++) {

121

// Get the filter that determines the current output pixel.

122

int filterOffset, filterLength;

123

const SkConvolutionFilter1D::ConvolutionFixed* filterValues =

124

filter.FilterForValue(outX, &filterOffset, &filterLength);

125

126

// Compute the first pixel in this row that the filter affects. It will

127

// touch |filterLength| pixels (4 bytes each) after this.

128

const unsigned char* rowToFilter = &srcData[filterOffset * 4];

129

130

// Apply the filter to the row to get the destination pixel in |accum|.

131

int accum[4] = {0};

132

for (int filterX = 0; filterX < filterLength; filterX++) {

133

SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];

134

accum[0] += curFilter * rowToFilter[filterX * 4 + 0];

135

accum[1] += curFilter * rowToFilter[filterX * 4 + 1];

136

accum[2] += curFilter * rowToFilter[filterX * 4 + 2];

137

if (hasAlpha) {

138

accum[3] += curFilter * rowToFilter[filterX * 4 + 3];

}

}

// Bring this value back in range. All of the filter scaling factors

143

// are in fixed point with kShiftBits bits of fractional part.

144

accum[0] >>= SkConvolutionFilter1D::kShiftBits;

145

accum[1] >>= SkConvolutionFilter1D::kShiftBits;

146

accum[2] >>= SkConvolutionFilter1D::kShiftBits;

147

if (hasAlpha) {

148

accum[3] >>= SkConvolutionFilter1D::kShiftBits;

149

}

150

151

// Store the new pixel.

152

outRow[outX * 4 + 0] = ClampTo8(accum[0]);

153

outRow[outX * 4 + 1] = ClampTo8(accum[1]);

154

outRow[outX * 4 + 2] = ClampTo8(accum[2]);

155

if (hasAlpha) {

156

outRow[outX * 4 + 3] = ClampTo8(accum[3]);

}

}

}

mtklein

2014-06-25 11:38:00 -0700

[diff] [blame]

161

// There's a bug somewhere here with GCC autovectorization (-ftree-vectorize) on 32 bit builds.

mtklein

b726df4

2014-06-25 12:40:51 -0700

[diff] [blame]

162

// Dropping to -O2 disables -ftree-vectorize. GCC 4.6 needs noinline. http://skbug.com/2575

mtklein

2014-06-25 11:38:00 -0700

[diff] [blame]

163

#if defined(__i386) && SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE)

mtklein

b726df4

2014-06-25 12:40:51 -0700

[diff] [blame]

164

#define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), noinline))

mtklein

2014-06-25 11:38:00 -0700

[diff] [blame]

165

#else

166

#define SK_MAYBE_DISABLE_VECTORIZATION

167

#endif

168

169

SK_MAYBE_DISABLE_VECTORIZATION

170

static void ConvolveHorizontallyAlpha(const unsigned char* srcData,

171

const SkConvolutionFilter1D& filter,

172

unsigned char* outRow) {

173

return ConvolveHorizontally<true>(srcData, filter, outRow);

174

}

175

176

SK_MAYBE_DISABLE_VECTORIZATION

177

static void ConvolveHorizontallyNoAlpha(const unsigned char* srcData,

178

const SkConvolutionFilter1D& filter,

179

unsigned char* outRow) {

180

return ConvolveHorizontally<false>(srcData, filter, outRow);

181

}

182

183

#undef SK_MAYBE_DISABLE_VECTORIZATION

184

185

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

186

// Does vertical convolution to produce one output row. The filter values and

187

// length are given in the first two parameters. These are applied to each

188

// of the rows pointed to in the |sourceDataRows| array, with each row

189

// being |pixelWidth| wide.

190

//

191

// The output must have room for |pixelWidth * 4| bytes.

192

template<bool hasAlpha>

193

void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,

194

int filterLength,

195

unsigned char* const* sourceDataRows,

196

int pixelWidth,

197

unsigned char* outRow) {

198

// We go through each column in the output and do a vertical convolution,

199

// generating one output pixel each time.

200

for (int outX = 0; outX < pixelWidth; outX++) {

201

// Compute the number of bytes over in each row that the current column

202

// we're convolving starts at. The pixel will cover the next 4 bytes.

203

int byteOffset = outX * 4;

204

205

// Apply the filter to one column of pixels.

206

int accum[4] = {0};

207

for (int filterY = 0; filterY < filterLength; filterY++) {

208

SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];

209

accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];

210

accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];

211

accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];

212

if (hasAlpha) {

213

accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];

}

}

// Bring this value back in range. All of the filter scaling factors

218

// are in fixed point with kShiftBits bits of precision.

219

accum[0] >>= SkConvolutionFilter1D::kShiftBits;

220

accum[1] >>= SkConvolutionFilter1D::kShiftBits;

221

accum[2] >>= SkConvolutionFilter1D::kShiftBits;

222

if (hasAlpha) {

223

accum[3] >>= SkConvolutionFilter1D::kShiftBits;

224

}

225

226

// Store the new pixel.

227

outRow[byteOffset + 0] = ClampTo8(accum[0]);

228

outRow[byteOffset + 1] = ClampTo8(accum[1]);

229

outRow[byteOffset + 2] = ClampTo8(accum[2]);

230

if (hasAlpha) {

231

unsigned char alpha = ClampTo8(accum[3]);

232

233

// Make sure the alpha channel doesn't come out smaller than any of the

234

// color channels. We use premultipled alpha channels, so this should

235

// never happen, but rounding errors will cause this from time to time.

236

// These "impossible" colors will cause overflows (and hence random pixel

237

// values) when the resulting bitmap is drawn to the screen.

238

//

239

// We only need to do this when generating the final output row (here).

240

int maxColorChannel = SkTMax(outRow[byteOffset + 0],

skia.committer@gmail.com

1f3c738

2013-07-20 07:00:58 +0000

[diff] [blame]

241

SkTMax(outRow[byteOffset + 1],

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

242

outRow[byteOffset + 2]));

243

if (alpha < maxColorChannel) {

244

outRow[byteOffset + 3] = maxColorChannel;

245

} else {

246

outRow[byteOffset + 3] = alpha;

247

}

248

} else {

249

// No alpha channel, the image is opaque.

250

outRow[byteOffset + 3] = 0xff;

}

}

}

// Runtime dispatcher: forwards to the ConvolveVertically<true> or
// ConvolveVertically<false> instantiation depending on |sourceHasAlpha|.
// All other arguments are passed through unchanged.
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
                        int filterLength,
                        unsigned char* const* sourceDataRows,
                        int pixelWidth,
                        unsigned char* outRow,
                        bool sourceHasAlpha) {
    if (sourceHasAlpha) {
        ConvolveVertically<true>(filterValues, filterLength,
                                 sourceDataRows, pixelWidth,
                                 outRow);
    } else {
        ConvolveVertically<false>(filterValues, filterLength,
                                  sourceDataRows, pixelWidth,
                                  outRow);
    }
}

} // namespace

// SkConvolutionFilter1D ---------------------------------------------------------

275

276

// Starts with the largest recorded filter length (fMaxFilter) at 0; it is
// raised as filters are added through AddFilter().
SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {}

// Nothing to release explicitly here; the destructor body is empty.
SkConvolutionFilter1D::~SkConvolutionFilter1D() {}

282

283

void SkConvolutionFilter1D::AddFilter(int filterOffset,

284

const float* filterValues,

285

int filterLength) {

286

SkASSERT(filterLength > 0);

287

288

SkTArray<ConvolutionFixed> fixedValues;

289

fixedValues.reset(filterLength);

290

291

for (int i = 0; i < filterLength; ++i) {

292

fixedValues.push_back(FloatToFixed(filterValues[i]));

293

}

294

295

AddFilter(filterOffset, &fixedValues[0], filterLength);

296

}

297

298

void SkConvolutionFilter1D::AddFilter(int filterOffset,

299

const ConvolutionFixed* filterValues,

300

int filterLength) {

301

// It is common for leading/trailing filter values to be zeros. In such

302

// cases it is beneficial to only store the central factors.

303

// For a scaling to 1/4th in each dimension using a Lanczos-2 filter on

304

// a 1080p image this optimization gives a ~10% speed improvement.

305

int filterSize = filterLength;

306

int firstNonZero = 0;

307

while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {

firstNonZero++;

}

if (firstNonZero < filterLength) {

312

// Here we have at least one non-zero factor.

313

int lastNonZero = filterLength - 1;

314

while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {

lastNonZero--;

}

filterOffset += firstNonZero;

319

filterLength = lastNonZero + 1 - firstNonZero;

320

SkASSERT(filterLength > 0);

321

322

for (int i = firstNonZero; i <= lastNonZero; i++) {

rmistry@google.com

d7a9fcc

2014-03-06 15:37:53 +0000

[diff] [blame]

323

fFilterValues.push_back(filterValues[i]);

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

324

}

325

} else {

326

// Here all the factors were zeroes.

filterLength = 0;

}

FilterInstance instance;

331

332

// We pushed filterLength elements onto fFilterValues

333

instance.fDataLocation = (static_cast<int>(fFilterValues.count()) -

334

filterLength);

335

instance.fOffset = filterOffset;

336

instance.fTrimmedLength = filterLength;

337

instance.fLength = filterSize;

338

fFilters.push_back(instance);

339

340

fMaxFilter = SkTMax(fMaxFilter, filterLength);

341

}

342

343

// Reports the first stored filter (fFilters[0]).
//
// On return |*specifiedFilterlength| holds the length originally passed to
// AddFilter(), |*filterOffset| the stored offset and |*filterLength| the
// trimmed length. Returns a pointer to the trimmed factors, or NULL when the
// trimmed length is 0.
//
// No check is made that a filter exists; index 0 is read unconditionally.
const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter(
                                        int* specifiedFilterlength,
                                        int* filterOffset,
                                        int* filterLength) const {
    const FilterInstance& filter = fFilters[0];
    *filterOffset = filter.fOffset;
    *filterLength = filter.fTrimmedLength;
    *specifiedFilterlength = filter.fLength;
    if (filter.fTrimmedLength == 0) {
        return NULL;
    }

    return &fFilterValues[filter.fDataLocation];
}

357

358

void BGRAConvolve2D(const unsigned char* sourceData,

359

int sourceByteRowStride,

360

bool sourceHasAlpha,

361

const SkConvolutionFilter1D& filterX,

362

const SkConvolutionFilter1D& filterY,

363

int outputByteRowStride,

364

unsigned char* output,

reed@google.com

2013-09-05 20:31:17 +0000

[diff] [blame]

365

const SkConvolutionProcs& convolveProcs,

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

366

bool useSimdIfPossible) {

367

368

int maxYFilterSize = filterY.maxFilter();

369

370

// The next row in the input that we will generate a horizontally

371

// convolved row for. If the filter doesn't start at the beginning of the

372

// image (this is the case when we are only resizing a subset), then we

373

// don't want to generate any output rows before that. Compute the starting

374

// row for convolution as the first pixel for the first vertical filter.

375

int filterOffset, filterLength;

376

const SkConvolutionFilter1D::ConvolutionFixed* filterValues =

377

filterY.FilterForValue(0, &filterOffset, &filterLength);

378

int nextXRow = filterOffset;

379

380

// We loop over each row in the input doing a horizontal convolution. This

381

// will result in a horizontally convolved image. We write the results into

382

// a circular buffer of convolved rows and do vertical convolution as rows

383

// are available. This prevents us from having to store the entire

384

// intermediate image and helps cache coherency.

385

// We need four extra rows so that four rows can be convolved horizontally
// at once. We also pad each row in the row buffer so that it is aligned up
// to 16 bytes.

388

// TODO(jiesun): We do not use aligned load from row buffer in vertical

389

// convolution pass yet. Somehow Windows does not like it.

390

int rowBufferWidth = (filterX.numValues() + 15) & ~0xF;

391

int rowBufferHeight = maxYFilterSize +

reed@google.com

2013-09-05 20:31:17 +0000

[diff] [blame]

392

(convolveProcs.fConvolve4RowsHorizontally ? 4 : 0);

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

393

CircularRowBuffer rowBuffer(rowBufferWidth,

rowBufferHeight,

filterOffset);

// Loop over every possible output row, processing just enough horizontal

398

// convolutions to run each subsequent vertical convolution.

399

SkASSERT(outputByteRowStride >= filterX.numValues() * 4);

400

int numOutputRows = filterY.numValues();

401

402

// We need to check which is the last line to convolve before we advance 4

403

// lines in one iteration.

404

int lastFilterOffset, lastFilterLength;

405

406

// SSE2 can access up to 3 extra pixels past the end of the

407

// buffer. At the bottom of the image, we have to be careful

408

// not to access data past the end of the buffer. Normally

409

// we fall back to the C++ implementation for the last row.

410

// If the last row is less than 3 pixels wide, we may have to fall

411

// back to the C++ version for more rows. Compute how many

412

// rows we need to avoid the SSE implementation for here.

413

filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset,

414

&lastFilterLength);

reed@google.com

2013-09-05 20:31:17 +0000

[diff] [blame]

415

int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads /

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

416

(lastFilterOffset + lastFilterLength);

417

418

filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,

419

&lastFilterLength);

420

421

for (int outY = 0; outY < numOutputRows; outY++) {

422

filterValues = filterY.FilterForValue(outY,

423

&filterOffset, &filterLength);

424

425

// Generate output rows until we have enough to run the current filter.

426

while (nextXRow < filterOffset + filterLength) {

reed@google.com

2013-09-05 20:31:17 +0000

[diff] [blame]

427

if (convolveProcs.fConvolve4RowsHorizontally &&

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

428

nextXRow + 3 < lastFilterOffset + lastFilterLength -

429

avoidSimdRows) {

430

const unsigned char* src[4];

431

unsigned char* outRow[4];

432

for (int i = 0; i < 4; ++i) {

sugoi

2014-06-11 06:31:29 -0700

[diff] [blame]

433

src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

434

outRow[i] = rowBuffer.advanceRow();

435

}

reed@google.com

2013-09-05 20:31:17 +0000

[diff] [blame]

436

convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow);

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

437

nextXRow += 4;

438

} else {

439

// Check if we need to avoid SSE2 for this row.

reed@google.com

2013-09-05 20:31:17 +0000

[diff] [blame]

440

if (convolveProcs.fConvolveHorizontally &&

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

441

nextXRow < lastFilterOffset + lastFilterLength -

442

avoidSimdRows) {

reed@google.com

2013-09-05 20:31:17 +0000

[diff] [blame]

443

convolveProcs.fConvolveHorizontally(

sugoi

2014-06-11 06:31:29 -0700

[diff] [blame]

444

&sourceData[(uint64_t)nextXRow * sourceByteRowStride],

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

445

filterX, rowBuffer.advanceRow(), sourceHasAlpha);

446

} else {

447

if (sourceHasAlpha) {

mtklein

2014-06-25 11:38:00 -0700

[diff] [blame]

448

ConvolveHorizontallyAlpha(

sugoi

2014-06-11 06:31:29 -0700

[diff] [blame]

449

&sourceData[(uint64_t)nextXRow * sourceByteRowStride],

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

450

filterX, rowBuffer.advanceRow());

451

} else {

mtklein

2014-06-25 11:38:00 -0700

[diff] [blame]

452

ConvolveHorizontallyNoAlpha(

sugoi

2014-06-11 06:31:29 -0700

[diff] [blame]

453

&sourceData[(uint64_t)nextXRow * sourceByteRowStride],

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

454

filterX, rowBuffer.advanceRow());

}

}

nextXRow++;

}

}

// Compute where in the output image this row of final data will go.

sugoi

c197c8a

2014-07-03 10:44:26 -0700

[diff] [blame]

462

unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];

humper@google.com

2013-07-19 20:20:04 +0000

[diff] [blame]

463

464

// Get the list of rows that the circular buffer has, in order.

465

int firstRowInCircularBuffer;

466

unsigned char* const* rowsToConvolve =

467

rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);

468

469

// Now compute the start of the subset of those rows that the filter

470

// needs.

471

unsigned char* const* firstRowForFilter =

472

&rowsToConvolve[filterOffset - firstRowInCircularBuffer];

473

reed@google.com

2013-09-05 20:31:17 +0000

[diff] [blame]

474

if (convolveProcs.fConvolveVertically) {

475

convolveProcs.fConvolveVertically(filterValues, filterLength,

humper@google.com