blob: cf13fd57be891da154f46730f8c8b61c9bfff522 [file] [log] [blame]
herb6eff52a2016-03-23 09:00:33 -07001/*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkLinearBitmapPipeline_sampler_DEFINED
9#define SkLinearBitmapPipeline_sampler_DEFINED
10
herbcf05dcd2016-05-11 11:53:36 -070011#include <tuple>
12
herb670f01f2016-05-13 10:04:46 -070013#include "SkColor.h"
14#include "SkColorPriv.h"
benjaminwagner6c71e0a2016-04-07 08:49:31 -070015#include "SkFixed.h"
herbd5f2e2e2016-04-14 11:16:44 -070016#include "SkHalf.h"
herb6eff52a2016-03-23 09:00:33 -070017#include "SkLinearBitmapPipeline_core.h"
herb670f01f2016-05-13 10:04:46 -070018#include "SkNx.h"
herbcf05dcd2016-05-11 11:53:36 -070019#include "SkPM4fPriv.h"
herb6eff52a2016-03-23 09:00:33 -070020
21namespace {
// Explanation of the math:
//              1 - x      x
//           +--------+--------+
//           |        |        |
//  1 - y    |  px00  |  px10  |
//           |        |        |
//           +--------+--------+
//           |        |        |
//    y      |  px01  |  px11  |
//           |        |        |
//           +--------+--------+
//
//
// Each pixel pxXY is multiplied by a different factor derived from the fractional parts of x
// and y:
// * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
// * px10 -> x(1 - y) = x - xy
// * px01 -> (1 - x)y = y - xy
// * px11 -> xy
// So x * y is calculated first and then used to calculate all the other factors.
42static Sk4s VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
43 Sk4f px01, Sk4f px11) {
44 // Calculate fractional xs and ys.
45 Sk4s fxs = xs - xs.floor();
46 Sk4s fys = ys - ys.floor();
47 Sk4s fxys{fxs * fys};
48 Sk4f sum = px11 * fxys;
49 sum = sum + px01 * (fys - fxys);
50 sum = sum + px10 * (fxs - fxys);
51 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
52 return sum;
53}
54
herb15332a82016-05-12 11:37:00 -070055////////////////////////////////////////////////////////////////////////////////////////////////////
56// PixelGetter is the lowest level interface to the source data. There is a PixelGetter for each
57// of the different SkColorTypes.
58template <SkColorType colorType, SkColorProfileType colorProfile> class PixelGetter;
59
// Alpha handling:
//   The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate
//   the entire bitmap. So, the tint color is given an alpha of 1.0 here so that the paint alpha
//   can modulate this color later.
64template <>
65class PixelGetter<kAlpha_8_SkColorType, kLinear_SkColorProfileType> {
66public:
67 using Element = uint8_t;
68 PixelGetter(const SkPixmap& srcPixmap, SkColor tintColor)
69 : fTintColor{set_alpha(Sk4f_from_SkColor(tintColor), 1.0f)} { }
70
71 Sk4f getPixelAt(const uint8_t* src) {
72 return fTintColor * (*src * (1.0f/255.0f));
73 }
74
75private:
76 const Sk4f fTintColor;
77};
78
79template <SkColorProfileType colorProfile>
80class PixelGetter<kRGB_565_SkColorType, colorProfile> {
81public:
82 using Element = uint16_t;
83 PixelGetter(const SkPixmap& srcPixmap) { }
84
85 Sk4f getPixelAt(const uint16_t* src) {
86 SkPMColor pixel = SkPixel16ToPixel32(*src);
87 return colorProfile == kSRGB_SkColorProfileType
88 ? Sk4f_fromS32(pixel)
89 : Sk4f_fromL32(pixel);
90 }
91};
92
93template <SkColorProfileType colorProfile>
94class PixelGetter<kARGB_4444_SkColorType, colorProfile> {
95public:
96 using Element = uint16_t;
97 PixelGetter(const SkPixmap& srcPixmap) { }
98
99 Sk4f getPixelAt(const uint16_t* src) {
100 SkPMColor pixel = SkPixel4444ToPixel32(*src);
101 return colorProfile == kSRGB_SkColorProfileType
102 ? Sk4f_fromS32(pixel)
103 : Sk4f_fromL32(pixel);
104 }
105};
106
herb15332a82016-05-12 11:37:00 -0700107template <SkColorProfileType colorProfile>
108class PixelGetter<kRGBA_8888_SkColorType, colorProfile> {
109public:
110 using Element = uint32_t;
111 PixelGetter(const SkPixmap& srcPixmap) { }
112
113 Sk4f getPixelAt(const uint32_t* src) {
114 return colorProfile == kSRGB_SkColorProfileType
115 ? Sk4f_fromS32(*src)
116 : Sk4f_fromL32(*src);
117 }
118};
119
120template <SkColorProfileType colorProfile>
121class PixelGetter<kBGRA_8888_SkColorType, colorProfile> {
122public:
123 using Element = uint32_t;
124 PixelGetter(const SkPixmap& srcPixmap) { }
125
126 Sk4f getPixelAt(const uint32_t* src) {
127 Sk4f pixel = colorProfile == kSRGB_SkColorProfileType
128 ? Sk4f_fromS32(*src)
129 : Sk4f_fromL32(*src);
herb670f01f2016-05-13 10:04:46 -0700130 return swizzle_rb(pixel);
herb15332a82016-05-12 11:37:00 -0700131 }
132};
133
134template <SkColorProfileType colorProfile>
135class PixelGetter<kIndex_8_SkColorType, colorProfile> {
136public:
137 using Element = uint8_t;
138 PixelGetter(const SkPixmap& srcPixmap) {
139 SkColorTable* skColorTable = srcPixmap.ctable();
140 SkASSERT(skColorTable != nullptr);
141
142 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
143 for (int i = 0; i < skColorTable->count(); i++) {
144 fColorTable[i] = this->convertPixel((*skColorTable)[i]);
145 }
146 }
147
148 PixelGetter(const PixelGetter& strategy) {
149 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
150 // TODO: figure out the count.
151 for (int i = 0; i < 256; i++) {
152 fColorTable[i] = strategy.fColorTable[i];
153 }
154 }
155
156 Sk4f getPixelAt(const uint8_t* src) {
157 return fColorTable[*src];
158 }
159
160private:
161 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
162 Sk4f convertPixel(SkPMColor pmColor) {
163 Sk4f pixel = to_4f(pmColor);
164 float alpha = get_alpha(pixel);
165 if (alpha != 0.0f) {
166 float invAlpha = 1.0f / alpha;
167 Sk4f normalize = {invAlpha, invAlpha, invAlpha, 1.0f / 255.0f};
168 pixel = pixel * normalize;
169 if (colorProfile == kSRGB_SkColorProfileType) {
170 pixel = linear_to_srgb(pixel);
171 }
172 return pixel;
173 } else {
174 return Sk4f{0.0f};
175 }
176 }
177 SkAutoMalloc fColorTableStorage{kColorTableSize};
178 Sk4f* fColorTable;
179};
180
herb670f01f2016-05-13 10:04:46 -0700181template <SkColorProfileType colorProfile>
182class PixelGetter<kGray_8_SkColorType, colorProfile> {
183public:
184 using Element = uint8_t;
185 PixelGetter(const SkPixmap& srcPixmap) { }
186
187 Sk4f getPixelAt(const uint8_t* src) {
188 float gray = *src * (1.0f/255.0f);
189 Sk4f pixel = Sk4f{gray, gray, gray, 1.0f};
190 return colorProfile == kSRGB_SkColorProfileType
191 ? srgb_to_linear(pixel)
192 : pixel;
193 }
194};
195
herb15332a82016-05-12 11:37:00 -0700196template <>
197class PixelGetter<kRGBA_F16_SkColorType, kLinear_SkColorProfileType> {
198public:
199 using Element = uint64_t;
200 PixelGetter(const SkPixmap& srcPixmap) { }
201
202 Sk4f getPixelAt(const uint64_t* src) {
203 return SkHalfToFloat_01(*src);
204 }
205};
206
207////////////////////////////////////////////////////////////////////////////////////////////////////
208// PixelAccessor handles all the same plumbing for all the PixelGetters.
209template <SkColorType colorType, SkColorProfileType colorProfile>
210class PixelAccessor {
211 using Element = typename PixelGetter<colorType, colorProfile>::Element;
212public:
herb670f01f2016-05-13 10:04:46 -0700213 template <typename... Args>
214 PixelAccessor(const SkPixmap& srcPixmap, Args&&... args)
herb15332a82016-05-12 11:37:00 -0700215 : fSrc{static_cast<const Element*>(srcPixmap.addr())}
216 , fWidth{srcPixmap.rowBytesAsPixels()}
herb670f01f2016-05-13 10:04:46 -0700217 , fGetter{srcPixmap, std::move<Args>(args)...} { }
herb15332a82016-05-12 11:37:00 -0700218
219 void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) {
220 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
221 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
222 Sk4i bufferLoc = YIs * fWidth + XIs;
223 switch (n) {
224 case 3:
225 *px2 = this->getPixelAt(bufferLoc[2]);
226 case 2:
227 *px1 = this->getPixelAt(bufferLoc[1]);
228 case 1:
229 *px0 = this->getPixelAt(bufferLoc[0]);
230 default:
231 break;
232 }
233 }
234
235 void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
236 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
237 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
238 Sk4i bufferLoc = YIs * fWidth + XIs;
239 *px0 = this->getPixelAt(bufferLoc[0]);
240 *px1 = this->getPixelAt(bufferLoc[1]);
241 *px2 = this->getPixelAt(bufferLoc[2]);
242 *px3 = this->getPixelAt(bufferLoc[3]);
243 }
244
245 void get4Pixels(const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
246 *px0 = this->getPixelFromRow(src, index + 0);
247 *px1 = this->getPixelFromRow(src, index + 1);
248 *px2 = this->getPixelFromRow(src, index + 2);
249 *px3 = this->getPixelFromRow(src, index + 3);
250 }
251
252 Sk4f getPixelFromRow(const void* row, int index) {
253 const Element* src = static_cast<const Element*>(row);
254 return fGetter.getPixelAt(src + index);
255 }
256
257 Sk4f getPixelAt(int index) {
258 return this->getPixelFromRow(fSrc, index);
259 }
260
261 const void* row(int y) const { return fSrc + y * fWidth[0]; }
262
263private:
264 const Element* const fSrc;
265 const Sk4i fWidth;
266 PixelGetter<colorType, colorProfile> fGetter;
267};
268
269////////////////////////////////////////////////////////////////////////////////////////////////////
// GeneralSampler handles all the different sampling scenarios. It makes runtime decisions to
// choose the fastest strategy given a particular job. It ultimately uses PixelGetters to access
// the pixels.
273template<SkColorType colorType, SkColorProfileType colorProfile, typename Next>
herb6eff52a2016-03-23 09:00:33 -0700274class GeneralSampler {
275public:
276 template<typename... Args>
herb9e0efe52016-04-08 13:25:28 -0700277 GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
herb6eff52a2016-03-23 09:00:33 -0700278 : fNext{next}, fStrategy{std::forward<Args>(args)...} { }
279
herb9e0efe52016-04-08 13:25:28 -0700280 GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
281 const GeneralSampler& sampler)
282 : fNext{next}, fStrategy{sampler.fStrategy} { }
283
herb6eff52a2016-03-23 09:00:33 -0700284 void VECTORCALL nearestListFew(int n, Sk4s xs, Sk4s ys) {
285 SkASSERT(0 < n && n < 4);
286 Sk4f px0, px1, px2;
287 fStrategy.getFewPixels(n, xs, ys, &px0, &px1, &px2);
herb9e0efe52016-04-08 13:25:28 -0700288 if (n >= 1) fNext->blendPixel(px0);
289 if (n >= 2) fNext->blendPixel(px1);
290 if (n >= 3) fNext->blendPixel(px2);
herb6eff52a2016-03-23 09:00:33 -0700291 }
292
293 void VECTORCALL nearestList4(Sk4s xs, Sk4s ys) {
294 Sk4f px0, px1, px2, px3;
295 fStrategy.get4Pixels(xs, ys, &px0, &px1, &px2, &px3);
herb9e0efe52016-04-08 13:25:28 -0700296 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700297 }
298
299 void nearestSpan(Span span) {
300 SkASSERT(!span.isEmpty());
301 SkPoint start;
302 SkScalar length;
303 int count;
304 std::tie(start, length, count) = span;
305 SkScalar absLength = SkScalarAbs(length);
306 if (absLength < (count - 1)) {
307 this->nearestSpanSlowRate(span);
308 } else if (absLength == (count - 1)) {
309 this->nearestSpanUnitRate(span);
310 } else {
311 this->nearestSpanFastRate(span);
312 }
313 }
314
315 Sk4f bilerNonEdgePixel(SkScalar x, SkScalar y) {
316 Sk4f px00, px10, px01, px11;
317 Sk4f xs = Sk4f{x};
318 Sk4f ys = Sk4f{y};
319 Sk4f sampleXs = xs + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f};
320 Sk4f sampleYs = ys + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f};
321 fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
322 return bilerp4(xs, ys, px00, px10, px01, px11);
323 }
324
325 void VECTORCALL bilerpListFew(int n, Sk4s xs, Sk4s ys) {
326 SkASSERT(0 < n && n < 4);
327 auto bilerpPixel = [&](int index) {
328 return this->bilerNonEdgePixel(xs[index], ys[index]);
329 };
330
herb9e0efe52016-04-08 13:25:28 -0700331 if (n >= 1) fNext->blendPixel(bilerpPixel(0));
332 if (n >= 2) fNext->blendPixel(bilerpPixel(1));
333 if (n >= 3) fNext->blendPixel(bilerpPixel(2));
herb6eff52a2016-03-23 09:00:33 -0700334 }
335
336 void VECTORCALL bilerpList4(Sk4s xs, Sk4s ys) {
337 auto bilerpPixel = [&](int index) {
338 return this->bilerNonEdgePixel(xs[index], ys[index]);
339 };
herb9e0efe52016-04-08 13:25:28 -0700340 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
herb6eff52a2016-03-23 09:00:33 -0700341 }
342
343 void VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) {
344 Sk4f px00, px10, px01, px11;
345 Sk4f xs = Sk4f{sampleXs[0]};
346 Sk4f ys = Sk4f{sampleYs[0]};
347 fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
348 Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11);
herb9e0efe52016-04-08 13:25:28 -0700349 fNext->blendPixel(pixel);
herb6eff52a2016-03-23 09:00:33 -0700350 }
351
352 void bilerpSpan(Span span) {
353 this->bilerpSpanWithY(span, span.startY());
354 }
355
356 void bilerpSpanWithY(Span span, SkScalar y) {
357 SkASSERT(!span.isEmpty());
358 SkPoint start;
359 SkScalar length;
360 int count;
361 std::tie(start, length, count) = span;
362 SkScalar absLength = SkScalarAbs(length);
363 if (absLength == 0.0f) {
364 this->bilerpSpanZeroRate(span, y);
365 } else if (absLength < (count - 1)) {
366 this->bilerpSpanSlowRate(span, y);
367 } else if (absLength == (count - 1)) {
368 if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) {
369 if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) {
370 this->nearestSpanUnitRate(span);
371 } else {
372 this->bilerpSpanUnitRateAlignedX(span, y);
373 }
374 } else {
375 this->bilerpSpanUnitRate(span, y);
376 }
377 } else {
378 this->bilerpSpanFastRate(span, y);
379 }
380 }
381
382private:
383 // When moving through source space more slowly than dst space (zoomed in),
384 // we'll be sampling from the same source pixel more than once.
385 void nearestSpanSlowRate(Span span) {
386 SkPoint start;
387 SkScalar length;
388 int count;
389 std::tie(start, length, count) = span;
390 SkScalar x = X(start);
391 SkFixed fx = SkScalarToFixed(x);
392 SkScalar dx = length / (count - 1);
393 SkFixed fdx = SkScalarToFixed(dx);
394
395 const void* row = fStrategy.row((int)std::floor(Y(start)));
396 Next* next = fNext;
397
398 int ix = SkFixedFloorToInt(fx);
399 int prevIX = ix;
herbdd964892016-05-11 10:39:55 -0700400 Sk4f fpixel = fStrategy.getPixelFromRow(row, ix);
herb6eff52a2016-03-23 09:00:33 -0700401
402 // When dx is less than one, each pixel is used more than once. Using the fixed point fx
403 // allows the code to quickly check that the same pixel is being used. The code uses this
404 // same pixel check to do the sRGB and normalization only once.
405 auto getNextPixel = [&]() {
406 if (ix != prevIX) {
herbdd964892016-05-11 10:39:55 -0700407 fpixel = fStrategy.getPixelFromRow(row, ix);
herb6eff52a2016-03-23 09:00:33 -0700408 prevIX = ix;
409 }
410 fx += fdx;
411 ix = SkFixedFloorToInt(fx);
412 return fpixel;
413 };
414
415 while (count >= 4) {
416 Sk4f px0 = getNextPixel();
417 Sk4f px1 = getNextPixel();
418 Sk4f px2 = getNextPixel();
419 Sk4f px3 = getNextPixel();
herb9e0efe52016-04-08 13:25:28 -0700420 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700421 count -= 4;
422 }
423 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700424 next->blendPixel(getNextPixel());
herb6eff52a2016-03-23 09:00:33 -0700425 count -= 1;
426 }
427 }
428
429 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
430 // We'll never re-use pixels, but we can at least load contiguous pixels.
431 void nearestSpanUnitRate(Span span) {
432 SkPoint start;
433 SkScalar length;
434 int count;
435 std::tie(start, length, count) = span;
436 int ix = SkScalarFloorToInt(X(start));
437 const void* row = fStrategy.row((int)std::floor(Y(start)));
438 Next* next = fNext;
439 if (length > 0) {
440 while (count >= 4) {
441 Sk4f px0, px1, px2, px3;
442 fStrategy.get4Pixels(row, ix, &px0, &px1, &px2, &px3);
herb9e0efe52016-04-08 13:25:28 -0700443 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700444 ix += 4;
445 count -= 4;
446 }
447
448 while (count > 0) {
herbdd964892016-05-11 10:39:55 -0700449 next->blendPixel(fStrategy.getPixelFromRow(row, ix));
herb6eff52a2016-03-23 09:00:33 -0700450 ix += 1;
451 count -= 1;
452 }
453 } else {
454 while (count >= 4) {
455 Sk4f px0, px1, px2, px3;
456 fStrategy.get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
herb9e0efe52016-04-08 13:25:28 -0700457 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700458 ix -= 4;
459 count -= 4;
460 }
461
462 while (count > 0) {
herbdd964892016-05-11 10:39:55 -0700463 next->blendPixel(fStrategy.getPixelFromRow(row, ix));
herb6eff52a2016-03-23 09:00:33 -0700464 ix -= 1;
465 count -= 1;
466 }
467 }
468 }
469
470 // We're moving through source space faster than dst (zoomed out),
471 // so we'll never reuse a source pixel or be able to do contiguous loads.
472 void nearestSpanFastRate(Span span) {
473 struct NearestWrapper {
474 void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) {
475 fSampler.nearestListFew(n, xs, ys);
476 }
477
478 void VECTORCALL pointList4(Sk4s xs, Sk4s ys) {
479 fSampler.nearestList4(xs, ys);
480 }
481
482 GeneralSampler& fSampler;
483 };
484 NearestWrapper wrapper{*this};
485 span_fallback(span, &wrapper);
486 }
487
488 void bilerpSpanZeroRate(Span span, SkScalar y1) {
489 SkScalar y0 = span.startY() - 0.5f;
490 y1 += 0.5f;
491 int iy0 = SkScalarFloorToInt(y0);
492 SkScalar filterY1 = y0 - iy0;
493 SkScalar filterY0 = 1.0f - filterY1;
494 int iy1 = SkScalarFloorToInt(y1);
495 int ix = SkScalarFloorToInt(span.startX());
herbdd964892016-05-11 10:39:55 -0700496 Sk4f pixelY0 = fStrategy.getPixelFromRow(fStrategy.row(iy0), ix);
497 Sk4f pixelY1 = fStrategy.getPixelFromRow(fStrategy.row(iy1), ix);
herb6eff52a2016-03-23 09:00:33 -0700498 Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1;
499 int count = span.count();
500 while (count >= 4) {
herb9e0efe52016-04-08 13:25:28 -0700501 fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel);
herb6eff52a2016-03-23 09:00:33 -0700502 count -= 4;
503 }
504 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700505 fNext->blendPixel(filterPixel);
herb6eff52a2016-03-23 09:00:33 -0700506 count -= 1;
507 }
508 }
509
510 // When moving through source space more slowly than dst space (zoomed in),
511 // we'll be sampling from the same source pixel more than once.
512 void bilerpSpanSlowRate(Span span, SkScalar ry1) {
513 SkPoint start;
514 SkScalar length;
515 int count;
516 std::tie(start, length, count) = span;
517 SkFixed fx = SkScalarToFixed(X(start)
518 -0.5f);
519
520 SkFixed fdx = SkScalarToFixed(length / (count - 1));
521 //start = start + SkPoint{-0.5f, -0.5f};
522
523 Sk4f xAdjust;
524 if (fdx >= 0) {
525 xAdjust = Sk4f{-1.0f};
526 } else {
527 xAdjust = Sk4f{1.0f};
528 }
529 int ix = SkFixedFloorToInt(fx);
530 int ioldx = ix;
531 Sk4f x{SkFixedToScalar(fx) - ix};
532 Sk4f dx{SkFixedToScalar(fdx)};
533 SkScalar ry0 = Y(start) - 0.5f;
534 ry1 += 0.5f;
535 SkScalar yFloor = std::floor(ry0);
536 Sk4f y1 = Sk4f{ry0 - yFloor};
537 Sk4f y0 = Sk4f{1.0f} - y1;
herb222f8ff2016-03-23 15:14:23 -0700538 const void* const row0 = fStrategy.row(SkScalarFloorToInt(ry0));
539 const void* const row1 = fStrategy.row(SkScalarFloorToInt(ry1));
herbdd964892016-05-11 10:39:55 -0700540 Sk4f fpixel00 = y0 * fStrategy.getPixelFromRow(row0, ix);
541 Sk4f fpixel01 = y1 * fStrategy.getPixelFromRow(row1, ix);
542 Sk4f fpixel10 = y0 * fStrategy.getPixelFromRow(row0, ix + 1);
543 Sk4f fpixel11 = y1 * fStrategy.getPixelFromRow(row1, ix + 1);
herb6eff52a2016-03-23 09:00:33 -0700544 auto getNextPixel = [&]() {
545 if (ix != ioldx) {
546 fpixel00 = fpixel10;
547 fpixel01 = fpixel11;
herbdd964892016-05-11 10:39:55 -0700548 fpixel10 = y0 * fStrategy.getPixelFromRow(row0, ix + 1);
549 fpixel11 = y1 * fStrategy.getPixelFromRow(row1, ix + 1);
herb6eff52a2016-03-23 09:00:33 -0700550 ioldx = ix;
551 x = x + xAdjust;
552 }
553
554 Sk4f x0, x1;
555 x0 = Sk4f{1.0f} - x;
556 x1 = x;
557 Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11);
558 fx += fdx;
559 ix = SkFixedFloorToInt(fx);
560 x = x + dx;
561 return fpixel;
562 };
563
564 while (count >= 4) {
565 Sk4f fpixel0 = getNextPixel();
566 Sk4f fpixel1 = getNextPixel();
567 Sk4f fpixel2 = getNextPixel();
568 Sk4f fpixel3 = getNextPixel();
569
herb9e0efe52016-04-08 13:25:28 -0700570 fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3);
herb6eff52a2016-03-23 09:00:33 -0700571 count -= 4;
572 }
573
574 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700575 fNext->blendPixel(getNextPixel());
herb6eff52a2016-03-23 09:00:33 -0700576
577 count -= 1;
578 }
579 }
580
581 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
582 // We'll never re-use pixels, but we can at least load contiguous pixels.
583 void bilerpSpanUnitRate(Span span, SkScalar y1) {
584 y1 += 0.5f;
585 SkScalar y0 = span.startY() - 0.5f;
586 int iy0 = SkScalarFloorToInt(y0);
587 SkScalar filterY1 = y0 - iy0;
588 SkScalar filterY0 = 1.0f - filterY1;
589 int iy1 = SkScalarFloorToInt(y1);
590 const void* rowY0 = fStrategy.row(iy0);
591 const void* rowY1 = fStrategy.row(iy1);
592 SkScalar x0 = span.startX() - 0.5f;
593 int ix0 = SkScalarFloorToInt(x0);
594 SkScalar filterX1 = x0 - ix0;
595 SkScalar filterX0 = 1.0f - filterX1;
596
597 auto getPixelY0 = [&]() {
herbdd964892016-05-11 10:39:55 -0700598 Sk4f px = fStrategy.getPixelFromRow(rowY0, ix0);
herb6eff52a2016-03-23 09:00:33 -0700599 return px * filterY0;
600 };
601
602 auto getPixelY1 = [&]() {
herbdd964892016-05-11 10:39:55 -0700603 Sk4f px = fStrategy.getPixelFromRow(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700604 return px * filterY1;
605 };
606
607 auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
608 fStrategy.get4Pixels(rowY0, ix, px0, px1, px2, px3);
609 *px0 = *px0 * filterY0;
610 *px1 = *px1 * filterY0;
611 *px2 = *px2 * filterY0;
612 *px3 = *px3 * filterY0;
613 };
614
615 auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
616 fStrategy.get4Pixels(rowY1, ix, px0, px1, px2, px3);
617 *px0 = *px0 * filterY1;
618 *px1 = *px1 * filterY1;
619 *px2 = *px2 * filterY1;
620 *px3 = *px3 * filterY1;
621 };
622
623 auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) {
624 return pixelX0 * filterX0 + pixelX1 * filterX1;
625 };
626
627 // Mid making 4 unit rate.
628 Sk4f pxB = getPixelY0() + getPixelY1();
629 if (span.length() > 0) {
630 int count = span.count();
631 while (count >= 4) {
632 Sk4f px00, px10, px20, px30;
633 get4PixelsY0(ix0, &px00, &px10, &px20, &px30);
634 Sk4f px01, px11, px21, px31;
635 get4PixelsY1(ix0, &px01, &px11, &px21, &px31);
636 Sk4f pxS0 = px00 + px01;
637 Sk4f px0 = lerp(pxB, pxS0);
638 Sk4f pxS1 = px10 + px11;
639 Sk4f px1 = lerp(pxS0, pxS1);
640 Sk4f pxS2 = px20 + px21;
641 Sk4f px2 = lerp(pxS1, pxS2);
642 Sk4f pxS3 = px30 + px31;
643 Sk4f px3 = lerp(pxS2, pxS3);
644 pxB = pxS3;
herb9e0efe52016-04-08 13:25:28 -0700645 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700646 ix0 += 4;
647 count -= 4;
648 }
649 while (count > 0) {
herbdd964892016-05-11 10:39:55 -0700650 Sk4f pixelY0 = fStrategy.getPixelFromRow(rowY0, ix0);
651 Sk4f pixelY1 = fStrategy.getPixelFromRow(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700652
herb9e0efe52016-04-08 13:25:28 -0700653 fNext->blendPixel(lerp(pixelY0, pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700654 ix0 += 1;
655 count -= 1;
656 }
657 } else {
658 int count = span.count();
659 while (count >= 4) {
660 Sk4f px00, px10, px20, px30;
661 get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30);
662 Sk4f px01, px11, px21, px31;
663 get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31);
664 Sk4f pxS3 = px30 + px31;
665 Sk4f px0 = lerp(pxS3, pxB);
666 Sk4f pxS2 = px20 + px21;
667 Sk4f px1 = lerp(pxS2, pxS3);
668 Sk4f pxS1 = px10 + px11;
669 Sk4f px2 = lerp(pxS1, pxS2);
670 Sk4f pxS0 = px00 + px01;
671 Sk4f px3 = lerp(pxS0, pxS1);
672 pxB = pxS0;
herb9e0efe52016-04-08 13:25:28 -0700673 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700674 ix0 -= 4;
675 count -= 4;
676 }
677 while (count > 0) {
herbdd964892016-05-11 10:39:55 -0700678 Sk4f pixelY0 = fStrategy.getPixelFromRow(rowY0, ix0);
679 Sk4f pixelY1 = fStrategy.getPixelFromRow(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700680
herb9e0efe52016-04-08 13:25:28 -0700681 fNext->blendPixel(lerp(pixelY0, pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700682 ix0 -= 1;
683 count -= 1;
684 }
685 }
686 }
687
688 void bilerpSpanUnitRateAlignedX(Span span, SkScalar y1) {
689 SkScalar y0 = span.startY() - 0.5f;
690 y1 += 0.5f;
691 int iy0 = SkScalarFloorToInt(y0);
692 SkScalar filterY1 = y0 - iy0;
693 SkScalar filterY0 = 1.0f - filterY1;
694 int iy1 = SkScalarFloorToInt(y1);
695 int ix = SkScalarFloorToInt(span.startX());
696 const void* rowY0 = fStrategy.row(iy0);
697 const void* rowY1 = fStrategy.row(iy1);
698 auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) {
699 return *pixelY0 * filterY0 + *pixelY1 * filterY1;
700 };
701
702 if (span.length() > 0) {
703 int count = span.count();
704 while (count >= 4) {
705 Sk4f px00, px10, px20, px30;
706 fStrategy.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30);
707 Sk4f px01, px11, px21, px31;
708 fStrategy.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31);
herb9e0efe52016-04-08 13:25:28 -0700709 fNext->blend4Pixels(
herb6eff52a2016-03-23 09:00:33 -0700710 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
711 ix += 4;
712 count -= 4;
713 }
714 while (count > 0) {
herbdd964892016-05-11 10:39:55 -0700715 Sk4f pixelY0 = fStrategy.getPixelFromRow(rowY0, ix);
716 Sk4f pixelY1 = fStrategy.getPixelFromRow(rowY1, ix);
herb6eff52a2016-03-23 09:00:33 -0700717
herb9e0efe52016-04-08 13:25:28 -0700718 fNext->blendPixel(lerp(&pixelY0, &pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700719 ix += 1;
720 count -= 1;
721 }
722 } else {
723 int count = span.count();
724 while (count >= 4) {
725 Sk4f px00, px10, px20, px30;
726 fStrategy.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00);
727 Sk4f px01, px11, px21, px31;
728 fStrategy.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01);
herb9e0efe52016-04-08 13:25:28 -0700729 fNext->blend4Pixels(
herb6eff52a2016-03-23 09:00:33 -0700730 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
731 ix -= 4;
732 count -= 4;
733 }
734 while (count > 0) {
herbdd964892016-05-11 10:39:55 -0700735 Sk4f pixelY0 = fStrategy.getPixelFromRow(rowY0, ix);
736 Sk4f pixelY1 = fStrategy.getPixelFromRow(rowY1, ix);
herb6eff52a2016-03-23 09:00:33 -0700737
herb9e0efe52016-04-08 13:25:28 -0700738 fNext->blendPixel(lerp(&pixelY0, &pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700739 ix -= 1;
740 count -= 1;
741 }
742 }
743 }
744
745 // We're moving through source space faster than dst (zoomed out),
746 // so we'll never reuse a source pixel or be able to do contiguous loads.
747 void bilerpSpanFastRate(Span span, SkScalar y1) {
748 SkPoint start;
749 SkScalar length;
750 int count;
751 std::tie(start, length, count) = span;
752 SkScalar x = X(start);
753 SkScalar y = Y(start);
754 if (false && y == y1) {
755 struct BilerpWrapper {
756 void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) {
757 fSampler.bilerpListFew(n, xs, ys);
758 }
759
760 void VECTORCALL pointList4(Sk4s xs, Sk4s ys) {
761 fSampler.bilerpList4(xs, ys);
762 }
763
764 GeneralSampler& fSampler;
765 };
766 BilerpWrapper wrapper{*this};
767 span_fallback(span, &wrapper);
768 } else {
769 SkScalar dx = length / (count - 1);
770 Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f};
771 while (count > 0) {
772 Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x};
773 this->bilerpEdge(xs, ys);
774 x += dx;
775 count -= 1;
776 }
777 }
778 }
779
herb670f01f2016-05-13 10:04:46 -0700780 Next* const fNext;
herb15332a82016-05-12 11:37:00 -0700781 PixelAccessor<colorType, colorProfile> fStrategy;
herb6eff52a2016-03-23 09:00:33 -0700782};
783
herb6eff52a2016-03-23 09:00:33 -0700784} // namespace
785
786#endif // SkLinearBitmapPipeline_sampler_DEFINED