blob: 7157ffc8ee0b6c0bdae715ae0e235a45781041fb [file] [log] [blame]
herb6eff52a2016-03-23 09:00:33 -07001/*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkLinearBitmapPipeline_sampler_DEFINED
9#define SkLinearBitmapPipeline_sampler_DEFINED
10
benjaminwagner6c71e0a2016-04-07 08:49:31 -070011#include "SkFixed.h"
herb6eff52a2016-03-23 09:00:33 -070012#include "SkLinearBitmapPipeline_core.h"
herb222f8ff2016-03-23 15:14:23 -070013#include <array>
herb6eff52a2016-03-23 09:00:33 -070014#include <tuple>
15
16namespace {
17// Explaination of the math:
18// 1 - x x
19// +--------+--------+
20// | | |
21// 1 - y | px00 | px10 |
22// | | |
23// +--------+--------+
24// | | |
25// y | px01 | px11 |
26// | | |
27// +--------+--------+
28//
29//
30// Given a pixelxy each is multiplied by a different factor derived from the fractional part of x
31// and y:
32// * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
33// * px10 -> x(1 - y) = x - xy
34// * px01 -> (1 - x)y = y - xy
35// * px11 -> xy
36// So x * y is calculated first and then used to calculate all the other factors.
37static Sk4s VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
38 Sk4f px01, Sk4f px11) {
39 // Calculate fractional xs and ys.
40 Sk4s fxs = xs - xs.floor();
41 Sk4s fys = ys - ys.floor();
42 Sk4s fxys{fxs * fys};
43 Sk4f sum = px11 * fxys;
44 sum = sum + px01 * (fys - fxys);
45 sum = sum + px10 * (fxs - fxys);
46 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
47 return sum;
48}
49
50// The GeneralSampler class
51template<typename SourceStrategy, typename Next>
52class GeneralSampler {
53public:
54 template<typename... Args>
herb9e0efe52016-04-08 13:25:28 -070055 GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
herb6eff52a2016-03-23 09:00:33 -070056 : fNext{next}, fStrategy{std::forward<Args>(args)...} { }
57
herb9e0efe52016-04-08 13:25:28 -070058 GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
59 const GeneralSampler& sampler)
60 : fNext{next}, fStrategy{sampler.fStrategy} { }
61
herb6eff52a2016-03-23 09:00:33 -070062 void VECTORCALL nearestListFew(int n, Sk4s xs, Sk4s ys) {
63 SkASSERT(0 < n && n < 4);
64 Sk4f px0, px1, px2;
65 fStrategy.getFewPixels(n, xs, ys, &px0, &px1, &px2);
herb9e0efe52016-04-08 13:25:28 -070066 if (n >= 1) fNext->blendPixel(px0);
67 if (n >= 2) fNext->blendPixel(px1);
68 if (n >= 3) fNext->blendPixel(px2);
herb6eff52a2016-03-23 09:00:33 -070069 }
70
71 void VECTORCALL nearestList4(Sk4s xs, Sk4s ys) {
72 Sk4f px0, px1, px2, px3;
73 fStrategy.get4Pixels(xs, ys, &px0, &px1, &px2, &px3);
herb9e0efe52016-04-08 13:25:28 -070074 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -070075 }
76
77 void nearestSpan(Span span) {
78 SkASSERT(!span.isEmpty());
79 SkPoint start;
80 SkScalar length;
81 int count;
82 std::tie(start, length, count) = span;
83 SkScalar absLength = SkScalarAbs(length);
84 if (absLength < (count - 1)) {
85 this->nearestSpanSlowRate(span);
86 } else if (absLength == (count - 1)) {
87 this->nearestSpanUnitRate(span);
88 } else {
89 this->nearestSpanFastRate(span);
90 }
91 }
92
93 Sk4f bilerNonEdgePixel(SkScalar x, SkScalar y) {
94 Sk4f px00, px10, px01, px11;
95 Sk4f xs = Sk4f{x};
96 Sk4f ys = Sk4f{y};
97 Sk4f sampleXs = xs + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f};
98 Sk4f sampleYs = ys + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f};
99 fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
100 return bilerp4(xs, ys, px00, px10, px01, px11);
101 }
102
103 void VECTORCALL bilerpListFew(int n, Sk4s xs, Sk4s ys) {
104 SkASSERT(0 < n && n < 4);
105 auto bilerpPixel = [&](int index) {
106 return this->bilerNonEdgePixel(xs[index], ys[index]);
107 };
108
herb9e0efe52016-04-08 13:25:28 -0700109 if (n >= 1) fNext->blendPixel(bilerpPixel(0));
110 if (n >= 2) fNext->blendPixel(bilerpPixel(1));
111 if (n >= 3) fNext->blendPixel(bilerpPixel(2));
herb6eff52a2016-03-23 09:00:33 -0700112 }
113
114 void VECTORCALL bilerpList4(Sk4s xs, Sk4s ys) {
115 auto bilerpPixel = [&](int index) {
116 return this->bilerNonEdgePixel(xs[index], ys[index]);
117 };
herb9e0efe52016-04-08 13:25:28 -0700118 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
herb6eff52a2016-03-23 09:00:33 -0700119 }
120
121 void VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) {
122 Sk4f px00, px10, px01, px11;
123 Sk4f xs = Sk4f{sampleXs[0]};
124 Sk4f ys = Sk4f{sampleYs[0]};
125 fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
126 Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11);
herb9e0efe52016-04-08 13:25:28 -0700127 fNext->blendPixel(pixel);
herb6eff52a2016-03-23 09:00:33 -0700128 }
129
130 void bilerpSpan(Span span) {
131 this->bilerpSpanWithY(span, span.startY());
132 }
133
134 void bilerpSpanWithY(Span span, SkScalar y) {
135 SkASSERT(!span.isEmpty());
136 SkPoint start;
137 SkScalar length;
138 int count;
139 std::tie(start, length, count) = span;
140 SkScalar absLength = SkScalarAbs(length);
141 if (absLength == 0.0f) {
142 this->bilerpSpanZeroRate(span, y);
143 } else if (absLength < (count - 1)) {
144 this->bilerpSpanSlowRate(span, y);
145 } else if (absLength == (count - 1)) {
146 if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) {
147 if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) {
148 this->nearestSpanUnitRate(span);
149 } else {
150 this->bilerpSpanUnitRateAlignedX(span, y);
151 }
152 } else {
153 this->bilerpSpanUnitRate(span, y);
154 }
155 } else {
156 this->bilerpSpanFastRate(span, y);
157 }
158 }
159
160private:
161 // When moving through source space more slowly than dst space (zoomed in),
162 // we'll be sampling from the same source pixel more than once.
163 void nearestSpanSlowRate(Span span) {
164 SkPoint start;
165 SkScalar length;
166 int count;
167 std::tie(start, length, count) = span;
168 SkScalar x = X(start);
169 SkFixed fx = SkScalarToFixed(x);
170 SkScalar dx = length / (count - 1);
171 SkFixed fdx = SkScalarToFixed(dx);
172
173 const void* row = fStrategy.row((int)std::floor(Y(start)));
174 Next* next = fNext;
175
176 int ix = SkFixedFloorToInt(fx);
177 int prevIX = ix;
herb222f8ff2016-03-23 15:14:23 -0700178 Sk4f fpixel = fStrategy.getPixelAt(row, ix);
herb6eff52a2016-03-23 09:00:33 -0700179
180 // When dx is less than one, each pixel is used more than once. Using the fixed point fx
181 // allows the code to quickly check that the same pixel is being used. The code uses this
182 // same pixel check to do the sRGB and normalization only once.
183 auto getNextPixel = [&]() {
184 if (ix != prevIX) {
herb222f8ff2016-03-23 15:14:23 -0700185 fpixel = fStrategy.getPixelAt(row, ix);
herb6eff52a2016-03-23 09:00:33 -0700186 prevIX = ix;
187 }
188 fx += fdx;
189 ix = SkFixedFloorToInt(fx);
190 return fpixel;
191 };
192
193 while (count >= 4) {
194 Sk4f px0 = getNextPixel();
195 Sk4f px1 = getNextPixel();
196 Sk4f px2 = getNextPixel();
197 Sk4f px3 = getNextPixel();
herb9e0efe52016-04-08 13:25:28 -0700198 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700199 count -= 4;
200 }
201 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700202 next->blendPixel(getNextPixel());
herb6eff52a2016-03-23 09:00:33 -0700203 count -= 1;
204 }
205 }
206
207 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
208 // We'll never re-use pixels, but we can at least load contiguous pixels.
209 void nearestSpanUnitRate(Span span) {
210 SkPoint start;
211 SkScalar length;
212 int count;
213 std::tie(start, length, count) = span;
214 int ix = SkScalarFloorToInt(X(start));
215 const void* row = fStrategy.row((int)std::floor(Y(start)));
216 Next* next = fNext;
217 if (length > 0) {
218 while (count >= 4) {
219 Sk4f px0, px1, px2, px3;
220 fStrategy.get4Pixels(row, ix, &px0, &px1, &px2, &px3);
herb9e0efe52016-04-08 13:25:28 -0700221 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700222 ix += 4;
223 count -= 4;
224 }
225
226 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700227 next->blendPixel(fStrategy.getPixelAt(row, ix));
herb6eff52a2016-03-23 09:00:33 -0700228 ix += 1;
229 count -= 1;
230 }
231 } else {
232 while (count >= 4) {
233 Sk4f px0, px1, px2, px3;
234 fStrategy.get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
herb9e0efe52016-04-08 13:25:28 -0700235 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700236 ix -= 4;
237 count -= 4;
238 }
239
240 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700241 next->blendPixel(fStrategy.getPixelAt(row, ix));
herb6eff52a2016-03-23 09:00:33 -0700242 ix -= 1;
243 count -= 1;
244 }
245 }
246 }
247
248 // We're moving through source space faster than dst (zoomed out),
249 // so we'll never reuse a source pixel or be able to do contiguous loads.
250 void nearestSpanFastRate(Span span) {
251 struct NearestWrapper {
252 void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) {
253 fSampler.nearestListFew(n, xs, ys);
254 }
255
256 void VECTORCALL pointList4(Sk4s xs, Sk4s ys) {
257 fSampler.nearestList4(xs, ys);
258 }
259
260 GeneralSampler& fSampler;
261 };
262 NearestWrapper wrapper{*this};
263 span_fallback(span, &wrapper);
264 }
265
266 void bilerpSpanZeroRate(Span span, SkScalar y1) {
267 SkScalar y0 = span.startY() - 0.5f;
268 y1 += 0.5f;
269 int iy0 = SkScalarFloorToInt(y0);
270 SkScalar filterY1 = y0 - iy0;
271 SkScalar filterY0 = 1.0f - filterY1;
272 int iy1 = SkScalarFloorToInt(y1);
273 int ix = SkScalarFloorToInt(span.startX());
herb222f8ff2016-03-23 15:14:23 -0700274 Sk4f pixelY0 = fStrategy.getPixelAt(fStrategy.row(iy0), ix);
275 Sk4f pixelY1 = fStrategy.getPixelAt(fStrategy.row(iy1), ix);
herb6eff52a2016-03-23 09:00:33 -0700276 Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1;
277 int count = span.count();
278 while (count >= 4) {
herb9e0efe52016-04-08 13:25:28 -0700279 fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel);
herb6eff52a2016-03-23 09:00:33 -0700280 count -= 4;
281 }
282 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700283 fNext->blendPixel(filterPixel);
herb6eff52a2016-03-23 09:00:33 -0700284 count -= 1;
285 }
286 }
287
288 // When moving through source space more slowly than dst space (zoomed in),
289 // we'll be sampling from the same source pixel more than once.
290 void bilerpSpanSlowRate(Span span, SkScalar ry1) {
291 SkPoint start;
292 SkScalar length;
293 int count;
294 std::tie(start, length, count) = span;
295 SkFixed fx = SkScalarToFixed(X(start)
296 -0.5f);
297
298 SkFixed fdx = SkScalarToFixed(length / (count - 1));
299 //start = start + SkPoint{-0.5f, -0.5f};
300
301 Sk4f xAdjust;
302 if (fdx >= 0) {
303 xAdjust = Sk4f{-1.0f};
304 } else {
305 xAdjust = Sk4f{1.0f};
306 }
307 int ix = SkFixedFloorToInt(fx);
308 int ioldx = ix;
309 Sk4f x{SkFixedToScalar(fx) - ix};
310 Sk4f dx{SkFixedToScalar(fdx)};
311 SkScalar ry0 = Y(start) - 0.5f;
312 ry1 += 0.5f;
313 SkScalar yFloor = std::floor(ry0);
314 Sk4f y1 = Sk4f{ry0 - yFloor};
315 Sk4f y0 = Sk4f{1.0f} - y1;
herb222f8ff2016-03-23 15:14:23 -0700316 const void* const row0 = fStrategy.row(SkScalarFloorToInt(ry0));
317 const void* const row1 = fStrategy.row(SkScalarFloorToInt(ry1));
318 Sk4f fpixel00 = y0 * fStrategy.getPixelAt(row0, ix);
319 Sk4f fpixel01 = y1 * fStrategy.getPixelAt(row1, ix);
320 Sk4f fpixel10 = y0 * fStrategy.getPixelAt(row0, ix + 1);
321 Sk4f fpixel11 = y1 * fStrategy.getPixelAt(row1, ix + 1);
herb6eff52a2016-03-23 09:00:33 -0700322 auto getNextPixel = [&]() {
323 if (ix != ioldx) {
324 fpixel00 = fpixel10;
325 fpixel01 = fpixel11;
herb222f8ff2016-03-23 15:14:23 -0700326 fpixel10 = y0 * fStrategy.getPixelAt(row0, ix + 1);
327 fpixel11 = y1 * fStrategy.getPixelAt(row1, ix + 1);
herb6eff52a2016-03-23 09:00:33 -0700328 ioldx = ix;
329 x = x + xAdjust;
330 }
331
332 Sk4f x0, x1;
333 x0 = Sk4f{1.0f} - x;
334 x1 = x;
335 Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11);
336 fx += fdx;
337 ix = SkFixedFloorToInt(fx);
338 x = x + dx;
339 return fpixel;
340 };
341
342 while (count >= 4) {
343 Sk4f fpixel0 = getNextPixel();
344 Sk4f fpixel1 = getNextPixel();
345 Sk4f fpixel2 = getNextPixel();
346 Sk4f fpixel3 = getNextPixel();
347
herb9e0efe52016-04-08 13:25:28 -0700348 fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3);
herb6eff52a2016-03-23 09:00:33 -0700349 count -= 4;
350 }
351
352 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700353 fNext->blendPixel(getNextPixel());
herb6eff52a2016-03-23 09:00:33 -0700354
355 count -= 1;
356 }
357 }
358
359 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
360 // We'll never re-use pixels, but we can at least load contiguous pixels.
361 void bilerpSpanUnitRate(Span span, SkScalar y1) {
362 y1 += 0.5f;
363 SkScalar y0 = span.startY() - 0.5f;
364 int iy0 = SkScalarFloorToInt(y0);
365 SkScalar filterY1 = y0 - iy0;
366 SkScalar filterY0 = 1.0f - filterY1;
367 int iy1 = SkScalarFloorToInt(y1);
368 const void* rowY0 = fStrategy.row(iy0);
369 const void* rowY1 = fStrategy.row(iy1);
370 SkScalar x0 = span.startX() - 0.5f;
371 int ix0 = SkScalarFloorToInt(x0);
372 SkScalar filterX1 = x0 - ix0;
373 SkScalar filterX0 = 1.0f - filterX1;
374
375 auto getPixelY0 = [&]() {
herb222f8ff2016-03-23 15:14:23 -0700376 Sk4f px = fStrategy.getPixelAt(rowY0, ix0);
herb6eff52a2016-03-23 09:00:33 -0700377 return px * filterY0;
378 };
379
380 auto getPixelY1 = [&]() {
herb222f8ff2016-03-23 15:14:23 -0700381 Sk4f px = fStrategy.getPixelAt(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700382 return px * filterY1;
383 };
384
385 auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
386 fStrategy.get4Pixels(rowY0, ix, px0, px1, px2, px3);
387 *px0 = *px0 * filterY0;
388 *px1 = *px1 * filterY0;
389 *px2 = *px2 * filterY0;
390 *px3 = *px3 * filterY0;
391 };
392
393 auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
394 fStrategy.get4Pixels(rowY1, ix, px0, px1, px2, px3);
395 *px0 = *px0 * filterY1;
396 *px1 = *px1 * filterY1;
397 *px2 = *px2 * filterY1;
398 *px3 = *px3 * filterY1;
399 };
400
401 auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) {
402 return pixelX0 * filterX0 + pixelX1 * filterX1;
403 };
404
405 // Mid making 4 unit rate.
406 Sk4f pxB = getPixelY0() + getPixelY1();
407 if (span.length() > 0) {
408 int count = span.count();
409 while (count >= 4) {
410 Sk4f px00, px10, px20, px30;
411 get4PixelsY0(ix0, &px00, &px10, &px20, &px30);
412 Sk4f px01, px11, px21, px31;
413 get4PixelsY1(ix0, &px01, &px11, &px21, &px31);
414 Sk4f pxS0 = px00 + px01;
415 Sk4f px0 = lerp(pxB, pxS0);
416 Sk4f pxS1 = px10 + px11;
417 Sk4f px1 = lerp(pxS0, pxS1);
418 Sk4f pxS2 = px20 + px21;
419 Sk4f px2 = lerp(pxS1, pxS2);
420 Sk4f pxS3 = px30 + px31;
421 Sk4f px3 = lerp(pxS2, pxS3);
422 pxB = pxS3;
herb9e0efe52016-04-08 13:25:28 -0700423 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700424 ix0 += 4;
425 count -= 4;
426 }
427 while (count > 0) {
herb222f8ff2016-03-23 15:14:23 -0700428 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix0);
429 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700430
herb9e0efe52016-04-08 13:25:28 -0700431 fNext->blendPixel(lerp(pixelY0, pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700432 ix0 += 1;
433 count -= 1;
434 }
435 } else {
436 int count = span.count();
437 while (count >= 4) {
438 Sk4f px00, px10, px20, px30;
439 get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30);
440 Sk4f px01, px11, px21, px31;
441 get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31);
442 Sk4f pxS3 = px30 + px31;
443 Sk4f px0 = lerp(pxS3, pxB);
444 Sk4f pxS2 = px20 + px21;
445 Sk4f px1 = lerp(pxS2, pxS3);
446 Sk4f pxS1 = px10 + px11;
447 Sk4f px2 = lerp(pxS1, pxS2);
448 Sk4f pxS0 = px00 + px01;
449 Sk4f px3 = lerp(pxS0, pxS1);
450 pxB = pxS0;
herb9e0efe52016-04-08 13:25:28 -0700451 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700452 ix0 -= 4;
453 count -= 4;
454 }
455 while (count > 0) {
herb222f8ff2016-03-23 15:14:23 -0700456 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix0);
457 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700458
herb9e0efe52016-04-08 13:25:28 -0700459 fNext->blendPixel(lerp(pixelY0, pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700460 ix0 -= 1;
461 count -= 1;
462 }
463 }
464 }
465
466 void bilerpSpanUnitRateAlignedX(Span span, SkScalar y1) {
467 SkScalar y0 = span.startY() - 0.5f;
468 y1 += 0.5f;
469 int iy0 = SkScalarFloorToInt(y0);
470 SkScalar filterY1 = y0 - iy0;
471 SkScalar filterY0 = 1.0f - filterY1;
472 int iy1 = SkScalarFloorToInt(y1);
473 int ix = SkScalarFloorToInt(span.startX());
474 const void* rowY0 = fStrategy.row(iy0);
475 const void* rowY1 = fStrategy.row(iy1);
476 auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) {
477 return *pixelY0 * filterY0 + *pixelY1 * filterY1;
478 };
479
480 if (span.length() > 0) {
481 int count = span.count();
482 while (count >= 4) {
483 Sk4f px00, px10, px20, px30;
484 fStrategy.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30);
485 Sk4f px01, px11, px21, px31;
486 fStrategy.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31);
herb9e0efe52016-04-08 13:25:28 -0700487 fNext->blend4Pixels(
herb6eff52a2016-03-23 09:00:33 -0700488 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
489 ix += 4;
490 count -= 4;
491 }
492 while (count > 0) {
herb222f8ff2016-03-23 15:14:23 -0700493 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix);
494 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix);
herb6eff52a2016-03-23 09:00:33 -0700495
herb9e0efe52016-04-08 13:25:28 -0700496 fNext->blendPixel(lerp(&pixelY0, &pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700497 ix += 1;
498 count -= 1;
499 }
500 } else {
501 int count = span.count();
502 while (count >= 4) {
503 Sk4f px00, px10, px20, px30;
504 fStrategy.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00);
505 Sk4f px01, px11, px21, px31;
506 fStrategy.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01);
herb9e0efe52016-04-08 13:25:28 -0700507 fNext->blend4Pixels(
herb6eff52a2016-03-23 09:00:33 -0700508 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
509 ix -= 4;
510 count -= 4;
511 }
512 while (count > 0) {
herb222f8ff2016-03-23 15:14:23 -0700513 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix);
514 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix);
herb6eff52a2016-03-23 09:00:33 -0700515
herb9e0efe52016-04-08 13:25:28 -0700516 fNext->blendPixel(lerp(&pixelY0, &pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700517 ix -= 1;
518 count -= 1;
519 }
520 }
521 }
522
523 // We're moving through source space faster than dst (zoomed out),
524 // so we'll never reuse a source pixel or be able to do contiguous loads.
525 void bilerpSpanFastRate(Span span, SkScalar y1) {
526 SkPoint start;
527 SkScalar length;
528 int count;
529 std::tie(start, length, count) = span;
530 SkScalar x = X(start);
531 SkScalar y = Y(start);
532 if (false && y == y1) {
533 struct BilerpWrapper {
534 void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) {
535 fSampler.bilerpListFew(n, xs, ys);
536 }
537
538 void VECTORCALL pointList4(Sk4s xs, Sk4s ys) {
539 fSampler.bilerpList4(xs, ys);
540 }
541
542 GeneralSampler& fSampler;
543 };
544 BilerpWrapper wrapper{*this};
545 span_fallback(span, &wrapper);
546 } else {
547 SkScalar dx = length / (count - 1);
548 Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f};
549 while (count > 0) {
550 Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x};
551 this->bilerpEdge(xs, ys);
552 x += dx;
553 count -= 1;
554 }
555 }
556 }
557
558 Next* const fNext;
559 SourceStrategy fStrategy;
560};
561
562class sRGBFast {
563public:
564 static Sk4s VECTORCALL sRGBToLinear(Sk4s pixel) {
565 Sk4s l = pixel * pixel;
566 return Sk4s{l[0], l[1], l[2], pixel[3]};
567 }
568};
569
// Channel order of a packed 32-bit source pixel. Pixel8888 swaps lanes 0 and 2 after loading
// when the order is kBGRA.
enum class ColorOrder {
    kRGBA = false,  // lanes are used as loaded
    kBGRA = true,   // lanes 0 and 2 are swapped after loading
};
574template <SkColorProfileType colorProfile, ColorOrder colorOrder>
575class Pixel8888 {
576public:
577 Pixel8888(int width, const uint32_t* src) : fSrc{src}, fWidth{width}{ }
578 Pixel8888(const SkPixmap& srcPixmap)
579 : fSrc{srcPixmap.addr32()}
580 , fWidth{static_cast<int>(srcPixmap.rowBytes() / 4)} { }
581
582 void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) {
583 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
584 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
585 Sk4i bufferLoc = YIs * fWidth + XIs;
586 switch (n) {
587 case 3:
herb222f8ff2016-03-23 15:14:23 -0700588 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
herb6eff52a2016-03-23 09:00:33 -0700589 case 2:
herb222f8ff2016-03-23 15:14:23 -0700590 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
herb6eff52a2016-03-23 09:00:33 -0700591 case 1:
herb222f8ff2016-03-23 15:14:23 -0700592 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
herb6eff52a2016-03-23 09:00:33 -0700593 default:
594 break;
595 }
596 }
597
598 void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
599 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
600 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
601 Sk4i bufferLoc = YIs * fWidth + XIs;
herb222f8ff2016-03-23 15:14:23 -0700602 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
603 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
604 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
605 *px3 = this->getPixelAt(fSrc, bufferLoc[3]);
herb6eff52a2016-03-23 09:00:33 -0700606 }
607
608 void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
609 const uint32_t* src = static_cast<const uint32_t*>(vsrc);
herb222f8ff2016-03-23 15:14:23 -0700610 *px0 = this->getPixelAt(src, index + 0);
611 *px1 = this->getPixelAt(src, index + 1);
612 *px2 = this->getPixelAt(src, index + 2);
613 *px3 = this->getPixelAt(src, index + 3);
herb6eff52a2016-03-23 09:00:33 -0700614 }
615
herb222f8ff2016-03-23 15:14:23 -0700616 Sk4f getPixelAt(const void* vsrc, int index) {
herb6eff52a2016-03-23 09:00:33 -0700617 const uint32_t* src = static_cast<const uint32_t*>(vsrc);
618 Sk4b bytePixel = Sk4b::Load((uint8_t *)(&src[index]));
619 Sk4f pixel = SkNx_cast<float, uint8_t>(bytePixel);
620 if (colorOrder == ColorOrder::kBGRA) {
621 pixel = SkNx_shuffle<2, 1, 0, 3>(pixel);
622 }
623 pixel = pixel * Sk4f{1.0f/255.0f};
624 if (colorProfile == kSRGB_SkColorProfileType) {
625 pixel = sRGBFast::sRGBToLinear(pixel);
626 }
627 return pixel;
628 }
629
herb222f8ff2016-03-23 15:14:23 -0700630 const void* row(int y) { return fSrc + y * fWidth[0]; }
herb6eff52a2016-03-23 09:00:33 -0700631
632private:
633 const uint32_t* const fSrc;
herb222f8ff2016-03-23 15:14:23 -0700634 const Sk4i fWidth;
herb6eff52a2016-03-23 09:00:33 -0700635};
636using Pixel8888SRGB = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kRGBA>;
637using Pixel8888LRGB = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kRGBA>;
638using Pixel8888SBGR = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kBGRA>;
639using Pixel8888LBGR = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kBGRA>;
herb222f8ff2016-03-23 15:14:23 -0700640
641template <SkColorProfileType colorProfile>
642class PixelIndex8 {
643public:
644 PixelIndex8(const SkPixmap& srcPixmap)
645 : fSrc{srcPixmap.addr8()}, fWidth{static_cast<int>(srcPixmap.rowBytes())} {
646 SkASSERT(srcPixmap.colorType() == kIndex_8_SkColorType);
647 SkColorTable* skColorTable = srcPixmap.ctable();
648 SkASSERT(skColorTable != nullptr);
649
650 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
651 for (int i = 0; i < skColorTable->count(); i++) {
652 fColorTable[i] = this->convertPixel((*skColorTable)[i]);
653 }
654 }
655
herb9e0efe52016-04-08 13:25:28 -0700656 PixelIndex8(const PixelIndex8& strategy)
657 : fSrc{strategy.fSrc}, fWidth{strategy.fWidth} {
658 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
659 // TODO: figure out the count.
660 for (int i = 0; i < 256; i++) {
661 fColorTable[i] = strategy.fColorTable[i];
662 }
663 }
664
herb222f8ff2016-03-23 15:14:23 -0700665 void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) {
666 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
667 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
668 Sk4i bufferLoc = YIs * fWidth + XIs;
669 switch (n) {
670 case 3:
671 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
672 case 2:
673 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
674 case 1:
675 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
676 default:
677 break;
678 }
679 }
680
681 void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
682 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
683 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
684 Sk4i bufferLoc = YIs * fWidth + XIs;
685 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
686 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
687 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
688 *px3 = this->getPixelAt(fSrc, bufferLoc[3]);
689 }
690
691 void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
692 *px0 = this->getPixelAt(vsrc, index + 0);
693 *px1 = this->getPixelAt(vsrc, index + 1);
694 *px2 = this->getPixelAt(vsrc, index + 2);
695 *px3 = this->getPixelAt(vsrc, index + 3);
696 }
697
698 Sk4f getPixelAt(const void* vsrc, int index) {
699 const uint8_t* src = static_cast<const uint8_t*>(vsrc);
700 return getPixel(src + index);
701 }
702
703 Sk4f getPixel(const uint8_t* src) {
704 Sk4f pixel = fColorTable[*src];
705 return pixel;
706 }
707
708 const void* row(int y) { return fSrc + y * fWidth[0]; }
709
710private:
711 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
712 Sk4f convertPixel(SkPMColor pmColor) {
713 Sk4b bPixel = Sk4b::Load(&pmColor);
714 Sk4f pixel = SkNx_cast<float, uint8_t>(bPixel);
715 float alpha = pixel[3];
716 if (alpha != 0.0f) {
717 float invAlpha = 1.0f / pixel[3];
718 Sk4f normalize = {invAlpha, invAlpha, invAlpha, 1.0f / 255.0f};
719 pixel = pixel * normalize;
720 if (colorProfile == kSRGB_SkColorProfileType) {
721 pixel = sRGBFast::sRGBToLinear(pixel);
722 }
723 return pixel;
724 } else {
725 return Sk4f{0.0f};
726 }
727 }
728 const uint8_t* const fSrc;
729 const Sk4i fWidth;
730 SkAutoMalloc fColorTableStorage{kColorTableSize};
731 Sk4f* fColorTable;
732};
733
734using PixelIndex8SRGB = PixelIndex8<kSRGB_SkColorProfileType>;
735using PixelIndex8LRGB = PixelIndex8<kLinear_SkColorProfileType>;
736
herb6eff52a2016-03-23 09:00:33 -0700737} // namespace
738
739#endif // SkLinearBitmapPipeline_sampler_DEFINED