blob: d436e393996db035a77344e0d8fb4da0ce175abd [file] [log] [blame]
herb6eff52a2016-03-23 09:00:33 -07001/*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#ifndef SkLinearBitmapPipeline_sampler_DEFINED
9#define SkLinearBitmapPipeline_sampler_DEFINED
10
benjaminwagner6c71e0a2016-04-07 08:49:31 -070011#include "SkFixed.h"
herbd5f2e2e2016-04-14 11:16:44 -070012#include "SkHalf.h"
herb6eff52a2016-03-23 09:00:33 -070013#include "SkLinearBitmapPipeline_core.h"
herb222f8ff2016-03-23 15:14:23 -070014#include <array>
herb6eff52a2016-03-23 09:00:33 -070015#include <tuple>
16
17namespace {
// Explanation of the math:
//              1 - x      x
//           +--------+--------+
//           |        |        |
//  1 - y    |  px00  |  px10  |
//           |        |        |
//           +--------+--------+
//           |        |        |
//    y      |  px01  |  px11  |
//           |        |        |
//           +--------+--------+
//
//
// Given a sample point (x, y), each of the four pixels is multiplied by a different factor
// derived from the fractional parts of x and y:
// * px00 -> (1 - x)(1 - y) = 1 - x - y + xy
// * px10 -> x(1 - y) = x - xy
// * px01 -> (1 - x)y = y - xy
// * px11 -> xy
// So x * y is calculated first and then used to calculate all the other factors.
38static Sk4s VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
39 Sk4f px01, Sk4f px11) {
40 // Calculate fractional xs and ys.
41 Sk4s fxs = xs - xs.floor();
42 Sk4s fys = ys - ys.floor();
43 Sk4s fxys{fxs * fys};
44 Sk4f sum = px11 * fxys;
45 sum = sum + px01 * (fys - fxys);
46 sum = sum + px10 * (fxs - fxys);
47 sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys);
48 return sum;
49}
50
51// The GeneralSampler class
52template<typename SourceStrategy, typename Next>
53class GeneralSampler {
54public:
55 template<typename... Args>
herb9e0efe52016-04-08 13:25:28 -070056 GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
herb6eff52a2016-03-23 09:00:33 -070057 : fNext{next}, fStrategy{std::forward<Args>(args)...} { }
58
herb9e0efe52016-04-08 13:25:28 -070059 GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
60 const GeneralSampler& sampler)
61 : fNext{next}, fStrategy{sampler.fStrategy} { }
62
herb6eff52a2016-03-23 09:00:33 -070063 void VECTORCALL nearestListFew(int n, Sk4s xs, Sk4s ys) {
64 SkASSERT(0 < n && n < 4);
65 Sk4f px0, px1, px2;
66 fStrategy.getFewPixels(n, xs, ys, &px0, &px1, &px2);
herb9e0efe52016-04-08 13:25:28 -070067 if (n >= 1) fNext->blendPixel(px0);
68 if (n >= 2) fNext->blendPixel(px1);
69 if (n >= 3) fNext->blendPixel(px2);
herb6eff52a2016-03-23 09:00:33 -070070 }
71
72 void VECTORCALL nearestList4(Sk4s xs, Sk4s ys) {
73 Sk4f px0, px1, px2, px3;
74 fStrategy.get4Pixels(xs, ys, &px0, &px1, &px2, &px3);
herb9e0efe52016-04-08 13:25:28 -070075 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -070076 }
77
78 void nearestSpan(Span span) {
79 SkASSERT(!span.isEmpty());
80 SkPoint start;
81 SkScalar length;
82 int count;
83 std::tie(start, length, count) = span;
84 SkScalar absLength = SkScalarAbs(length);
85 if (absLength < (count - 1)) {
86 this->nearestSpanSlowRate(span);
87 } else if (absLength == (count - 1)) {
88 this->nearestSpanUnitRate(span);
89 } else {
90 this->nearestSpanFastRate(span);
91 }
92 }
93
94 Sk4f bilerNonEdgePixel(SkScalar x, SkScalar y) {
95 Sk4f px00, px10, px01, px11;
96 Sk4f xs = Sk4f{x};
97 Sk4f ys = Sk4f{y};
98 Sk4f sampleXs = xs + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f};
99 Sk4f sampleYs = ys + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f};
100 fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
101 return bilerp4(xs, ys, px00, px10, px01, px11);
102 }
103
104 void VECTORCALL bilerpListFew(int n, Sk4s xs, Sk4s ys) {
105 SkASSERT(0 < n && n < 4);
106 auto bilerpPixel = [&](int index) {
107 return this->bilerNonEdgePixel(xs[index], ys[index]);
108 };
109
herb9e0efe52016-04-08 13:25:28 -0700110 if (n >= 1) fNext->blendPixel(bilerpPixel(0));
111 if (n >= 2) fNext->blendPixel(bilerpPixel(1));
112 if (n >= 3) fNext->blendPixel(bilerpPixel(2));
herb6eff52a2016-03-23 09:00:33 -0700113 }
114
115 void VECTORCALL bilerpList4(Sk4s xs, Sk4s ys) {
116 auto bilerpPixel = [&](int index) {
117 return this->bilerNonEdgePixel(xs[index], ys[index]);
118 };
herb9e0efe52016-04-08 13:25:28 -0700119 fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
herb6eff52a2016-03-23 09:00:33 -0700120 }
121
122 void VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) {
123 Sk4f px00, px10, px01, px11;
124 Sk4f xs = Sk4f{sampleXs[0]};
125 Sk4f ys = Sk4f{sampleYs[0]};
126 fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
127 Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11);
herb9e0efe52016-04-08 13:25:28 -0700128 fNext->blendPixel(pixel);
herb6eff52a2016-03-23 09:00:33 -0700129 }
130
131 void bilerpSpan(Span span) {
132 this->bilerpSpanWithY(span, span.startY());
133 }
134
135 void bilerpSpanWithY(Span span, SkScalar y) {
136 SkASSERT(!span.isEmpty());
137 SkPoint start;
138 SkScalar length;
139 int count;
140 std::tie(start, length, count) = span;
141 SkScalar absLength = SkScalarAbs(length);
142 if (absLength == 0.0f) {
143 this->bilerpSpanZeroRate(span, y);
144 } else if (absLength < (count - 1)) {
145 this->bilerpSpanSlowRate(span, y);
146 } else if (absLength == (count - 1)) {
147 if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) {
148 if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) {
149 this->nearestSpanUnitRate(span);
150 } else {
151 this->bilerpSpanUnitRateAlignedX(span, y);
152 }
153 } else {
154 this->bilerpSpanUnitRate(span, y);
155 }
156 } else {
157 this->bilerpSpanFastRate(span, y);
158 }
159 }
160
161private:
162 // When moving through source space more slowly than dst space (zoomed in),
163 // we'll be sampling from the same source pixel more than once.
164 void nearestSpanSlowRate(Span span) {
165 SkPoint start;
166 SkScalar length;
167 int count;
168 std::tie(start, length, count) = span;
169 SkScalar x = X(start);
170 SkFixed fx = SkScalarToFixed(x);
171 SkScalar dx = length / (count - 1);
172 SkFixed fdx = SkScalarToFixed(dx);
173
174 const void* row = fStrategy.row((int)std::floor(Y(start)));
175 Next* next = fNext;
176
177 int ix = SkFixedFloorToInt(fx);
178 int prevIX = ix;
herb222f8ff2016-03-23 15:14:23 -0700179 Sk4f fpixel = fStrategy.getPixelAt(row, ix);
herb6eff52a2016-03-23 09:00:33 -0700180
181 // When dx is less than one, each pixel is used more than once. Using the fixed point fx
182 // allows the code to quickly check that the same pixel is being used. The code uses this
183 // same pixel check to do the sRGB and normalization only once.
184 auto getNextPixel = [&]() {
185 if (ix != prevIX) {
herb222f8ff2016-03-23 15:14:23 -0700186 fpixel = fStrategy.getPixelAt(row, ix);
herb6eff52a2016-03-23 09:00:33 -0700187 prevIX = ix;
188 }
189 fx += fdx;
190 ix = SkFixedFloorToInt(fx);
191 return fpixel;
192 };
193
194 while (count >= 4) {
195 Sk4f px0 = getNextPixel();
196 Sk4f px1 = getNextPixel();
197 Sk4f px2 = getNextPixel();
198 Sk4f px3 = getNextPixel();
herb9e0efe52016-04-08 13:25:28 -0700199 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700200 count -= 4;
201 }
202 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700203 next->blendPixel(getNextPixel());
herb6eff52a2016-03-23 09:00:33 -0700204 count -= 1;
205 }
206 }
207
208 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
209 // We'll never re-use pixels, but we can at least load contiguous pixels.
210 void nearestSpanUnitRate(Span span) {
211 SkPoint start;
212 SkScalar length;
213 int count;
214 std::tie(start, length, count) = span;
215 int ix = SkScalarFloorToInt(X(start));
216 const void* row = fStrategy.row((int)std::floor(Y(start)));
217 Next* next = fNext;
218 if (length > 0) {
219 while (count >= 4) {
220 Sk4f px0, px1, px2, px3;
221 fStrategy.get4Pixels(row, ix, &px0, &px1, &px2, &px3);
herb9e0efe52016-04-08 13:25:28 -0700222 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700223 ix += 4;
224 count -= 4;
225 }
226
227 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700228 next->blendPixel(fStrategy.getPixelAt(row, ix));
herb6eff52a2016-03-23 09:00:33 -0700229 ix += 1;
230 count -= 1;
231 }
232 } else {
233 while (count >= 4) {
234 Sk4f px0, px1, px2, px3;
235 fStrategy.get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
herb9e0efe52016-04-08 13:25:28 -0700236 next->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700237 ix -= 4;
238 count -= 4;
239 }
240
241 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700242 next->blendPixel(fStrategy.getPixelAt(row, ix));
herb6eff52a2016-03-23 09:00:33 -0700243 ix -= 1;
244 count -= 1;
245 }
246 }
247 }
248
249 // We're moving through source space faster than dst (zoomed out),
250 // so we'll never reuse a source pixel or be able to do contiguous loads.
251 void nearestSpanFastRate(Span span) {
252 struct NearestWrapper {
253 void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) {
254 fSampler.nearestListFew(n, xs, ys);
255 }
256
257 void VECTORCALL pointList4(Sk4s xs, Sk4s ys) {
258 fSampler.nearestList4(xs, ys);
259 }
260
261 GeneralSampler& fSampler;
262 };
263 NearestWrapper wrapper{*this};
264 span_fallback(span, &wrapper);
265 }
266
267 void bilerpSpanZeroRate(Span span, SkScalar y1) {
268 SkScalar y0 = span.startY() - 0.5f;
269 y1 += 0.5f;
270 int iy0 = SkScalarFloorToInt(y0);
271 SkScalar filterY1 = y0 - iy0;
272 SkScalar filterY0 = 1.0f - filterY1;
273 int iy1 = SkScalarFloorToInt(y1);
274 int ix = SkScalarFloorToInt(span.startX());
herb222f8ff2016-03-23 15:14:23 -0700275 Sk4f pixelY0 = fStrategy.getPixelAt(fStrategy.row(iy0), ix);
276 Sk4f pixelY1 = fStrategy.getPixelAt(fStrategy.row(iy1), ix);
herb6eff52a2016-03-23 09:00:33 -0700277 Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1;
278 int count = span.count();
279 while (count >= 4) {
herb9e0efe52016-04-08 13:25:28 -0700280 fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel);
herb6eff52a2016-03-23 09:00:33 -0700281 count -= 4;
282 }
283 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700284 fNext->blendPixel(filterPixel);
herb6eff52a2016-03-23 09:00:33 -0700285 count -= 1;
286 }
287 }
288
289 // When moving through source space more slowly than dst space (zoomed in),
290 // we'll be sampling from the same source pixel more than once.
291 void bilerpSpanSlowRate(Span span, SkScalar ry1) {
292 SkPoint start;
293 SkScalar length;
294 int count;
295 std::tie(start, length, count) = span;
296 SkFixed fx = SkScalarToFixed(X(start)
297 -0.5f);
298
299 SkFixed fdx = SkScalarToFixed(length / (count - 1));
300 //start = start + SkPoint{-0.5f, -0.5f};
301
302 Sk4f xAdjust;
303 if (fdx >= 0) {
304 xAdjust = Sk4f{-1.0f};
305 } else {
306 xAdjust = Sk4f{1.0f};
307 }
308 int ix = SkFixedFloorToInt(fx);
309 int ioldx = ix;
310 Sk4f x{SkFixedToScalar(fx) - ix};
311 Sk4f dx{SkFixedToScalar(fdx)};
312 SkScalar ry0 = Y(start) - 0.5f;
313 ry1 += 0.5f;
314 SkScalar yFloor = std::floor(ry0);
315 Sk4f y1 = Sk4f{ry0 - yFloor};
316 Sk4f y0 = Sk4f{1.0f} - y1;
herb222f8ff2016-03-23 15:14:23 -0700317 const void* const row0 = fStrategy.row(SkScalarFloorToInt(ry0));
318 const void* const row1 = fStrategy.row(SkScalarFloorToInt(ry1));
319 Sk4f fpixel00 = y0 * fStrategy.getPixelAt(row0, ix);
320 Sk4f fpixel01 = y1 * fStrategy.getPixelAt(row1, ix);
321 Sk4f fpixel10 = y0 * fStrategy.getPixelAt(row0, ix + 1);
322 Sk4f fpixel11 = y1 * fStrategy.getPixelAt(row1, ix + 1);
herb6eff52a2016-03-23 09:00:33 -0700323 auto getNextPixel = [&]() {
324 if (ix != ioldx) {
325 fpixel00 = fpixel10;
326 fpixel01 = fpixel11;
herb222f8ff2016-03-23 15:14:23 -0700327 fpixel10 = y0 * fStrategy.getPixelAt(row0, ix + 1);
328 fpixel11 = y1 * fStrategy.getPixelAt(row1, ix + 1);
herb6eff52a2016-03-23 09:00:33 -0700329 ioldx = ix;
330 x = x + xAdjust;
331 }
332
333 Sk4f x0, x1;
334 x0 = Sk4f{1.0f} - x;
335 x1 = x;
336 Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11);
337 fx += fdx;
338 ix = SkFixedFloorToInt(fx);
339 x = x + dx;
340 return fpixel;
341 };
342
343 while (count >= 4) {
344 Sk4f fpixel0 = getNextPixel();
345 Sk4f fpixel1 = getNextPixel();
346 Sk4f fpixel2 = getNextPixel();
347 Sk4f fpixel3 = getNextPixel();
348
herb9e0efe52016-04-08 13:25:28 -0700349 fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3);
herb6eff52a2016-03-23 09:00:33 -0700350 count -= 4;
351 }
352
353 while (count > 0) {
herb9e0efe52016-04-08 13:25:28 -0700354 fNext->blendPixel(getNextPixel());
herb6eff52a2016-03-23 09:00:33 -0700355
356 count -= 1;
357 }
358 }
359
360 // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
361 // We'll never re-use pixels, but we can at least load contiguous pixels.
362 void bilerpSpanUnitRate(Span span, SkScalar y1) {
363 y1 += 0.5f;
364 SkScalar y0 = span.startY() - 0.5f;
365 int iy0 = SkScalarFloorToInt(y0);
366 SkScalar filterY1 = y0 - iy0;
367 SkScalar filterY0 = 1.0f - filterY1;
368 int iy1 = SkScalarFloorToInt(y1);
369 const void* rowY0 = fStrategy.row(iy0);
370 const void* rowY1 = fStrategy.row(iy1);
371 SkScalar x0 = span.startX() - 0.5f;
372 int ix0 = SkScalarFloorToInt(x0);
373 SkScalar filterX1 = x0 - ix0;
374 SkScalar filterX0 = 1.0f - filterX1;
375
376 auto getPixelY0 = [&]() {
herb222f8ff2016-03-23 15:14:23 -0700377 Sk4f px = fStrategy.getPixelAt(rowY0, ix0);
herb6eff52a2016-03-23 09:00:33 -0700378 return px * filterY0;
379 };
380
381 auto getPixelY1 = [&]() {
herb222f8ff2016-03-23 15:14:23 -0700382 Sk4f px = fStrategy.getPixelAt(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700383 return px * filterY1;
384 };
385
386 auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
387 fStrategy.get4Pixels(rowY0, ix, px0, px1, px2, px3);
388 *px0 = *px0 * filterY0;
389 *px1 = *px1 * filterY0;
390 *px2 = *px2 * filterY0;
391 *px3 = *px3 * filterY0;
392 };
393
394 auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
395 fStrategy.get4Pixels(rowY1, ix, px0, px1, px2, px3);
396 *px0 = *px0 * filterY1;
397 *px1 = *px1 * filterY1;
398 *px2 = *px2 * filterY1;
399 *px3 = *px3 * filterY1;
400 };
401
402 auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) {
403 return pixelX0 * filterX0 + pixelX1 * filterX1;
404 };
405
406 // Mid making 4 unit rate.
407 Sk4f pxB = getPixelY0() + getPixelY1();
408 if (span.length() > 0) {
409 int count = span.count();
410 while (count >= 4) {
411 Sk4f px00, px10, px20, px30;
412 get4PixelsY0(ix0, &px00, &px10, &px20, &px30);
413 Sk4f px01, px11, px21, px31;
414 get4PixelsY1(ix0, &px01, &px11, &px21, &px31);
415 Sk4f pxS0 = px00 + px01;
416 Sk4f px0 = lerp(pxB, pxS0);
417 Sk4f pxS1 = px10 + px11;
418 Sk4f px1 = lerp(pxS0, pxS1);
419 Sk4f pxS2 = px20 + px21;
420 Sk4f px2 = lerp(pxS1, pxS2);
421 Sk4f pxS3 = px30 + px31;
422 Sk4f px3 = lerp(pxS2, pxS3);
423 pxB = pxS3;
herb9e0efe52016-04-08 13:25:28 -0700424 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700425 ix0 += 4;
426 count -= 4;
427 }
428 while (count > 0) {
herb222f8ff2016-03-23 15:14:23 -0700429 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix0);
430 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700431
herb9e0efe52016-04-08 13:25:28 -0700432 fNext->blendPixel(lerp(pixelY0, pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700433 ix0 += 1;
434 count -= 1;
435 }
436 } else {
437 int count = span.count();
438 while (count >= 4) {
439 Sk4f px00, px10, px20, px30;
440 get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30);
441 Sk4f px01, px11, px21, px31;
442 get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31);
443 Sk4f pxS3 = px30 + px31;
444 Sk4f px0 = lerp(pxS3, pxB);
445 Sk4f pxS2 = px20 + px21;
446 Sk4f px1 = lerp(pxS2, pxS3);
447 Sk4f pxS1 = px10 + px11;
448 Sk4f px2 = lerp(pxS1, pxS2);
449 Sk4f pxS0 = px00 + px01;
450 Sk4f px3 = lerp(pxS0, pxS1);
451 pxB = pxS0;
herb9e0efe52016-04-08 13:25:28 -0700452 fNext->blend4Pixels(px0, px1, px2, px3);
herb6eff52a2016-03-23 09:00:33 -0700453 ix0 -= 4;
454 count -= 4;
455 }
456 while (count > 0) {
herb222f8ff2016-03-23 15:14:23 -0700457 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix0);
458 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix0);
herb6eff52a2016-03-23 09:00:33 -0700459
herb9e0efe52016-04-08 13:25:28 -0700460 fNext->blendPixel(lerp(pixelY0, pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700461 ix0 -= 1;
462 count -= 1;
463 }
464 }
465 }
466
467 void bilerpSpanUnitRateAlignedX(Span span, SkScalar y1) {
468 SkScalar y0 = span.startY() - 0.5f;
469 y1 += 0.5f;
470 int iy0 = SkScalarFloorToInt(y0);
471 SkScalar filterY1 = y0 - iy0;
472 SkScalar filterY0 = 1.0f - filterY1;
473 int iy1 = SkScalarFloorToInt(y1);
474 int ix = SkScalarFloorToInt(span.startX());
475 const void* rowY0 = fStrategy.row(iy0);
476 const void* rowY1 = fStrategy.row(iy1);
477 auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) {
478 return *pixelY0 * filterY0 + *pixelY1 * filterY1;
479 };
480
481 if (span.length() > 0) {
482 int count = span.count();
483 while (count >= 4) {
484 Sk4f px00, px10, px20, px30;
485 fStrategy.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30);
486 Sk4f px01, px11, px21, px31;
487 fStrategy.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31);
herb9e0efe52016-04-08 13:25:28 -0700488 fNext->blend4Pixels(
herb6eff52a2016-03-23 09:00:33 -0700489 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
490 ix += 4;
491 count -= 4;
492 }
493 while (count > 0) {
herb222f8ff2016-03-23 15:14:23 -0700494 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix);
495 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix);
herb6eff52a2016-03-23 09:00:33 -0700496
herb9e0efe52016-04-08 13:25:28 -0700497 fNext->blendPixel(lerp(&pixelY0, &pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700498 ix += 1;
499 count -= 1;
500 }
501 } else {
502 int count = span.count();
503 while (count >= 4) {
504 Sk4f px00, px10, px20, px30;
505 fStrategy.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00);
506 Sk4f px01, px11, px21, px31;
507 fStrategy.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01);
herb9e0efe52016-04-08 13:25:28 -0700508 fNext->blend4Pixels(
herb6eff52a2016-03-23 09:00:33 -0700509 lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
510 ix -= 4;
511 count -= 4;
512 }
513 while (count > 0) {
herb222f8ff2016-03-23 15:14:23 -0700514 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix);
515 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix);
herb6eff52a2016-03-23 09:00:33 -0700516
herb9e0efe52016-04-08 13:25:28 -0700517 fNext->blendPixel(lerp(&pixelY0, &pixelY1));
herb6eff52a2016-03-23 09:00:33 -0700518 ix -= 1;
519 count -= 1;
520 }
521 }
522 }
523
524 // We're moving through source space faster than dst (zoomed out),
525 // so we'll never reuse a source pixel or be able to do contiguous loads.
526 void bilerpSpanFastRate(Span span, SkScalar y1) {
527 SkPoint start;
528 SkScalar length;
529 int count;
530 std::tie(start, length, count) = span;
531 SkScalar x = X(start);
532 SkScalar y = Y(start);
533 if (false && y == y1) {
534 struct BilerpWrapper {
535 void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) {
536 fSampler.bilerpListFew(n, xs, ys);
537 }
538
539 void VECTORCALL pointList4(Sk4s xs, Sk4s ys) {
540 fSampler.bilerpList4(xs, ys);
541 }
542
543 GeneralSampler& fSampler;
544 };
545 BilerpWrapper wrapper{*this};
546 span_fallback(span, &wrapper);
547 } else {
548 SkScalar dx = length / (count - 1);
549 Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f};
550 while (count > 0) {
551 Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x};
552 this->bilerpEdge(xs, ys);
553 x += dx;
554 count -= 1;
555 }
556 }
557 }
558
559 Next* const fNext;
560 SourceStrategy fStrategy;
561};
562
563class sRGBFast {
564public:
565 static Sk4s VECTORCALL sRGBToLinear(Sk4s pixel) {
566 Sk4s l = pixel * pixel;
567 return Sk4s{l[0], l[1], l[2], pixel[3]};
568 }
569};
570
// Channel order selector for Pixel8888. The enumerators take the values of the booleans
// false (kRGBA) and true (kBGRA).
enum class ColorOrder {
    kRGBA = false,
    kBGRA = true,
};
575template <SkColorProfileType colorProfile, ColorOrder colorOrder>
576class Pixel8888 {
577public:
578 Pixel8888(int width, const uint32_t* src) : fSrc{src}, fWidth{width}{ }
579 Pixel8888(const SkPixmap& srcPixmap)
580 : fSrc{srcPixmap.addr32()}
581 , fWidth{static_cast<int>(srcPixmap.rowBytes() / 4)} { }
582
583 void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) {
584 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
585 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
586 Sk4i bufferLoc = YIs * fWidth + XIs;
587 switch (n) {
588 case 3:
herb222f8ff2016-03-23 15:14:23 -0700589 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
herb6eff52a2016-03-23 09:00:33 -0700590 case 2:
herb222f8ff2016-03-23 15:14:23 -0700591 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
herb6eff52a2016-03-23 09:00:33 -0700592 case 1:
herb222f8ff2016-03-23 15:14:23 -0700593 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
herb6eff52a2016-03-23 09:00:33 -0700594 default:
595 break;
596 }
597 }
598
599 void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
600 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
601 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
602 Sk4i bufferLoc = YIs * fWidth + XIs;
herb222f8ff2016-03-23 15:14:23 -0700603 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
604 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
605 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
606 *px3 = this->getPixelAt(fSrc, bufferLoc[3]);
herb6eff52a2016-03-23 09:00:33 -0700607 }
608
609 void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
610 const uint32_t* src = static_cast<const uint32_t*>(vsrc);
herb222f8ff2016-03-23 15:14:23 -0700611 *px0 = this->getPixelAt(src, index + 0);
612 *px1 = this->getPixelAt(src, index + 1);
613 *px2 = this->getPixelAt(src, index + 2);
614 *px3 = this->getPixelAt(src, index + 3);
herb6eff52a2016-03-23 09:00:33 -0700615 }
616
herb222f8ff2016-03-23 15:14:23 -0700617 Sk4f getPixelAt(const void* vsrc, int index) {
herb6eff52a2016-03-23 09:00:33 -0700618 const uint32_t* src = static_cast<const uint32_t*>(vsrc);
619 Sk4b bytePixel = Sk4b::Load((uint8_t *)(&src[index]));
620 Sk4f pixel = SkNx_cast<float, uint8_t>(bytePixel);
621 if (colorOrder == ColorOrder::kBGRA) {
622 pixel = SkNx_shuffle<2, 1, 0, 3>(pixel);
623 }
624 pixel = pixel * Sk4f{1.0f/255.0f};
625 if (colorProfile == kSRGB_SkColorProfileType) {
626 pixel = sRGBFast::sRGBToLinear(pixel);
627 }
628 return pixel;
629 }
630
herb222f8ff2016-03-23 15:14:23 -0700631 const void* row(int y) { return fSrc + y * fWidth[0]; }
herb6eff52a2016-03-23 09:00:33 -0700632
633private:
634 const uint32_t* const fSrc;
herb222f8ff2016-03-23 15:14:23 -0700635 const Sk4i fWidth;
herb6eff52a2016-03-23 09:00:33 -0700636};
637using Pixel8888SRGB = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kRGBA>;
638using Pixel8888LRGB = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kRGBA>;
639using Pixel8888SBGR = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kBGRA>;
640using Pixel8888LBGR = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kBGRA>;
herb222f8ff2016-03-23 15:14:23 -0700641
642template <SkColorProfileType colorProfile>
643class PixelIndex8 {
644public:
645 PixelIndex8(const SkPixmap& srcPixmap)
646 : fSrc{srcPixmap.addr8()}, fWidth{static_cast<int>(srcPixmap.rowBytes())} {
647 SkASSERT(srcPixmap.colorType() == kIndex_8_SkColorType);
648 SkColorTable* skColorTable = srcPixmap.ctable();
649 SkASSERT(skColorTable != nullptr);
650
651 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
652 for (int i = 0; i < skColorTable->count(); i++) {
653 fColorTable[i] = this->convertPixel((*skColorTable)[i]);
654 }
655 }
656
herb9e0efe52016-04-08 13:25:28 -0700657 PixelIndex8(const PixelIndex8& strategy)
658 : fSrc{strategy.fSrc}, fWidth{strategy.fWidth} {
659 fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
660 // TODO: figure out the count.
661 for (int i = 0; i < 256; i++) {
662 fColorTable[i] = strategy.fColorTable[i];
663 }
664 }
665
herb222f8ff2016-03-23 15:14:23 -0700666 void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) {
667 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
668 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
669 Sk4i bufferLoc = YIs * fWidth + XIs;
670 switch (n) {
671 case 3:
672 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
673 case 2:
674 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
675 case 1:
676 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
677 default:
678 break;
679 }
680 }
681
682 void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
683 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
684 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
685 Sk4i bufferLoc = YIs * fWidth + XIs;
686 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
687 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
688 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
689 *px3 = this->getPixelAt(fSrc, bufferLoc[3]);
690 }
691
692 void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
693 *px0 = this->getPixelAt(vsrc, index + 0);
694 *px1 = this->getPixelAt(vsrc, index + 1);
695 *px2 = this->getPixelAt(vsrc, index + 2);
696 *px3 = this->getPixelAt(vsrc, index + 3);
697 }
698
699 Sk4f getPixelAt(const void* vsrc, int index) {
700 const uint8_t* src = static_cast<const uint8_t*>(vsrc);
701 return getPixel(src + index);
702 }
703
704 Sk4f getPixel(const uint8_t* src) {
705 Sk4f pixel = fColorTable[*src];
706 return pixel;
707 }
708
709 const void* row(int y) { return fSrc + y * fWidth[0]; }
710
711private:
712 static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
713 Sk4f convertPixel(SkPMColor pmColor) {
714 Sk4b bPixel = Sk4b::Load(&pmColor);
715 Sk4f pixel = SkNx_cast<float, uint8_t>(bPixel);
716 float alpha = pixel[3];
717 if (alpha != 0.0f) {
718 float invAlpha = 1.0f / pixel[3];
719 Sk4f normalize = {invAlpha, invAlpha, invAlpha, 1.0f / 255.0f};
720 pixel = pixel * normalize;
721 if (colorProfile == kSRGB_SkColorProfileType) {
722 pixel = sRGBFast::sRGBToLinear(pixel);
723 }
724 return pixel;
725 } else {
726 return Sk4f{0.0f};
727 }
728 }
729 const uint8_t* const fSrc;
730 const Sk4i fWidth;
731 SkAutoMalloc fColorTableStorage{kColorTableSize};
732 Sk4f* fColorTable;
733};
734
735using PixelIndex8SRGB = PixelIndex8<kSRGB_SkColorProfileType>;
736using PixelIndex8LRGB = PixelIndex8<kLinear_SkColorProfileType>;
737
herbd5f2e2e2016-04-14 11:16:44 -0700738class PixelHalfLinear {
739public:
740 PixelHalfLinear(int width, const uint64_t* src) : fSrc{src}, fWidth{width}{ }
741 PixelHalfLinear(const SkPixmap& srcPixmap)
742 : fSrc{srcPixmap.addr64()}
743 , fWidth{static_cast<int>(srcPixmap.rowBytes() / 8)} { }
744
745 void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) {
746 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
747 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
748 Sk4i bufferLoc = YIs * fWidth + XIs;
749 switch (n) {
750 case 3:
751 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
752 case 2:
753 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
754 case 1:
755 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
756 default:
757 break;
758 }
759 }
760
761 void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
762 Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
763 Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
764 Sk4i bufferLoc = YIs * fWidth + XIs;
765 *px0 = this->getPixelAt(fSrc, bufferLoc[0]);
766 *px1 = this->getPixelAt(fSrc, bufferLoc[1]);
767 *px2 = this->getPixelAt(fSrc, bufferLoc[2]);
768 *px3 = this->getPixelAt(fSrc, bufferLoc[3]);
769 }
770
771 void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
772 const uint32_t* src = static_cast<const uint32_t*>(vsrc);
773 *px0 = this->getPixelAt(src, index + 0);
774 *px1 = this->getPixelAt(src, index + 1);
775 *px2 = this->getPixelAt(src, index + 2);
776 *px3 = this->getPixelAt(src, index + 3);
777 }
778
779 Sk4f getPixelAt(const void* vsrc, int index) {
780 const uint64_t* src = static_cast<const uint64_t*>(vsrc);
781 return SkHalfToFloat_01(*src);
782 }
783
784 const void* row(int y) { return fSrc + y * fWidth[0]; }
785
786private:
787 const uint64_t* const fSrc;
788 const Sk4i fWidth;
789};
790
herb6eff52a2016-03-23 09:00:33 -0700791} // namespace
792
793#endif // SkLinearBitmapPipeline_sampler_DEFINED