herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2016 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #ifndef SkLinearBitmapPipeline_sampler_DEFINED |
| 9 | #define SkLinearBitmapPipeline_sampler_DEFINED |
| 10 | |
benjaminwagner | 6c71e0a | 2016-04-07 08:49:31 -0700 | [diff] [blame] | 11 | #include "SkFixed.h" |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 12 | #include "SkLinearBitmapPipeline_core.h" |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 13 | #include <array> |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 14 | #include <tuple> |
| 15 | |
| 16 | namespace { |
| 17 | // Explanation of the math: |
| 18 | // 1 - x x |
| 19 | // +--------+--------+ |
| 20 | // | | | |
| 21 | // 1 - y | px00 | px10 | |
| 22 | // | | | |
| 23 | // +--------+--------+ |
| 24 | // | | | |
| 25 | // y | px01 | px11 | |
| 26 | // | | | |
| 27 | // +--------+--------+ |
| 28 | // |
| 29 | // |
| 30 | // Each pixel pxXY is multiplied by a different factor derived from the fractional parts of x |
| 31 | // and y: |
| 32 | // * px00 -> (1 - x)(1 - y) = 1 - x - y + xy |
| 33 | // * px10 -> x(1 - y) = x - xy |
| 34 | // * px01 -> (1 - x)y = y - xy |
| 35 | // * px11 -> xy |
| 36 | // So x * y is calculated first and then used to calculate all the other factors. |
| 37 | static Sk4s VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, |
| 38 | Sk4f px01, Sk4f px11) { |
| 39 | // Calculate fractional xs and ys. |
| 40 | Sk4s fxs = xs - xs.floor(); |
| 41 | Sk4s fys = ys - ys.floor(); |
| 42 | Sk4s fxys{fxs * fys}; |
| 43 | Sk4f sum = px11 * fxys; |
| 44 | sum = sum + px01 * (fys - fxys); |
| 45 | sum = sum + px10 * (fxs - fxys); |
| 46 | sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); |
| 47 | return sum; |
| 48 | } |
| 49 | |
| 50 | // The GeneralSampler class |
| 51 | template<typename SourceStrategy, typename Next> |
| 52 | class GeneralSampler { |
| 53 | public: |
| 54 | template<typename... Args> |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 55 | GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args) |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 56 | : fNext{next}, fStrategy{std::forward<Args>(args)...} { } |
| 57 | |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 58 | GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, |
| 59 | const GeneralSampler& sampler) |
| 60 | : fNext{next}, fStrategy{sampler.fStrategy} { } |
| 61 | |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 62 | void VECTORCALL nearestListFew(int n, Sk4s xs, Sk4s ys) { |
| 63 | SkASSERT(0 < n && n < 4); |
| 64 | Sk4f px0, px1, px2; |
| 65 | fStrategy.getFewPixels(n, xs, ys, &px0, &px1, &px2); |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 66 | if (n >= 1) fNext->blendPixel(px0); |
| 67 | if (n >= 2) fNext->blendPixel(px1); |
| 68 | if (n >= 3) fNext->blendPixel(px2); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 69 | } |
| 70 | |
| 71 | void VECTORCALL nearestList4(Sk4s xs, Sk4s ys) { |
| 72 | Sk4f px0, px1, px2, px3; |
| 73 | fStrategy.get4Pixels(xs, ys, &px0, &px1, &px2, &px3); |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 74 | fNext->blend4Pixels(px0, px1, px2, px3); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 75 | } |
| 76 | |
| 77 | void nearestSpan(Span span) { |
| 78 | SkASSERT(!span.isEmpty()); |
| 79 | SkPoint start; |
| 80 | SkScalar length; |
| 81 | int count; |
| 82 | std::tie(start, length, count) = span; |
| 83 | SkScalar absLength = SkScalarAbs(length); |
| 84 | if (absLength < (count - 1)) { |
| 85 | this->nearestSpanSlowRate(span); |
| 86 | } else if (absLength == (count - 1)) { |
| 87 | this->nearestSpanUnitRate(span); |
| 88 | } else { |
| 89 | this->nearestSpanFastRate(span); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | Sk4f bilerNonEdgePixel(SkScalar x, SkScalar y) { |
| 94 | Sk4f px00, px10, px01, px11; |
| 95 | Sk4f xs = Sk4f{x}; |
| 96 | Sk4f ys = Sk4f{y}; |
| 97 | Sk4f sampleXs = xs + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f}; |
| 98 | Sk4f sampleYs = ys + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f}; |
| 99 | fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); |
| 100 | return bilerp4(xs, ys, px00, px10, px01, px11); |
| 101 | } |
| 102 | |
| 103 | void VECTORCALL bilerpListFew(int n, Sk4s xs, Sk4s ys) { |
| 104 | SkASSERT(0 < n && n < 4); |
| 105 | auto bilerpPixel = [&](int index) { |
| 106 | return this->bilerNonEdgePixel(xs[index], ys[index]); |
| 107 | }; |
| 108 | |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 109 | if (n >= 1) fNext->blendPixel(bilerpPixel(0)); |
| 110 | if (n >= 2) fNext->blendPixel(bilerpPixel(1)); |
| 111 | if (n >= 3) fNext->blendPixel(bilerpPixel(2)); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 112 | } |
| 113 | |
| 114 | void VECTORCALL bilerpList4(Sk4s xs, Sk4s ys) { |
| 115 | auto bilerpPixel = [&](int index) { |
| 116 | return this->bilerNonEdgePixel(xs[index], ys[index]); |
| 117 | }; |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 118 | fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3)); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 119 | } |
| 120 | |
| 121 | void VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) { |
| 122 | Sk4f px00, px10, px01, px11; |
| 123 | Sk4f xs = Sk4f{sampleXs[0]}; |
| 124 | Sk4f ys = Sk4f{sampleYs[0]}; |
| 125 | fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); |
| 126 | Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11); |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 127 | fNext->blendPixel(pixel); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 128 | } |
| 129 | |
| 130 | void bilerpSpan(Span span) { |
| 131 | this->bilerpSpanWithY(span, span.startY()); |
| 132 | } |
| 133 | |
| 134 | void bilerpSpanWithY(Span span, SkScalar y) { |
| 135 | SkASSERT(!span.isEmpty()); |
| 136 | SkPoint start; |
| 137 | SkScalar length; |
| 138 | int count; |
| 139 | std::tie(start, length, count) = span; |
| 140 | SkScalar absLength = SkScalarAbs(length); |
| 141 | if (absLength == 0.0f) { |
| 142 | this->bilerpSpanZeroRate(span, y); |
| 143 | } else if (absLength < (count - 1)) { |
| 144 | this->bilerpSpanSlowRate(span, y); |
| 145 | } else if (absLength == (count - 1)) { |
| 146 | if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) { |
| 147 | if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) { |
| 148 | this->nearestSpanUnitRate(span); |
| 149 | } else { |
| 150 | this->bilerpSpanUnitRateAlignedX(span, y); |
| 151 | } |
| 152 | } else { |
| 153 | this->bilerpSpanUnitRate(span, y); |
| 154 | } |
| 155 | } else { |
| 156 | this->bilerpSpanFastRate(span, y); |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | private: |
| 161 | // When moving through source space more slowly than dst space (zoomed in), |
| 162 | // we'll be sampling from the same source pixel more than once. |
| 163 | void nearestSpanSlowRate(Span span) { |
| 164 | SkPoint start; |
| 165 | SkScalar length; |
| 166 | int count; |
| 167 | std::tie(start, length, count) = span; |
| 168 | SkScalar x = X(start); |
| 169 | SkFixed fx = SkScalarToFixed(x); |
| 170 | SkScalar dx = length / (count - 1); |
| 171 | SkFixed fdx = SkScalarToFixed(dx); |
| 172 | |
| 173 | const void* row = fStrategy.row((int)std::floor(Y(start))); |
| 174 | Next* next = fNext; |
| 175 | |
| 176 | int ix = SkFixedFloorToInt(fx); |
| 177 | int prevIX = ix; |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 178 | Sk4f fpixel = fStrategy.getPixelAt(row, ix); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 179 | |
| 180 | // When dx is less than one, each pixel is used more than once. Using the fixed point fx |
| 181 | // allows the code to quickly check that the same pixel is being used. The code uses this |
| 182 | // same pixel check to do the sRGB and normalization only once. |
| 183 | auto getNextPixel = [&]() { |
| 184 | if (ix != prevIX) { |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 185 | fpixel = fStrategy.getPixelAt(row, ix); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 186 | prevIX = ix; |
| 187 | } |
| 188 | fx += fdx; |
| 189 | ix = SkFixedFloorToInt(fx); |
| 190 | return fpixel; |
| 191 | }; |
| 192 | |
| 193 | while (count >= 4) { |
| 194 | Sk4f px0 = getNextPixel(); |
| 195 | Sk4f px1 = getNextPixel(); |
| 196 | Sk4f px2 = getNextPixel(); |
| 197 | Sk4f px3 = getNextPixel(); |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 198 | next->blend4Pixels(px0, px1, px2, px3); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 199 | count -= 4; |
| 200 | } |
| 201 | while (count > 0) { |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 202 | next->blendPixel(getNextPixel()); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 203 | count -= 1; |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. |
| 208 | // We'll never re-use pixels, but we can at least load contiguous pixels. |
| 209 | void nearestSpanUnitRate(Span span) { |
| 210 | SkPoint start; |
| 211 | SkScalar length; |
| 212 | int count; |
| 213 | std::tie(start, length, count) = span; |
| 214 | int ix = SkScalarFloorToInt(X(start)); |
| 215 | const void* row = fStrategy.row((int)std::floor(Y(start))); |
| 216 | Next* next = fNext; |
| 217 | if (length > 0) { |
| 218 | while (count >= 4) { |
| 219 | Sk4f px0, px1, px2, px3; |
| 220 | fStrategy.get4Pixels(row, ix, &px0, &px1, &px2, &px3); |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 221 | next->blend4Pixels(px0, px1, px2, px3); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 222 | ix += 4; |
| 223 | count -= 4; |
| 224 | } |
| 225 | |
| 226 | while (count > 0) { |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 227 | next->blendPixel(fStrategy.getPixelAt(row, ix)); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 228 | ix += 1; |
| 229 | count -= 1; |
| 230 | } |
| 231 | } else { |
| 232 | while (count >= 4) { |
| 233 | Sk4f px0, px1, px2, px3; |
| 234 | fStrategy.get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0); |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 235 | next->blend4Pixels(px0, px1, px2, px3); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 236 | ix -= 4; |
| 237 | count -= 4; |
| 238 | } |
| 239 | |
| 240 | while (count > 0) { |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 241 | next->blendPixel(fStrategy.getPixelAt(row, ix)); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 242 | ix -= 1; |
| 243 | count -= 1; |
| 244 | } |
| 245 | } |
| 246 | } |
| 247 | |
| 248 | // We're moving through source space faster than dst (zoomed out), |
| 249 | // so we'll never reuse a source pixel or be able to do contiguous loads. |
| 250 | void nearestSpanFastRate(Span span) { |
| 251 | struct NearestWrapper { |
| 252 | void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) { |
| 253 | fSampler.nearestListFew(n, xs, ys); |
| 254 | } |
| 255 | |
| 256 | void VECTORCALL pointList4(Sk4s xs, Sk4s ys) { |
| 257 | fSampler.nearestList4(xs, ys); |
| 258 | } |
| 259 | |
| 260 | GeneralSampler& fSampler; |
| 261 | }; |
| 262 | NearestWrapper wrapper{*this}; |
| 263 | span_fallback(span, &wrapper); |
| 264 | } |
| 265 | |
| 266 | void bilerpSpanZeroRate(Span span, SkScalar y1) { |
| 267 | SkScalar y0 = span.startY() - 0.5f; |
| 268 | y1 += 0.5f; |
| 269 | int iy0 = SkScalarFloorToInt(y0); |
| 270 | SkScalar filterY1 = y0 - iy0; |
| 271 | SkScalar filterY0 = 1.0f - filterY1; |
| 272 | int iy1 = SkScalarFloorToInt(y1); |
| 273 | int ix = SkScalarFloorToInt(span.startX()); |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 274 | Sk4f pixelY0 = fStrategy.getPixelAt(fStrategy.row(iy0), ix); |
| 275 | Sk4f pixelY1 = fStrategy.getPixelAt(fStrategy.row(iy1), ix); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 276 | Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1; |
| 277 | int count = span.count(); |
| 278 | while (count >= 4) { |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 279 | fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 280 | count -= 4; |
| 281 | } |
| 282 | while (count > 0) { |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 283 | fNext->blendPixel(filterPixel); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 284 | count -= 1; |
| 285 | } |
| 286 | } |
| 287 | |
| 288 | // When moving through source space more slowly than dst space (zoomed in), |
| 289 | // we'll be sampling from the same source pixel more than once. |
| 290 | void bilerpSpanSlowRate(Span span, SkScalar ry1) { |
| 291 | SkPoint start; |
| 292 | SkScalar length; |
| 293 | int count; |
| 294 | std::tie(start, length, count) = span; |
| 295 | SkFixed fx = SkScalarToFixed(X(start) |
| 296 | -0.5f); |
| 297 | |
| 298 | SkFixed fdx = SkScalarToFixed(length / (count - 1)); |
| 299 | //start = start + SkPoint{-0.5f, -0.5f}; |
| 300 | |
| 301 | Sk4f xAdjust; |
| 302 | if (fdx >= 0) { |
| 303 | xAdjust = Sk4f{-1.0f}; |
| 304 | } else { |
| 305 | xAdjust = Sk4f{1.0f}; |
| 306 | } |
| 307 | int ix = SkFixedFloorToInt(fx); |
| 308 | int ioldx = ix; |
| 309 | Sk4f x{SkFixedToScalar(fx) - ix}; |
| 310 | Sk4f dx{SkFixedToScalar(fdx)}; |
| 311 | SkScalar ry0 = Y(start) - 0.5f; |
| 312 | ry1 += 0.5f; |
| 313 | SkScalar yFloor = std::floor(ry0); |
| 314 | Sk4f y1 = Sk4f{ry0 - yFloor}; |
| 315 | Sk4f y0 = Sk4f{1.0f} - y1; |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 316 | const void* const row0 = fStrategy.row(SkScalarFloorToInt(ry0)); |
| 317 | const void* const row1 = fStrategy.row(SkScalarFloorToInt(ry1)); |
| 318 | Sk4f fpixel00 = y0 * fStrategy.getPixelAt(row0, ix); |
| 319 | Sk4f fpixel01 = y1 * fStrategy.getPixelAt(row1, ix); |
| 320 | Sk4f fpixel10 = y0 * fStrategy.getPixelAt(row0, ix + 1); |
| 321 | Sk4f fpixel11 = y1 * fStrategy.getPixelAt(row1, ix + 1); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 322 | auto getNextPixel = [&]() { |
| 323 | if (ix != ioldx) { |
| 324 | fpixel00 = fpixel10; |
| 325 | fpixel01 = fpixel11; |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 326 | fpixel10 = y0 * fStrategy.getPixelAt(row0, ix + 1); |
| 327 | fpixel11 = y1 * fStrategy.getPixelAt(row1, ix + 1); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 328 | ioldx = ix; |
| 329 | x = x + xAdjust; |
| 330 | } |
| 331 | |
| 332 | Sk4f x0, x1; |
| 333 | x0 = Sk4f{1.0f} - x; |
| 334 | x1 = x; |
| 335 | Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11); |
| 336 | fx += fdx; |
| 337 | ix = SkFixedFloorToInt(fx); |
| 338 | x = x + dx; |
| 339 | return fpixel; |
| 340 | }; |
| 341 | |
| 342 | while (count >= 4) { |
| 343 | Sk4f fpixel0 = getNextPixel(); |
| 344 | Sk4f fpixel1 = getNextPixel(); |
| 345 | Sk4f fpixel2 = getNextPixel(); |
| 346 | Sk4f fpixel3 = getNextPixel(); |
| 347 | |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 348 | fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 349 | count -= 4; |
| 350 | } |
| 351 | |
| 352 | while (count > 0) { |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 353 | fNext->blendPixel(getNextPixel()); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 354 | |
| 355 | count -= 1; |
| 356 | } |
| 357 | } |
| 358 | |
| 359 | // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. |
| 360 | // We'll never re-use pixels, but we can at least load contiguous pixels. |
| 361 | void bilerpSpanUnitRate(Span span, SkScalar y1) { |
| 362 | y1 += 0.5f; |
| 363 | SkScalar y0 = span.startY() - 0.5f; |
| 364 | int iy0 = SkScalarFloorToInt(y0); |
| 365 | SkScalar filterY1 = y0 - iy0; |
| 366 | SkScalar filterY0 = 1.0f - filterY1; |
| 367 | int iy1 = SkScalarFloorToInt(y1); |
| 368 | const void* rowY0 = fStrategy.row(iy0); |
| 369 | const void* rowY1 = fStrategy.row(iy1); |
| 370 | SkScalar x0 = span.startX() - 0.5f; |
| 371 | int ix0 = SkScalarFloorToInt(x0); |
| 372 | SkScalar filterX1 = x0 - ix0; |
| 373 | SkScalar filterX0 = 1.0f - filterX1; |
| 374 | |
| 375 | auto getPixelY0 = [&]() { |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 376 | Sk4f px = fStrategy.getPixelAt(rowY0, ix0); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 377 | return px * filterY0; |
| 378 | }; |
| 379 | |
| 380 | auto getPixelY1 = [&]() { |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 381 | Sk4f px = fStrategy.getPixelAt(rowY1, ix0); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 382 | return px * filterY1; |
| 383 | }; |
| 384 | |
| 385 | auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { |
| 386 | fStrategy.get4Pixels(rowY0, ix, px0, px1, px2, px3); |
| 387 | *px0 = *px0 * filterY0; |
| 388 | *px1 = *px1 * filterY0; |
| 389 | *px2 = *px2 * filterY0; |
| 390 | *px3 = *px3 * filterY0; |
| 391 | }; |
| 392 | |
| 393 | auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { |
| 394 | fStrategy.get4Pixels(rowY1, ix, px0, px1, px2, px3); |
| 395 | *px0 = *px0 * filterY1; |
| 396 | *px1 = *px1 * filterY1; |
| 397 | *px2 = *px2 * filterY1; |
| 398 | *px3 = *px3 * filterY1; |
| 399 | }; |
| 400 | |
| 401 | auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) { |
| 402 | return pixelX0 * filterX0 + pixelX1 * filterX1; |
| 403 | }; |
| 404 | |
| 405 | // Mid making 4 unit rate. |
| 406 | Sk4f pxB = getPixelY0() + getPixelY1(); |
| 407 | if (span.length() > 0) { |
| 408 | int count = span.count(); |
| 409 | while (count >= 4) { |
| 410 | Sk4f px00, px10, px20, px30; |
| 411 | get4PixelsY0(ix0, &px00, &px10, &px20, &px30); |
| 412 | Sk4f px01, px11, px21, px31; |
| 413 | get4PixelsY1(ix0, &px01, &px11, &px21, &px31); |
| 414 | Sk4f pxS0 = px00 + px01; |
| 415 | Sk4f px0 = lerp(pxB, pxS0); |
| 416 | Sk4f pxS1 = px10 + px11; |
| 417 | Sk4f px1 = lerp(pxS0, pxS1); |
| 418 | Sk4f pxS2 = px20 + px21; |
| 419 | Sk4f px2 = lerp(pxS1, pxS2); |
| 420 | Sk4f pxS3 = px30 + px31; |
| 421 | Sk4f px3 = lerp(pxS2, pxS3); |
| 422 | pxB = pxS3; |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 423 | fNext->blend4Pixels(px0, px1, px2, px3); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 424 | ix0 += 4; |
| 425 | count -= 4; |
| 426 | } |
| 427 | while (count > 0) { |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 428 | Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix0); |
| 429 | Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix0); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 430 | |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 431 | fNext->blendPixel(lerp(pixelY0, pixelY1)); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 432 | ix0 += 1; |
| 433 | count -= 1; |
| 434 | } |
| 435 | } else { |
| 436 | int count = span.count(); |
| 437 | while (count >= 4) { |
| 438 | Sk4f px00, px10, px20, px30; |
| 439 | get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30); |
| 440 | Sk4f px01, px11, px21, px31; |
| 441 | get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31); |
| 442 | Sk4f pxS3 = px30 + px31; |
| 443 | Sk4f px0 = lerp(pxS3, pxB); |
| 444 | Sk4f pxS2 = px20 + px21; |
| 445 | Sk4f px1 = lerp(pxS2, pxS3); |
| 446 | Sk4f pxS1 = px10 + px11; |
| 447 | Sk4f px2 = lerp(pxS1, pxS2); |
| 448 | Sk4f pxS0 = px00 + px01; |
| 449 | Sk4f px3 = lerp(pxS0, pxS1); |
| 450 | pxB = pxS0; |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 451 | fNext->blend4Pixels(px0, px1, px2, px3); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 452 | ix0 -= 4; |
| 453 | count -= 4; |
| 454 | } |
| 455 | while (count > 0) { |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 456 | Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix0); |
| 457 | Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix0); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 458 | |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 459 | fNext->blendPixel(lerp(pixelY0, pixelY1)); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 460 | ix0 -= 1; |
| 461 | count -= 1; |
| 462 | } |
| 463 | } |
| 464 | } |
| 465 | |
| 466 | void bilerpSpanUnitRateAlignedX(Span span, SkScalar y1) { |
| 467 | SkScalar y0 = span.startY() - 0.5f; |
| 468 | y1 += 0.5f; |
| 469 | int iy0 = SkScalarFloorToInt(y0); |
| 470 | SkScalar filterY1 = y0 - iy0; |
| 471 | SkScalar filterY0 = 1.0f - filterY1; |
| 472 | int iy1 = SkScalarFloorToInt(y1); |
| 473 | int ix = SkScalarFloorToInt(span.startX()); |
| 474 | const void* rowY0 = fStrategy.row(iy0); |
| 475 | const void* rowY1 = fStrategy.row(iy1); |
| 476 | auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) { |
| 477 | return *pixelY0 * filterY0 + *pixelY1 * filterY1; |
| 478 | }; |
| 479 | |
| 480 | if (span.length() > 0) { |
| 481 | int count = span.count(); |
| 482 | while (count >= 4) { |
| 483 | Sk4f px00, px10, px20, px30; |
| 484 | fStrategy.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30); |
| 485 | Sk4f px01, px11, px21, px31; |
| 486 | fStrategy.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31); |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 487 | fNext->blend4Pixels( |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 488 | lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); |
| 489 | ix += 4; |
| 490 | count -= 4; |
| 491 | } |
| 492 | while (count > 0) { |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 493 | Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix); |
| 494 | Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 495 | |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 496 | fNext->blendPixel(lerp(&pixelY0, &pixelY1)); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 497 | ix += 1; |
| 498 | count -= 1; |
| 499 | } |
| 500 | } else { |
| 501 | int count = span.count(); |
| 502 | while (count >= 4) { |
| 503 | Sk4f px00, px10, px20, px30; |
| 504 | fStrategy.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00); |
| 505 | Sk4f px01, px11, px21, px31; |
| 506 | fStrategy.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01); |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 507 | fNext->blend4Pixels( |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 508 | lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); |
| 509 | ix -= 4; |
| 510 | count -= 4; |
| 511 | } |
| 512 | while (count > 0) { |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 513 | Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix); |
| 514 | Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 515 | |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 516 | fNext->blendPixel(lerp(&pixelY0, &pixelY1)); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 517 | ix -= 1; |
| 518 | count -= 1; |
| 519 | } |
| 520 | } |
| 521 | } |
| 522 | |
| 523 | // We're moving through source space faster than dst (zoomed out), |
| 524 | // so we'll never reuse a source pixel or be able to do contiguous loads. |
| 525 | void bilerpSpanFastRate(Span span, SkScalar y1) { |
| 526 | SkPoint start; |
| 527 | SkScalar length; |
| 528 | int count; |
| 529 | std::tie(start, length, count) = span; |
| 530 | SkScalar x = X(start); |
| 531 | SkScalar y = Y(start); |
| 532 | if (false && y == y1) { |
| 533 | struct BilerpWrapper { |
| 534 | void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) { |
| 535 | fSampler.bilerpListFew(n, xs, ys); |
| 536 | } |
| 537 | |
| 538 | void VECTORCALL pointList4(Sk4s xs, Sk4s ys) { |
| 539 | fSampler.bilerpList4(xs, ys); |
| 540 | } |
| 541 | |
| 542 | GeneralSampler& fSampler; |
| 543 | }; |
| 544 | BilerpWrapper wrapper{*this}; |
| 545 | span_fallback(span, &wrapper); |
| 546 | } else { |
| 547 | SkScalar dx = length / (count - 1); |
| 548 | Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f}; |
| 549 | while (count > 0) { |
| 550 | Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x}; |
| 551 | this->bilerpEdge(xs, ys); |
| 552 | x += dx; |
| 553 | count -= 1; |
| 554 | } |
| 555 | } |
| 556 | } |
| 557 | |
| 558 | Next* const fNext; |
| 559 | SourceStrategy fStrategy; |
| 560 | }; |
| 561 | |
| 562 | class sRGBFast { |
| 563 | public: |
| 564 | static Sk4s VECTORCALL sRGBToLinear(Sk4s pixel) { |
| 565 | Sk4s l = pixel * pixel; |
| 566 | return Sk4s{l[0], l[1], l[2], pixel[3]}; |
| 567 | } |
| 568 | }; |
| 569 | |
// Channel order of a 32-bit source pixel. The enumerators keep the values 0 and 1 that
// `false` and `true` convert to.
enum class ColorOrder {
    kRGBA = 0,
    kBGRA = 1,
};
| 574 | template <SkColorProfileType colorProfile, ColorOrder colorOrder> |
| 575 | class Pixel8888 { |
| 576 | public: |
| 577 | Pixel8888(int width, const uint32_t* src) : fSrc{src}, fWidth{width}{ } |
| 578 | Pixel8888(const SkPixmap& srcPixmap) |
| 579 | : fSrc{srcPixmap.addr32()} |
| 580 | , fWidth{static_cast<int>(srcPixmap.rowBytes() / 4)} { } |
| 581 | |
| 582 | void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) { |
| 583 | Sk4i XIs = SkNx_cast<int, SkScalar>(xs); |
| 584 | Sk4i YIs = SkNx_cast<int, SkScalar>(ys); |
| 585 | Sk4i bufferLoc = YIs * fWidth + XIs; |
| 586 | switch (n) { |
| 587 | case 3: |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 588 | *px2 = this->getPixelAt(fSrc, bufferLoc[2]); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 589 | case 2: |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 590 | *px1 = this->getPixelAt(fSrc, bufferLoc[1]); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 591 | case 1: |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 592 | *px0 = this->getPixelAt(fSrc, bufferLoc[0]); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 593 | default: |
| 594 | break; |
| 595 | } |
| 596 | } |
| 597 | |
| 598 | void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { |
| 599 | Sk4i XIs = SkNx_cast<int, SkScalar>(xs); |
| 600 | Sk4i YIs = SkNx_cast<int, SkScalar>(ys); |
| 601 | Sk4i bufferLoc = YIs * fWidth + XIs; |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 602 | *px0 = this->getPixelAt(fSrc, bufferLoc[0]); |
| 603 | *px1 = this->getPixelAt(fSrc, bufferLoc[1]); |
| 604 | *px2 = this->getPixelAt(fSrc, bufferLoc[2]); |
| 605 | *px3 = this->getPixelAt(fSrc, bufferLoc[3]); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 606 | } |
| 607 | |
| 608 | void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { |
| 609 | const uint32_t* src = static_cast<const uint32_t*>(vsrc); |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 610 | *px0 = this->getPixelAt(src, index + 0); |
| 611 | *px1 = this->getPixelAt(src, index + 1); |
| 612 | *px2 = this->getPixelAt(src, index + 2); |
| 613 | *px3 = this->getPixelAt(src, index + 3); |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 614 | } |
| 615 | |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 616 | Sk4f getPixelAt(const void* vsrc, int index) { |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 617 | const uint32_t* src = static_cast<const uint32_t*>(vsrc); |
| 618 | Sk4b bytePixel = Sk4b::Load((uint8_t *)(&src[index])); |
| 619 | Sk4f pixel = SkNx_cast<float, uint8_t>(bytePixel); |
| 620 | if (colorOrder == ColorOrder::kBGRA) { |
| 621 | pixel = SkNx_shuffle<2, 1, 0, 3>(pixel); |
| 622 | } |
| 623 | pixel = pixel * Sk4f{1.0f/255.0f}; |
| 624 | if (colorProfile == kSRGB_SkColorProfileType) { |
| 625 | pixel = sRGBFast::sRGBToLinear(pixel); |
| 626 | } |
| 627 | return pixel; |
| 628 | } |
| 629 | |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 630 | const void* row(int y) { return fSrc + y * fWidth[0]; } |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 631 | |
| 632 | private: |
| 633 | const uint32_t* const fSrc; |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 634 | const Sk4i fWidth; |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 635 | }; |
| 636 | using Pixel8888SRGB = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kRGBA>; |
| 637 | using Pixel8888LRGB = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kRGBA>; |
| 638 | using Pixel8888SBGR = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kBGRA>; |
| 639 | using Pixel8888LBGR = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kBGRA>; |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 640 | |
| 641 | template <SkColorProfileType colorProfile> |
| 642 | class PixelIndex8 { |
| 643 | public: |
| 644 | PixelIndex8(const SkPixmap& srcPixmap) |
| 645 | : fSrc{srcPixmap.addr8()}, fWidth{static_cast<int>(srcPixmap.rowBytes())} { |
| 646 | SkASSERT(srcPixmap.colorType() == kIndex_8_SkColorType); |
| 647 | SkColorTable* skColorTable = srcPixmap.ctable(); |
| 648 | SkASSERT(skColorTable != nullptr); |
| 649 | |
| 650 | fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); |
| 651 | for (int i = 0; i < skColorTable->count(); i++) { |
| 652 | fColorTable[i] = this->convertPixel((*skColorTable)[i]); |
| 653 | } |
| 654 | } |
| 655 | |
herb | 9e0efe5 | 2016-04-08 13:25:28 -0700 | [diff] [blame^] | 656 | PixelIndex8(const PixelIndex8& strategy) |
| 657 | : fSrc{strategy.fSrc}, fWidth{strategy.fWidth} { |
| 658 | fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); |
| 659 | // TODO: figure out the count. |
| 660 | for (int i = 0; i < 256; i++) { |
| 661 | fColorTable[i] = strategy.fColorTable[i]; |
| 662 | } |
| 663 | } |
| 664 | |
herb | 222f8ff | 2016-03-23 15:14:23 -0700 | [diff] [blame] | 665 | void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) { |
| 666 | Sk4i XIs = SkNx_cast<int, SkScalar>(xs); |
| 667 | Sk4i YIs = SkNx_cast<int, SkScalar>(ys); |
| 668 | Sk4i bufferLoc = YIs * fWidth + XIs; |
| 669 | switch (n) { |
| 670 | case 3: |
| 671 | *px2 = this->getPixelAt(fSrc, bufferLoc[2]); |
| 672 | case 2: |
| 673 | *px1 = this->getPixelAt(fSrc, bufferLoc[1]); |
| 674 | case 1: |
| 675 | *px0 = this->getPixelAt(fSrc, bufferLoc[0]); |
| 676 | default: |
| 677 | break; |
| 678 | } |
| 679 | } |
| 680 | |
| 681 | void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { |
| 682 | Sk4i XIs = SkNx_cast<int, SkScalar>(xs); |
| 683 | Sk4i YIs = SkNx_cast<int, SkScalar>(ys); |
| 684 | Sk4i bufferLoc = YIs * fWidth + XIs; |
| 685 | *px0 = this->getPixelAt(fSrc, bufferLoc[0]); |
| 686 | *px1 = this->getPixelAt(fSrc, bufferLoc[1]); |
| 687 | *px2 = this->getPixelAt(fSrc, bufferLoc[2]); |
| 688 | *px3 = this->getPixelAt(fSrc, bufferLoc[3]); |
| 689 | } |
| 690 | |
| 691 | void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { |
| 692 | *px0 = this->getPixelAt(vsrc, index + 0); |
| 693 | *px1 = this->getPixelAt(vsrc, index + 1); |
| 694 | *px2 = this->getPixelAt(vsrc, index + 2); |
| 695 | *px3 = this->getPixelAt(vsrc, index + 3); |
| 696 | } |
| 697 | |
| 698 | Sk4f getPixelAt(const void* vsrc, int index) { |
| 699 | const uint8_t* src = static_cast<const uint8_t*>(vsrc); |
| 700 | return getPixel(src + index); |
| 701 | } |
| 702 | |
| 703 | Sk4f getPixel(const uint8_t* src) { |
| 704 | Sk4f pixel = fColorTable[*src]; |
| 705 | return pixel; |
| 706 | } |
| 707 | |
| 708 | const void* row(int y) { return fSrc + y * fWidth[0]; } |
| 709 | |
| 710 | private: |
| 711 | static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12; |
| 712 | Sk4f convertPixel(SkPMColor pmColor) { |
| 713 | Sk4b bPixel = Sk4b::Load(&pmColor); |
| 714 | Sk4f pixel = SkNx_cast<float, uint8_t>(bPixel); |
| 715 | float alpha = pixel[3]; |
| 716 | if (alpha != 0.0f) { |
| 717 | float invAlpha = 1.0f / pixel[3]; |
| 718 | Sk4f normalize = {invAlpha, invAlpha, invAlpha, 1.0f / 255.0f}; |
| 719 | pixel = pixel * normalize; |
| 720 | if (colorProfile == kSRGB_SkColorProfileType) { |
| 721 | pixel = sRGBFast::sRGBToLinear(pixel); |
| 722 | } |
| 723 | return pixel; |
| 724 | } else { |
| 725 | return Sk4f{0.0f}; |
| 726 | } |
| 727 | } |
| 728 | const uint8_t* const fSrc; |
| 729 | const Sk4i fWidth; |
| 730 | SkAutoMalloc fColorTableStorage{kColorTableSize}; |
| 731 | Sk4f* fColorTable; |
| 732 | }; |
| 733 | |
| 734 | using PixelIndex8SRGB = PixelIndex8<kSRGB_SkColorProfileType>; |
| 735 | using PixelIndex8LRGB = PixelIndex8<kLinear_SkColorProfileType>; |
| 736 | |
herb | 6eff52a | 2016-03-23 09:00:33 -0700 | [diff] [blame] | 737 | } // namespace |
| 738 | |
| 739 | #endif // SkLinearBitmapPipeline_sampler_DEFINED |