update image blur to use SkVx
This will allow more flexible data types for fixing the blur sigma limit.
Change-Id: I14b6222ea80f458deab08166a3930e4862f3b229
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/439116
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Herb Derby <herb@google.com>
diff --git a/src/effects/imagefilters/SkBlurImageFilter.cpp b/src/effects/imagefilters/SkBlurImageFilter.cpp
index 58d1a15..ef9b674 100644
--- a/src/effects/imagefilters/SkBlurImageFilter.cpp
+++ b/src/effects/imagefilters/SkBlurImageFilter.cpp
@@ -11,9 +11,9 @@
#include "include/core/SkTileMode.h"
#include "include/effects/SkImageFilters.h"
#include "include/private/SkColorData.h"
-#include "include/private/SkNx.h"
#include "include/private/SkTFitsIn.h"
#include "include/private/SkTPin.h"
+#include "include/private/SkVx.h"
#include "src/core/SkArenaAlloc.h"
#include "src/core/SkAutoPixmapStorage.h"
#include "src/core/SkGpuBlurUtils.h"
@@ -216,9 +216,9 @@
//
// This is all encapsulated in the processValue function below.
//
-using Pass0And1 = Sk4u[2];
+using Pass0And1 = skvx::Vec<4, uint32_t>[2];
// The would be dLeft parameter is assumed to be 0.
-static void blur_one_direction(Sk4u* buffer, int window,
+static void blur_one_direction(skvx::Vec<4, uint32_t>* buffer, int window,
int srcLeft, int srcRight, int dstRight,
const uint32_t* src, int srcXStride, int srcYStride, int srcH,
uint32_t* dst, int dstXStride, int dstYStride) {
@@ -228,10 +228,10 @@
pass1Count = window - 1,
pass2Count = (window & 1) == 1 ? window - 1 : window;
- Pass0And1* buffer01Start = (Pass0And1*)buffer;
- Sk4u* buffer2Start = buffer + pass0Count + pass1Count;
- Pass0And1* buffer01End = (Pass0And1*)buffer2Start;
- Sk4u* buffer2End = buffer2Start + pass2Count;
+ Pass0And1* buffer01Start = (Pass0And1*)buffer;
+ skvx::Vec<4, uint32_t>* buffer2Start = buffer + pass0Count + pass1Count;
+ Pass0And1* buffer01End = (Pass0And1*)buffer2Start;
+ skvx::Vec<4, uint32_t>* buffer2End = buffer2Start + pass2Count;
// If the window is odd then the divisor is just window ^ 3 otherwise,
// it is window * window * (window + 1) = window ^ 3 + window ^ 2;
@@ -250,7 +250,7 @@
// h == d/2 + 1/2 == (d + 1) / 2
//
// weight = 1 / d * 2 ^ 32
- auto weight = static_cast<uint32_t>(round(1.0 / divisor * (1ull << 32)));
+ auto weight = static_cast<uint64_t>(round((1.0 / divisor) * (1ull << 32)));
auto half = static_cast<uint32_t>((divisor + 1) / 2);
auto border = calculate_border(window);
@@ -264,19 +264,23 @@
auto buffer01Cursor = buffer01Start;
auto buffer2Cursor = buffer2Start;
- Sk4u sum0{0u};
- Sk4u sum1{0u};
- Sk4u sum2{half};
+ skvx::Vec<4, uint32_t> sum0{0u, 0u, 0u, 0u};
+ skvx::Vec<4, uint32_t> sum1{0u, 0u, 0u, 0u};
+ skvx::Vec<4, uint32_t> sum2{half, half, half, half};
- sk_bzero(buffer01Start, (buffer2End - (Sk4u *) (buffer01Start)) * sizeof(*buffer2Start));
+ sk_bzero(
+ buffer01Start,
+ (buffer2End - (skvx::Vec<4, uint32_t> *) (buffer01Start)) * sizeof(*buffer2Start));
// Given an expanded input pixel, move the window ahead using the leadingEdge value.
- auto processValue = [&](const Sk4u& leadingEdge) -> Sk4u {
+ auto processValue =
+ [&](const skvx::Vec<4, uint32_t>& leadingEdge) -> skvx::Vec<4, uint32_t> {
sum0 += leadingEdge;
sum1 += sum0;
sum2 += sum1;
- Sk4u value = sum2.mulHi(weight);
+ skvx::Vec<4, uint64_t> w = skvx::cast<uint64_t>(sum2) * weight;
+ skvx::Vec<4, uint32_t> value = skvx::cast<uint32_t>(w >> 32);
sum2 -= *buffer2Cursor;
*buffer2Cursor = sum1;
@@ -310,7 +314,9 @@
// The edge of the source is before the edge of the destination. Calculate the sums for
// the pixels before the start of the destination.
while (dstIdx > srcIdx) {
- Sk4u leadingEdge = srcIdx < srcEnd ? SkNx_cast<uint32_t>(Sk4b::Load(srcCursor)) : 0;
+ skvx::Vec<4, uint32_t> leadingEdge =
+ srcIdx < srcEnd ? skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor))
+ : 0;
(void) processValue(leadingEdge);
srcCursor += srcXStride;
srcIdx++;
@@ -320,8 +326,9 @@
// Consume the source generating pixels to dst.
auto loopEnd = std::min(dstEnd, srcEnd);
while (dstIdx < loopEnd) {
- Sk4u leadingEdge = SkNx_cast<uint32_t>(Sk4b::Load(srcCursor));
- SkNx_cast<uint8_t>(processValue(leadingEdge)).store(dstCursor);
+ skvx::Vec<4, uint32_t> leadingEdge =
+ skvx::cast<uint32_t>(skvx::Vec<4, uint8_t>::Load(srcCursor));
+ skvx::cast<uint8_t>(processValue(leadingEdge)).store(dstCursor);
srcCursor += srcXStride;
dstCursor += dstXStride;
SK_PREFETCH(dstCursor);
@@ -332,7 +339,7 @@
// are now 0x0000 until the end of the destination.
loopEnd = dstEnd;
while (dstIdx < loopEnd) {
- SkNx_cast<uint8_t>(processValue(0u)).store(dstCursor);
+ skvx::cast<uint8_t>(processValue(0u)).store(dstCursor);
dstCursor += dstXStride;
SK_PREFETCH(dstCursor);
dstIdx++;
@@ -455,7 +462,8 @@
// The amount 1024 is enough for buffers up to 10 sigma. The tmp bitmap will be
// allocated on the heap.
SkSTArenaAlloc<1024> alloc;
- Sk4u* buffer = alloc.makeArrayDefault<Sk4u>(std::max(bufferSizeW, bufferSizeH));
+ skvx::Vec<4, uint32_t>* buffer =
+ alloc.makeArrayDefault<skvx::Vec<4, uint32_t>>(std::max(bufferSizeW, bufferSizeH));
// Basic Plan: The three cases to handle
// * Horizontal and Vertical - blur horizontally while copying values from the source to