Reland "have SkConvertPixels use SkColorSpaceXformSteps"
This is a reland of 6d0e566e941b09f5f9fb9f5a95c123459409c4ad
On second thought, it's probably better to correct the
types of the swizzle functions to express their required alignment.
This is a more involved CL, but I think it leaves things better off.
Original change's description:
> have SkConvertPixels use SkColorSpaceXformSteps
>
> This ought to allow the fast paths in more cases, e.g. memcpy() when
> both src and dst are the same format. Today if we tag a dst color space
> at all, we'll think we need to fall back to the general case pipeline.
>
> Some refactoring too, but no big functional change beyond using steps.
>
> Change-Id: I8fa01025229e3b9418e7f43241a2f03628a97288
> Reviewed-on: https://skia-review.googlesource.com/155640
> Reviewed-by: Brian Osman <brianosman@google.com>
> Commit-Queue: Mike Klein <mtklein@google.com>
Change-Id: Ia17d93acfe88a36c4c36d29e3a0b243f91178b61
Reviewed-on: https://skia-review.googlesource.com/156241
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
diff --git a/src/codec/SkPngCodec.cpp b/src/codec/SkPngCodec.cpp
index 0e7cd2b..b986121 100644
--- a/src/codec/SkPngCodec.cpp
+++ b/src/codec/SkPngCodec.cpp
@@ -293,10 +293,10 @@
#endif
if (is_rgba(tableColorType)) {
- SkOpts::RGB_to_RGB1(colorTable + numColorsWithAlpha, palette,
+ SkOpts::RGB_to_RGB1(colorTable + numColorsWithAlpha, (const uint8_t*)palette,
numColors - numColorsWithAlpha);
} else {
- SkOpts::RGB_to_BGR1(colorTable + numColorsWithAlpha, palette,
+ SkOpts::RGB_to_BGR1(colorTable + numColorsWithAlpha, (const uint8_t*)palette,
numColors - numColorsWithAlpha);
}
}
diff --git a/src/codec/SkSwizzler.cpp b/src/codec/SkSwizzler.cpp
index cca8c41..e1e2ecb 100644
--- a/src/codec/SkSwizzler.cpp
+++ b/src/codec/SkSwizzler.cpp
@@ -475,7 +475,7 @@
// sampling, deltaSrc should equal bpp.
SkASSERT(deltaSrc == bpp);
- SkOpts::RGBA_to_rgbA((uint32_t*) dst, src + offset, width);
+ SkOpts::RGBA_to_rgbA((uint32_t*) dst, (const uint32_t*)(src + offset), width);
}
static void fast_swizzle_rgba_to_bgra_premul(
@@ -486,7 +486,7 @@
// sampling, deltaSrc should equal bpp.
SkASSERT(deltaSrc == bpp);
- SkOpts::RGBA_to_bgrA((uint32_t*) dst, src + offset, width);
+ SkOpts::RGBA_to_bgrA((uint32_t*) dst, (const uint32_t*)(src + offset), width);
}
static void swizzle_rgba_to_bgra_unpremul(
@@ -510,7 +510,7 @@
// sampling, deltaSrc should equal bpp.
SkASSERT(deltaSrc == bpp);
- SkOpts::RGBA_to_BGRA((uint32_t*) dst, src + offset, width);
+ SkOpts::RGBA_to_BGRA((uint32_t*) dst, (const uint32_t*)(src + offset), width);
}
// 16-bits per component kRGB and kRGBA
@@ -705,7 +705,7 @@
// sampling, deltaSrc should equal bpp.
SkASSERT(deltaSrc == bpp);
- SkOpts::inverted_CMYK_to_RGB1((uint32_t*) dst, src + offset, width);
+ SkOpts::inverted_CMYK_to_RGB1((uint32_t*) dst, (const uint32_t*)(src + offset), width);
}
static void fast_swizzle_cmyk_to_bgra(
@@ -716,7 +716,7 @@
// sampling, deltaSrc should equal bpp.
SkASSERT(deltaSrc == bpp);
- SkOpts::inverted_CMYK_to_BGR1((uint32_t*) dst, src + offset, width);
+ SkOpts::inverted_CMYK_to_BGR1((uint32_t*) dst, (const uint32_t*)(src + offset), width);
}
static void swizzle_cmyk_to_565(
diff --git a/src/core/SkConvertPixels.cpp b/src/core/SkConvertPixels.cpp
index 373bc00..b613a8b 100644
--- a/src/core/SkConvertPixels.cpp
+++ b/src/core/SkConvertPixels.cpp
@@ -16,87 +16,102 @@
#include "SkUnPreMultiplyPriv.h"
#include "../jumper/SkJumper.h"
-// Fast Path 1: The memcpy() case.
-static inline bool can_memcpy(const SkImageInfo& dstInfo, const SkImageInfo& srcInfo) {
+static bool rect_memcpy(const SkImageInfo& dstInfo, void* dstPixels, size_t dstRB,
+ const SkImageInfo& srcInfo, const void* srcPixels, size_t srcRB,
+ const SkColorSpaceXformSteps& steps) {
+ // We can copy the pixels when no color type, alpha type, or color space changes.
if (dstInfo.colorType() != srcInfo.colorType()) {
return false;
}
-
- if (kAlpha_8_SkColorType == dstInfo.colorType()) {
- return true;
- }
-
- if (dstInfo.alphaType() != srcInfo.alphaType() &&
- kOpaque_SkAlphaType != dstInfo.alphaType() &&
- kOpaque_SkAlphaType != srcInfo.alphaType())
- {
- // We need to premultiply or unpremultiply.
+ if (dstInfo.colorType() != kAlpha_8_SkColorType
+ && steps.flags.mask() != 0b00000) {
return false;
}
- return !dstInfo.colorSpace() ||
- SkColorSpace::Equals(dstInfo.colorSpace(), srcInfo.colorSpace());
+ SkRectMemcpy(dstPixels, dstRB,
+ srcPixels, srcRB, dstInfo.minRowBytes(), dstInfo.height());
+ return true;
}
-// Fast Path 2: Simple swizzles and premuls.
-enum AlphaVerb {
- kNothing_AlphaVerb,
- kPremul_AlphaVerb,
- kUnpremul_AlphaVerb,
-};
-
-template <bool kSwapRB>
-static void wrap_unpremultiply(uint32_t* dst, const void* src, int count) {
- SkUnpremultiplyRow<kSwapRB>(dst, (const uint32_t*) src, count);
-}
-
-void swizzle_and_multiply(const SkImageInfo& dstInfo, void* dstPixels, size_t dstRB,
- const SkImageInfo& srcInfo, const void* srcPixels, size_t srcRB) {
- void (*proc)(uint32_t* dst, const void* src, int count);
- const bool swapRB = dstInfo.colorType() != srcInfo.colorType();
- AlphaVerb alphaVerb = kNothing_AlphaVerb;
- if (kPremul_SkAlphaType == dstInfo.alphaType() &&
- kUnpremul_SkAlphaType == srcInfo.alphaType())
- {
- alphaVerb = kPremul_AlphaVerb;
- } else if (kUnpremul_SkAlphaType == dstInfo.alphaType() &&
- kPremul_SkAlphaType == srcInfo.alphaType()) {
- alphaVerb = kUnpremul_AlphaVerb;
+static bool swizzle_and_multiply(const SkImageInfo& dstInfo, void* dstPixels, size_t dstRB,
+ const SkImageInfo& srcInfo, const void* srcPixels, size_t srcRB,
+ const SkColorSpaceXformSteps& steps) {
+ auto is_8888 = [](SkColorType ct) {
+ return ct == kRGBA_8888_SkColorType || ct == kBGRA_8888_SkColorType;
+ };
+ if (!is_8888(dstInfo.colorType()) ||
+ !is_8888(srcInfo.colorType()) ||
+ steps.flags.linearize || steps.flags.gamut_transform || steps.flags.encode) {
+ return false;
}
- switch (alphaVerb) {
- case kNothing_AlphaVerb:
- // If we do not need to swap or multiply, we should hit the memcpy case.
- SkASSERT(swapRB);
- proc = SkOpts::RGBA_to_BGRA;
- break;
- case kPremul_AlphaVerb:
- proc = swapRB ? SkOpts::RGBA_to_bgrA : SkOpts::RGBA_to_rgbA;
- break;
- case kUnpremul_AlphaVerb:
- proc = swapRB ? wrap_unpremultiply<true> : wrap_unpremultiply<false>;
- break;
+ // It'd be kind of silly for us to both...
+ SkASSERT(!(steps.flags.premul && steps.flags.unpremul));
+
+ const bool swapRB = dstInfo.colorType() != srcInfo.colorType();
+
+ void (*fn)(uint32_t*, const uint32_t*, int) = nullptr;
+
+ if (steps.flags.premul) {
+ fn = swapRB ? SkOpts::RGBA_to_bgrA
+ : SkOpts::RGBA_to_rgbA;
+ } else if (steps.flags.unpremul) {
+ fn = swapRB ? SkUnpremultiplyRow<true>
+ : SkUnpremultiplyRow<false>;
+ } else {
+ // If we're not swizzling, we ought to have used rect_memcpy().
+ SkASSERT(swapRB);
+ fn = SkOpts::RGBA_to_BGRA;
}
for (int y = 0; y < dstInfo.height(); y++) {
- proc((uint32_t*) dstPixels, srcPixels, dstInfo.width());
+ fn((uint32_t*)dstPixels, (const uint32_t*)srcPixels, dstInfo.width());
dstPixels = SkTAddOffset<void>(dstPixels, dstRB);
srcPixels = SkTAddOffset<const void>(srcPixels, srcRB);
}
+ return true;
}
-// Fast Path 3: Alpha 8 dsts.
-static void convert_to_alpha8(uint8_t* dst, size_t dstRB, const SkImageInfo& srcInfo,
- const void* src, size_t srcRB) {
- if (srcInfo.isOpaque()) {
- for (int y = 0; y < srcInfo.height(); ++y) {
- memset(dst, 0xFF, srcInfo.width());
- dst = SkTAddOffset<uint8_t>(dst, dstRB);
- }
- return;
+static bool convert_to_alpha8(const SkImageInfo& dstInfo, void* vdst, size_t dstRB,
+ const SkImageInfo& srcInfo, const void* src, size_t srcRB,
+ const SkColorSpaceXformSteps&) {
+ if (dstInfo.colorType() != kAlpha_8_SkColorType) {
+ return false;
}
+ auto dst = (uint8_t*)vdst;
switch (srcInfo.colorType()) {
+ case kUnknown_SkColorType:
+ case kAlpha_8_SkColorType: {
+ // Unknown should never happen.
+ // Alpha8 should have been handled by rect_memcpy().
+ SkASSERT(false);
+ return false;
+ }
+
+ case kGray_8_SkColorType:
+ case kRGB_565_SkColorType:
+ case kRGB_888x_SkColorType:
+ case kRGB_101010x_SkColorType: {
+ for (int y = 0; y < srcInfo.height(); ++y) {
+ memset(dst, 0xFF, srcInfo.width());
+ dst = SkTAddOffset<uint8_t>(dst, dstRB);
+ }
+ return true;
+ }
+
+ case kARGB_4444_SkColorType: {
+ auto src16 = (const uint16_t*) src;
+ for (int y = 0; y < srcInfo.height(); y++) {
+ for (int x = 0; x < srcInfo.width(); x++) {
+ dst[x] = SkPacked4444ToA32(src16[x]);
+ }
+ dst = SkTAddOffset<uint8_t>(dst, dstRB);
+ src16 = SkTAddOffset<const uint16_t>(src16, srcRB);
+ }
+ return true;
+ }
+
case kBGRA_8888_SkColorType:
case kRGBA_8888_SkColorType: {
auto src32 = (const uint32_t*) src;
@@ -107,43 +122,21 @@
dst = SkTAddOffset<uint8_t>(dst, dstRB);
src32 = SkTAddOffset<const uint32_t>(src32, srcRB);
}
- break;
+ return true;
}
+
case kRGBA_1010102_SkColorType: {
auto src32 = (const uint32_t*) src;
for (int y = 0; y < srcInfo.height(); y++) {
for (int x = 0; x < srcInfo.width(); x++) {
- switch (src32[x] >> 30) {
- case 0:
- dst[x] = 0;
- break;
- case 1:
- dst[x] = 0x55;
- break;
- case 2:
- dst[x] = 0xAA;
- break;
- case 3:
- dst[x] = 0xFF;
- break;
- }
+ dst[x] = (src32[x] >> 30) * 0x55;
}
dst = SkTAddOffset<uint8_t>(dst, dstRB);
src32 = SkTAddOffset<const uint32_t>(src32, srcRB);
}
- break;
+ return true;
}
- case kARGB_4444_SkColorType: {
- auto src16 = (const uint16_t*) src;
- for (int y = 0; y < srcInfo.height(); y++) {
- for (int x = 0; x < srcInfo.width(); x++) {
- dst[x] = SkPacked4444ToA32(src16[x]);
- }
- dst = SkTAddOffset<uint8_t>(dst, dstRB);
- src16 = SkTAddOffset<const uint16_t>(src16, srcRB);
- }
- break;
- }
+
case kRGBA_F16_SkColorType: {
auto src64 = (const uint64_t*) src;
for (int y = 0; y < srcInfo.height(); y++) {
@@ -153,8 +146,9 @@
dst = SkTAddOffset<uint8_t>(dst, dstRB);
src64 = SkTAddOffset<const uint64_t>(src64, srcRB);
}
- break;
+ return true;
}
+
case kRGBA_F32_SkColorType: {
auto rgba = (const float*)src;
for (int y = 0; y < srcInfo.height(); y++) {
@@ -164,25 +158,22 @@
dst = SkTAddOffset<uint8_t>(dst, dstRB);
rgba = SkTAddOffset<const float>(rgba, srcRB);
}
- } break;
- default:
- SkASSERT(false);
- break;
+ return true;
+ }
}
+ return false;
}
// Default: Use the pipeline.
static void convert_with_pipeline(const SkImageInfo& dstInfo, void* dstRow, size_t dstRB,
- const SkImageInfo& srcInfo, const void* srcRow, size_t srcRB) {
+ const SkImageInfo& srcInfo, const void* srcRow, size_t srcRB,
+ const SkColorSpaceXformSteps& steps) {
SkJumper_MemoryCtx src = { (void*)srcRow, (int)(srcRB / srcInfo.bytesPerPixel()) },
dst = { (void*)dstRow, (int)(dstRB / dstInfo.bytesPerPixel()) };
SkRasterPipeline_<256> pipeline;
pipeline.append_load(srcInfo.colorType(), &src);
-
- SkColorSpaceXformSteps steps{srcInfo.colorSpace(), srcInfo.alphaType(),
- dstInfo.colorSpace(), dstInfo.alphaType()};
steps.apply(&pipeline);
// We'll dither if we're decreasing precision below 32-bit.
@@ -202,40 +193,18 @@
pipeline.run(0,0, srcInfo.width(), srcInfo.height());
}
-static bool swizzle_and_multiply_color_type(SkColorType ct) {
- switch (ct) {
- case kRGBA_8888_SkColorType:
- case kBGRA_8888_SkColorType:
- return true;
- default:
- return false;
- }
-}
-
-void SkConvertPixels(const SkImageInfo& dstInfo, void* dstPixels, size_t dstRB,
+void SkConvertPixels(const SkImageInfo& dstInfo, void* dstPixels, size_t dstRB,
const SkImageInfo& srcInfo, const void* srcPixels, size_t srcRB) {
SkASSERT(dstInfo.dimensions() == srcInfo.dimensions());
SkASSERT(SkImageInfoValidConversion(dstInfo, srcInfo));
- // Fast Path 1: The memcpy() case.
- if (can_memcpy(dstInfo, srcInfo)) {
- SkRectMemcpy(dstPixels, dstRB, srcPixels, srcRB, dstInfo.minRowBytes(), dstInfo.height());
- return;
- }
+ SkColorSpaceXformSteps steps{srcInfo.colorSpace(), srcInfo.alphaType(),
+ dstInfo.colorSpace(), dstInfo.alphaType()};
- // Fast Path 2: Simple swizzles and premuls.
- if (swizzle_and_multiply_color_type(srcInfo.colorType()) &&
- swizzle_and_multiply_color_type(dstInfo.colorType()) && !dstInfo.colorSpace()) {
- swizzle_and_multiply(dstInfo, dstPixels, dstRB, srcInfo, srcPixels, srcRB);
- return;
+ for (auto fn : {rect_memcpy, swizzle_and_multiply, convert_to_alpha8}) {
+ if (fn(dstInfo, dstPixels, dstRB, srcInfo, srcPixels, srcRB, steps)) {
+ return;
+ }
}
-
- // Fast Path 3: Alpha 8 dsts.
- if (kAlpha_8_SkColorType == dstInfo.colorType()) {
- convert_to_alpha8((uint8_t*) dstPixels, dstRB, srcInfo, srcPixels, srcRB);
- return;
- }
-
- // Default: Use the pipeline.
- convert_with_pipeline(dstInfo, dstPixels, dstRB, srcInfo, srcPixels, srcRB);
+ convert_with_pipeline(dstInfo, dstPixels, dstRB, srcInfo, srcPixels, srcRB, steps);
}
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 7f4e066..862afac 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -33,17 +33,19 @@
extern void (*blit_row_s32a_opaque)(SkPMColor*, const SkPMColor*, int, U8CPU);
// Swizzle input into some sort of 8888 pixel, {premul,unpremul} x {rgba,bgra}.
- typedef void (*Swizzle_8888)(uint32_t*, const void*, int);
- extern Swizzle_8888 RGBA_to_BGRA, // i.e. just swap RB
- RGBA_to_rgbA, // i.e. just premultiply
- RGBA_to_bgrA, // i.e. swap RB and premultiply
- RGB_to_RGB1, // i.e. insert an opaque alpha
- RGB_to_BGR1, // i.e. swap RB and insert an opaque alpha
- gray_to_RGB1, // i.e. expand to color channels + an opaque alpha
- grayA_to_RGBA, // i.e. expand to color channels
- grayA_to_rgbA, // i.e. expand to color channels and premultiply
- inverted_CMYK_to_RGB1, // i.e. convert color space
- inverted_CMYK_to_BGR1; // i.e. convert color space
+ typedef void (*Swizzle_8888_u32)(uint32_t*, const uint32_t*, int);
+ extern Swizzle_8888_u32 RGBA_to_BGRA, // i.e. just swap RB
+ RGBA_to_rgbA, // i.e. just premultiply
+ RGBA_to_bgrA, // i.e. swap RB and premultiply
+ inverted_CMYK_to_RGB1, // i.e. convert color space
+ inverted_CMYK_to_BGR1; // i.e. convert color space
+
+ typedef void (*Swizzle_8888_u8)(uint32_t*, const uint8_t*, int);
+ extern Swizzle_8888_u8 RGB_to_RGB1, // i.e. insert an opaque alpha
+ RGB_to_BGR1, // i.e. swap RB and insert an opaque alpha
+ gray_to_RGB1, // i.e. expand to color channels + an opaque alpha
+ grayA_to_RGBA, // i.e. expand to color channels
+ grayA_to_rgbA; // i.e. expand to color channels and premultiply
extern void (*memset16)(uint16_t[], uint16_t, int);
extern void SK_API (*memset32)(uint32_t[], uint32_t, int);
diff --git a/src/gpu/GrDrawOpAtlas.cpp b/src/gpu/GrDrawOpAtlas.cpp
index d891c02..9c994a0 100644
--- a/src/gpu/GrDrawOpAtlas.cpp
+++ b/src/gpu/GrDrawOpAtlas.cpp
@@ -112,7 +112,7 @@
// copy into the data buffer, swizzling as we go if this is ARGB data
if (4 == fBytesPerPixel && kSkia8888_GrPixelConfig == kBGRA_8888_GrPixelConfig) {
for (int i = 0; i < height; ++i) {
- SkOpts::RGBA_to_BGRA(reinterpret_cast<uint32_t*>(dataPtr), imagePtr, width);
+ SkOpts::RGBA_to_BGRA((uint32_t*)dataPtr, (const uint32_t*)imagePtr, width);
dataPtr += fBytesPerPixel * fWidth;
imagePtr += rowBytes;
}
diff --git a/src/opts/SkSwizzler_opts.h b/src/opts/SkSwizzler_opts.h
index 892dc31..82eb7b6 100644
--- a/src/opts/SkSwizzler_opts.h
+++ b/src/opts/SkSwizzler_opts.h
@@ -20,8 +20,7 @@
namespace SK_OPTS_NS {
-static void RGBA_to_rgbA_portable(uint32_t* dst, const void* vsrc, int count) {
- auto src = (const uint32_t*)vsrc;
+static void RGBA_to_rgbA_portable(uint32_t* dst, const uint32_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t a = src[i] >> 24,
b = src[i] >> 16,
@@ -37,8 +36,7 @@
}
}
-static void RGBA_to_bgrA_portable(uint32_t* dst, const void* vsrc, int count) {
- auto src = (const uint32_t*)vsrc;
+static void RGBA_to_bgrA_portable(uint32_t* dst, const uint32_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t a = src[i] >> 24,
b = src[i] >> 16,
@@ -54,8 +52,7 @@
}
}
-static void RGBA_to_BGRA_portable(uint32_t* dst, const void* vsrc, int count) {
- auto src = (const uint32_t*)vsrc;
+static void RGBA_to_BGRA_portable(uint32_t* dst, const uint32_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t a = src[i] >> 24,
b = src[i] >> 16,
@@ -68,8 +65,7 @@
}
}
-static void RGB_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*)vsrc;
+static void RGB_to_RGB1_portable(uint32_t dst[], const uint8_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t r = src[0],
g = src[1],
@@ -82,8 +78,7 @@
}
}
-static void RGB_to_BGR1_portable(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*)vsrc;
+static void RGB_to_BGR1_portable(uint32_t dst[], const uint8_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t r = src[0],
g = src[1],
@@ -96,8 +91,7 @@
}
}
-static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*)vsrc;
+static void gray_to_RGB1_portable(uint32_t dst[], const uint8_t* src, int count) {
for (int i = 0; i < count; i++) {
dst[i] = (uint32_t)0xFF << 24
| (uint32_t)src[i] << 16
@@ -106,8 +100,7 @@
}
}
-static void grayA_to_RGBA_portable(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*)vsrc;
+static void grayA_to_RGBA_portable(uint32_t dst[], const uint8_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t g = src[0],
a = src[1];
@@ -119,8 +112,7 @@
}
}
-static void grayA_to_rgbA_portable(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*)vsrc;
+static void grayA_to_rgbA_portable(uint32_t dst[], const uint8_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t g = src[0],
a = src[1];
@@ -133,8 +125,7 @@
}
}
-static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) {
- const uint32_t* src = (const uint32_t*)vsrc;
+static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const uint32_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t k = src[i] >> 24,
y = src[i] >> 16,
@@ -151,8 +142,7 @@
}
}
-static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) {
- const uint32_t* src = (const uint32_t*)vsrc;
+static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const uint32_t* src, int count) {
for (int i = 0; i < count; i++) {
uint8_t k = src[i] >> 24,
y = src[i] >> 16,
@@ -200,8 +190,7 @@
}
template <bool kSwapRB>
-static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) {
- auto src = (const uint32_t*)vsrc;
+static void premul_should_swapRB(uint32_t* dst, const uint32_t* src, int count) {
while (count >= 8) {
// Load 8 pixels.
uint8x8x4_t rgba = vld4_u8((const uint8_t*) src);
@@ -237,17 +226,16 @@
proc(dst, src, count);
}
-/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
+/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const uint32_t* src, int count) {
premul_should_swapRB<false>(dst, src, count);
}
-/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {
+/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const uint32_t* src, int count) {
premul_should_swapRB<true>(dst, src, count);
}
-/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) {
+/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const uint32_t* src, int count) {
using std::swap;
- auto src = (const uint32_t*)vsrc;
while (count >= 16) {
// Load 16 pixels.
uint8x16x4_t rgba = vld4q_u8((const uint8_t*) src);
@@ -280,8 +268,7 @@
}
template <bool kSwapRB>
-static void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*) vsrc;
+static void insert_alpha_should_swaprb(uint32_t dst[], const uint8_t* src, int count) {
while (count >= 16) {
// Load 16 pixels.
uint8x16x3_t rgb = vld3q_u8(src);
@@ -333,16 +320,15 @@
proc(dst, src, count);
}
-/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
insert_alpha_should_swaprb<false>(dst, src, count);
}
-/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const uint8_t* src, int count) {
insert_alpha_should_swaprb<true>(dst, src, count);
}
-/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*) vsrc;
+/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
while (count >= 16) {
// Load 16 pixels.
uint8x16_t gray = vld1q_u8(src);
@@ -383,8 +369,7 @@
}
template <bool kPremul>
-static void expand_grayA(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*) vsrc;
+static void expand_grayA(uint32_t dst[], const uint8_t* src, int count) {
while (count >= 16) {
// Load 16 pixels.
uint8x16x2_t ga = vld2q_u8(src);
@@ -437,18 +422,17 @@
proc(dst, src, count);
}
-/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const uint8_t* src, int count) {
expand_grayA<false>(dst, src, count);
}
-/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const uint8_t* src, int count) {
expand_grayA<true>(dst, src, count);
}
enum Format { kRGB1, kBGR1 };
template <Format format>
-static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) {
- auto src = (const uint32_t*)vsrc;
+static void inverted_cmyk_to(uint32_t* dst, const uint32_t* src, int count) {
while (count >= 8) {
// Load 8 cmyk pixels.
uint8x8x4_t pixels = vld4_u8((const uint8_t*) src);
@@ -485,11 +469,11 @@
proc(dst, src, count);
}
-/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const uint32_t* src, int count) {
inverted_cmyk_to<kRGB1>(dst, src, count);
}
-/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const uint32_t* src, int count) {
inverted_cmyk_to<kBGR1>(dst, src, count);
}
@@ -506,8 +490,7 @@
}
template <bool kSwapRB>
-static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) {
- auto src = (const uint32_t*)vsrc;
+static void premul_should_swapRB(uint32_t* dst, const uint32_t* src, int count) {
auto premul8 = [](__m128i* lo, __m128i* hi) {
const __m128i zeros = _mm_setzero_si128();
@@ -574,16 +557,15 @@
proc(dst, src, count);
}
-/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
+/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const uint32_t* src, int count) {
premul_should_swapRB<false>(dst, src, count);
}
-/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {
+/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const uint32_t* src, int count) {
premul_should_swapRB<true>(dst, src, count);
}
-/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const void* vsrc, int count) {
- auto src = (const uint32_t*)vsrc;
+/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const uint32_t* src, int count) {
const __m128i swapRB = _mm_setr_epi8(2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15);
while (count >= 4) {
@@ -600,9 +582,7 @@
}
template <bool kSwapRB>
-static void insert_alpha_should_swaprb(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*) vsrc;
-
+static void insert_alpha_should_swaprb(uint32_t dst[], const uint8_t* src, int count) {
const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
__m128i expand;
const uint8_t X = 0xFF; // Used a placeholder. The value of X is irrelevant.
@@ -634,17 +614,15 @@
proc(dst, src, count);
}
-/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
insert_alpha_should_swaprb<false>(dst, src, count);
}
-/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const uint8_t* src, int count) {
insert_alpha_should_swaprb<true>(dst, src, count);
}
-/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*) vsrc;
-
+/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
const __m128i alphas = _mm_set1_epi8((uint8_t) 0xFF);
while (count >= 16) {
__m128i grays = _mm_loadu_si128((const __m128i*) src);
@@ -672,8 +650,7 @@
gray_to_RGB1_portable(dst, src, count);
}
-/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*) vsrc;
+/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const uint8_t* src, int count) {
while (count >= 8) {
__m128i ga = _mm_loadu_si128((const __m128i*) src);
@@ -694,8 +671,7 @@
grayA_to_RGBA_portable(dst, src, count);
}
-/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const void* vsrc, int count) {
- const uint8_t* src = (const uint8_t*) vsrc;
+/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const uint8_t* src, int count) {
while (count >= 8) {
__m128i grayA = _mm_loadu_si128((const __m128i*) src);
@@ -725,9 +701,7 @@
enum Format { kRGB1, kBGR1 };
template <Format format>
-static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) {
- auto src = (const uint32_t*)vsrc;
-
+static void inverted_cmyk_to(uint32_t* dst, const uint32_t* src, int count) {
auto convert8 = [](__m128i* lo, __m128i* hi) {
const __m128i zeros = _mm_setzero_si128();
__m128i planar;
@@ -792,53 +766,53 @@
proc(dst, src, count);
}
-/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const uint32_t* src, int count) {
inverted_cmyk_to<kRGB1>(dst, src, count);
}
-/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const uint32_t* src, int count) {
inverted_cmyk_to<kBGR1>(dst, src, count);
}
#else
-/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
+/*not static*/ inline void RGBA_to_rgbA(uint32_t* dst, const uint32_t* src, int count) {
RGBA_to_rgbA_portable(dst, src, count);
}
-/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {
+/*not static*/ inline void RGBA_to_bgrA(uint32_t* dst, const uint32_t* src, int count) {
RGBA_to_bgrA_portable(dst, src, count);
}
-/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) {
+/*not static*/ inline void RGBA_to_BGRA(uint32_t* dst, const uint32_t* src, int count) {
RGBA_to_BGRA_portable(dst, src, count);
}
-/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void RGB_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
RGB_to_RGB1_portable(dst, src, count);
}
-/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void RGB_to_BGR1(uint32_t dst[], const uint8_t* src, int count) {
RGB_to_BGR1_portable(dst, src, count);
}
-/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void gray_to_RGB1(uint32_t dst[], const uint8_t* src, int count) {
gray_to_RGB1_portable(dst, src, count);
}
-/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void grayA_to_RGBA(uint32_t dst[], const uint8_t* src, int count) {
grayA_to_RGBA_portable(dst, src, count);
}
-/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void grayA_to_rgbA(uint32_t dst[], const uint8_t* src, int count) {
grayA_to_rgbA_portable(dst, src, count);
}
-/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void inverted_CMYK_to_RGB1(uint32_t dst[], const uint32_t* src, int count) {
inverted_CMYK_to_RGB1_portable(dst, src, count);
}
-/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
+/*not static*/ inline void inverted_CMYK_to_BGR1(uint32_t dst[], const uint32_t* src, int count) {
inverted_CMYK_to_BGR1_portable(dst, src, count);
}