commit to alpha bitmap procs
The alpha procs differ from the opaque ones only in a very isolated,
branch-predictable place. If we need to make any of these care about
whether alphaScale == 256, we can probably just put a branch in there.
More refactoring to follow...
For now I've kept Clamp_S32_opaque_D32_nofilter_DX_shaderproc() around.
Cq-Include-Trybots: master.tryserver.blink:linux_trusty_blink_rel
Change-Id: I2739b6fc7d36c1adf6c702b271b20986f86e413f
Commit-Queue: Mike Klein <>
Reviewed-by: Herb Derby <>
diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp
index 8d5e1a8..26e5ac9 100644
--- a/src/core/SkBitmapProcState.cpp
+++ b/src/core/SkBitmapProcState.cpp
@@ -22,7 +22,62 @@
extern const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[];
-extern void Clamp_S32_opaque_D32_nofilter_DX_shaderproc(const void*, int, int, uint32_t*, int);
+// One-stop-shop shader for,
+// - nearest-neighbor sampling (_nofilter_),
+// - clamp tiling in X and Y both (Clamp_),
+// - with at most a scale and translate matrix (_DX_),
+// - and no extra alpha applied (_opaque_),
+// - sampling from 8888 (_S32_) and drawing to 8888 (_D32_).
+static void Clamp_S32_opaque_D32_nofilter_DX_shaderproc(const void* sIn, int x, int y,
+ SkPMColor* SK_RESTRICT dst, int count) {
+ const SkBitmapProcState& s = *static_cast<const SkBitmapProcState*>(sIn);
+ SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
+ SkMatrix::kScale_Mask)) == 0);
+ SkASSERT(s.fAlphaScale == 256);
+ const unsigned maxX = s.fPixmap.width() - 1;
+ SkFractionalInt fx;
+ int dstY;
+ {
+ const SkBitmapProcStateAutoMapper mapper(s, x, y);
+ const unsigned maxY = s.fPixmap.height() - 1;
+ dstY = SkClampMax(mapper.intY(), maxY);
+ fx = mapper.fractionalIntX();
+ }
+ const SkPMColor* SK_RESTRICT src = s.fPixmap.addr32(0, dstY);
+ const SkFractionalInt dx = s.fInvSxFractionalInt;
+ // Check if we're safely inside [0...maxX] so no need to clamp each computed index.
+ //
+ if ((uint64_t)SkFractionalIntToInt(fx) <= maxX &&
+ (uint64_t)SkFractionalIntToInt(fx + dx * (count - 1)) <= maxX)
+ {
+ int count4 = count >> 2;
+ for (int i = 0; i < count4; ++i) {
+ SkPMColor src0 = src[SkFractionalIntToInt(fx)]; fx += dx;
+ SkPMColor src1 = src[SkFractionalIntToInt(fx)]; fx += dx;
+ SkPMColor src2 = src[SkFractionalIntToInt(fx)]; fx += dx;
+ SkPMColor src3 = src[SkFractionalIntToInt(fx)]; fx += dx;
+ dst[0] = src0;
+ dst[1] = src1;
+ dst[2] = src2;
+ dst[3] = src3;
+ dst += 4;
+ }
+ for (int i = (count4 << 2); i < count; ++i) {
+ unsigned index = SkFractionalIntToInt(fx);
+ SkASSERT(index <= maxX);
+ *dst++ = src[index];
+ fx += dx;
+ }
+ } else {
+ for (int i = 0; i < count; ++i) {
+ dst[i] = src[SkClampMax(SkFractionalIntToInt(fx), maxX)];
+ fx += dx;
+ }
+ }
#define NAME_WRAP(x) x
#include "SkBitmapProcState_filter.h"
@@ -190,25 +245,18 @@
if (fFilterQuality < kHigh_SkFilterQuality) {
int index = 0;
- if (fAlphaScale < 256) { // note: this distinction is not used for D16
+ if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
index |= 1;
- if (fInvType <= (SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) {
- index |= 2;
- }
if (fFilterQuality > kNone_SkFilterQuality) {
- index |= 4;
+ index |= 2;
#if !defined(SK_ARM_HAS_NEON)
static const SampleProc32 gSkBitmapProcStateSample32[] = {
- S32_opaque_D32_nofilter_DXDY,
- S32_opaque_D32_nofilter_DX,
- S32_opaque_D32_filter_DXDY,
- S32_opaque_D32_filter_DX,
@@ -216,7 +264,9 @@
fSampleProc32 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample32)[index];
// our special-case shaderprocs
- if (S32_opaque_D32_nofilter_DX == fSampleProc32 && clampClamp) {
+ if (fAlphaScale == 256
+ && fSampleProc32 == S32_alpha_D32_nofilter_DX
+ && clampClamp) {
fShaderProc32 = Clamp_S32_opaque_D32_nofilter_DX_shaderproc;
@@ -416,12 +466,7 @@
if (kNone_SkFilterQuality != s.fFilterQuality) {
const SkPMColor* row1 = s.fPixmap.addr32(0, iY1);
- if (s.fAlphaScale < 256) {
- Filter_32_alpha(iSubY, *row0, *row1, &color, s.fAlphaScale);
- } else {
- Filter_32_opaque(iSubY, *row0, *row1, &color);
- }
+ Filter_32_alpha(iSubY, *row0, *row1, &color, s.fAlphaScale);
} else {
if (s.fAlphaScale < 256) {
color = SkAlphaMulQ(*row0, s.fAlphaScale);
@@ -596,52 +641,3 @@
-void Clamp_S32_opaque_D32_nofilter_DX_shaderproc(const void* sIn, int x, int y,
- SkPMColor* SK_RESTRICT dst, int count) {
- const SkBitmapProcState& s = *static_cast<const SkBitmapProcState*>(sIn);
- SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
- SkMatrix::kScale_Mask)) == 0);
- const unsigned maxX = s.fPixmap.width() - 1;
- SkFractionalInt fx;
- int dstY;
- {
- const SkBitmapProcStateAutoMapper mapper(s, x, y);
- const unsigned maxY = s.fPixmap.height() - 1;
- dstY = SkClampMax(mapper.intY(), maxY);
- fx = mapper.fractionalIntX();
- }
- const SkPMColor* SK_RESTRICT src = s.fPixmap.addr32(0, dstY);
- const SkFractionalInt dx = s.fInvSxFractionalInt;
- // Check if we're safely inside [0...maxX] so no need to clamp each computed index.
- //
- if ((uint64_t)SkFractionalIntToInt(fx) <= maxX &&
- (uint64_t)SkFractionalIntToInt(fx + dx * (count - 1)) <= maxX)
- {
- int count4 = count >> 2;
- for (int i = 0; i < count4; ++i) {
- SkPMColor src0 = src[SkFractionalIntToInt(fx)]; fx += dx;
- SkPMColor src1 = src[SkFractionalIntToInt(fx)]; fx += dx;
- SkPMColor src2 = src[SkFractionalIntToInt(fx)]; fx += dx;
- SkPMColor src3 = src[SkFractionalIntToInt(fx)]; fx += dx;
- dst[0] = src0;
- dst[1] = src1;
- dst[2] = src2;
- dst[3] = src3;
- dst += 4;
- }
- for (int i = (count4 << 2); i < count; ++i) {
- unsigned index = SkFractionalIntToInt(fx);
- SkASSERT(index <= maxX);
- *dst++ = src[index];
- fx += dx;
- }
- } else {
- for (int i = 0; i < count; ++i) {
- dst[i] = src[SkClampMax(SkFractionalIntToInt(fx), maxX)];
- fx += dx;
- }
- }
diff --git a/src/core/SkBitmapProcState.h b/src/core/SkBitmapProcState.h
index bcc5da3..6aa5865 100644
--- a/src/core/SkBitmapProcState.h
+++ b/src/core/SkBitmapProcState.h
@@ -173,10 +173,9 @@
// These functions are generated via macros, but are exposed here so that
// platformProcs may test for them by name.
-void S32_opaque_D32_filter_DX(const SkBitmapProcState& s, const uint32_t xy[],
- int count, SkPMColor colors[]);
-void S32_alpha_D32_filter_DX(const SkBitmapProcState& s, const uint32_t xy[],
- int count, SkPMColor colors[]);
+void S32_alpha_D32_filter_DX(const SkBitmapProcState& s,
+ const uint32_t xy[], int count, SkPMColor colors[]);
void ClampX_ClampY_filter_scale(const SkBitmapProcState& s, uint32_t xy[],
int count, int x, int y);
void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
diff --git a/src/core/SkBitmapProcState_filter.h b/src/core/SkBitmapProcState_filter.h
index a30b76d..a7b9a84 100644
--- a/src/core/SkBitmapProcState_filter.h
+++ b/src/core/SkBitmapProcState_filter.h
@@ -8,43 +8,6 @@
#include "SkColorData.h"
- Filter_32_opaque
- There is no hard-n-fast rule that the filtering must produce
- exact results for the color components, but if the 4 incoming colors are
- all opaque, then the output color must also be opaque. Subsequent parts of
- the drawing pipeline may rely on this (e.g. which blitrow proc to use).
- */
-static inline void Filter_32_opaque(unsigned x, unsigned y,
- SkPMColor a00, SkPMColor a01,
- SkPMColor a10, SkPMColor a11,
- SkPMColor* dstColor) {
- SkASSERT((unsigned)x <= 0xF);
- SkASSERT((unsigned)y <= 0xF);
- int xy = x * y;
- const uint32_t mask = 0xFF00FF;
- int scale = 256 - 16*y - 16*x + xy;
- uint32_t lo = (a00 & mask) * scale;
- uint32_t hi = ((a00 >> 8) & mask) * scale;
- scale = 16*x - xy;
- lo += (a01 & mask) * scale;
- hi += ((a01 >> 8) & mask) * scale;
- scale = 16*y - xy;
- lo += (a10 & mask) * scale;
- hi += ((a10 >> 8) & mask) * scale;
- lo += (a11 & mask) * xy;
- hi += ((a11 >> 8) & mask) * xy;
- *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
static inline void Filter_32_alpha(unsigned x, unsigned y,
SkPMColor a00, SkPMColor a01,
SkPMColor a10, SkPMColor a11,
@@ -72,6 +35,7 @@
lo += (a11 & mask) * xy;
hi += ((a11 >> 8) & mask) * xy;
+ // TODO: only apply the alphaScale multiply below if (alphaScale < 256); at 256 it is a no-op.
lo = ((lo >> 8) & mask) * alphaScale;
hi = ((hi >> 8) & mask) * alphaScale;
@@ -79,26 +43,6 @@
// Two color version, where we filter only along 1 axis
-static inline void Filter_32_opaque(unsigned t,
- SkPMColor color0,
- SkPMColor color1,
- SkPMColor* dstColor) {
- SkASSERT((unsigned)t <= 0xF);
- const uint32_t mask = 0xFF00FF;
- int scale = 256 - 16*t;
- uint32_t lo = (color0 & mask) * scale;
- uint32_t hi = ((color0 >> 8) & mask) * scale;
- scale = 16*t;
- lo += (color1 & mask) * scale;
- hi += ((color1 >> 8) & mask) * scale;
- *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
-// Two color version, where we filter only along 1 axis
static inline void Filter_32_alpha(unsigned t,
SkPMColor color0,
SkPMColor color1,
@@ -117,6 +61,7 @@
lo += (color1 & mask) * scale;
hi += ((color1 >> 8) & mask) * scale;
+ // TODO: only apply the alphaScale multiply below if (alphaScale < 256); at 256 it is a no-op.
lo = ((lo >> 8) & mask) * alphaScale;
hi = ((hi >> 8) & mask) * alphaScale;
diff --git a/src/core/SkBitmapProcState_procs.h b/src/core/SkBitmapProcState_procs.h
index 6badd62..ebdf155 100644
--- a/src/core/SkBitmapProcState_procs.h
+++ b/src/core/SkBitmapProcState_procs.h
@@ -9,28 +9,16 @@
// E.g. for ARM NEON, defined it as 'x ## _neon' to ensure all important
// identifiers have a _neon suffix.
#ifndef NAME_WRAP
-#error "Please define NAME_WRAP() before including this file"
+ #error "Please define NAME_WRAP() before including this file"
-// SRC == 8888
-#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst)
-#define MAKENAME(suffix) NAME_WRAP(S32_opaque_D32 ## suffix)
-#define SRCTYPE SkPMColor
-#define CHECKSTATE(state) SkASSERT(4 ==; \
- SkASSERT(state.fAlphaScale == 256)
-#define RETURNDST(src) src
-#define SRC_TO_FILTER(src) src
-#include "SkBitmapProcState_sample.h"
-#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_alpha)(x, y, a, b, c, d, dst, alphaScale)
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+ NAME_WRAP(Filter_32_alpha)(x, y, a, b, c, d, dst, alphaScale)
#define MAKENAME(suffix) NAME_WRAP(S32_alpha_D32 ## suffix)
#define SRCTYPE SkPMColor
#define CHECKSTATE(state) SkASSERT(4 ==; \
- SkASSERT(state.fAlphaScale < 256)
+ SkASSERT(state.fAlphaScale <= 256)
#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale
#define RETURNDST(src) SkAlphaMulQ(src, alphaScale)
#define SRC_TO_FILTER(src) src
diff --git a/src/opts/SkBitmapProcState_arm_neon.cpp b/src/opts/SkBitmapProcState_arm_neon.cpp
index b954d85..1087b4d 100644
--- a/src/opts/SkBitmapProcState_arm_neon.cpp
+++ b/src/opts/SkBitmapProcState_arm_neon.cpp
@@ -20,12 +20,8 @@
#include "SkBitmapProcState_procs.h"
const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[] = {
- S32_opaque_D32_nofilter_DXDY_neon,
- S32_opaque_D32_nofilter_DX_neon,
- S32_opaque_D32_filter_DXDY_neon,
- S32_opaque_D32_filter_DX_neon,
diff --git a/src/opts/SkBitmapProcState_filter_neon.h b/src/opts/SkBitmapProcState_filter_neon.h
index 5352ce4..ab3cec8 100644
--- a/src/opts/SkBitmapProcState_filter_neon.h
+++ b/src/opts/SkBitmapProcState_filter_neon.h
@@ -5,54 +5,8 @@
* found in the LICENSE file.
-#include <arm_neon.h>
#include "SkColorData.h"
- * Filter_32_opaque
- *
- * There is no hard-n-fast rule that the filtering must produce
- * exact results for the color components, but if the 4 incoming colors are
- * all opaque, then the output color must also be opaque. Subsequent parts of
- * the drawing pipeline may rely on this (e.g. which blitrow proc to use).
- *
- */
-// Chrome on Android uses -Os so we need to force these inline. Otherwise
-// calling the function in the inner loops will cause significant overhead on
-// some platforms.
-static SK_ALWAYS_INLINE void Filter_32_opaque_neon(unsigned x, unsigned y,
- SkPMColor a00, SkPMColor a01,
- SkPMColor a10, SkPMColor a11,
- SkPMColor *dst) {
- uint8x8_t vy, vconst16_8, v16_y, vres;
- uint16x4_t vx, vconst16_16, v16_x, tmp;
- uint32x2_t va0, va1;
- uint16x8_t tmp1, tmp2;
- vy = vdup_n_u8(y); // duplicate y into vy
- vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8
- v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y
- va0 = vdup_n_u32(a00); // duplicate a00
- va1 = vdup_n_u32(a10); // duplicate a10
- va0 = vset_lane_u32(a01, va0, 1); // set top to a01
- va1 = vset_lane_u32(a11, va1, 1); // set top to a11
- tmp1 = vmull_u8(vreinterpret_u8_u32(va0), v16_y); // tmp1 = [a01|a00] * (16-y)
- tmp2 = vmull_u8(vreinterpret_u8_u32(va1), vy); // tmp2 = [a11|a10] * y
- vx = vdup_n_u16(x); // duplicate x into vx
- vconst16_16 = vmov_n_u16(16); // set up constant in vconst16_16
- v16_x = vsub_u16(vconst16_16, vx); // v16_x = 16-x
- tmp = vmul_u16(vget_high_u16(tmp1), vx); // tmp = a01 * x
- tmp = vmla_u16(tmp, vget_high_u16(tmp2), vx); // tmp += a11 * x
- tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x)
- tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x)
- vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8
- vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result
+#include <arm_neon.h>
static SK_ALWAYS_INLINE void Filter_32_alpha_neon(unsigned x, unsigned y,
SkPMColor a00, SkPMColor a01,
diff --git a/src/opts/SkBitmapProcState_opts_SSE2.cpp b/src/opts/SkBitmapProcState_opts_SSE2.cpp
index 92be971..391d421 100644
--- a/src/opts/SkBitmapProcState_opts_SSE2.cpp
+++ b/src/opts/SkBitmapProcState_opts_SSE2.cpp
@@ -8,125 +8,17 @@
#include "SkBitmapProcState_opts_SSE2.h"
#include "SkBitmapProcState_utils.h"
#include "SkColorData.h"
-#include "SkPaint.h"
#include "SkTo.h"
-#include "SkUTF.h"
#include <emmintrin.h>
-void S32_opaque_D32_filter_DX_SSE2(const SkBitmapProcState& s,
- const uint32_t* xy,
- int count, uint32_t* colors) {
- SkASSERT(count > 0 && colors != nullptr);
- SkASSERT(s.fFilterQuality != kNone_SkFilterQuality);
- SkASSERT(kN32_SkColorType == s.fPixmap.colorType());
- SkASSERT(s.fAlphaScale == 256);
- const char* srcAddr = static_cast<const char*>(s.fPixmap.addr());
- size_t rb = s.fPixmap.rowBytes();
- uint32_t XY = *xy++;
- unsigned y0 = XY >> 14;
- const uint32_t* row0 = reinterpret_cast<const uint32_t*>(srcAddr + (y0 >> 4) * rb);
- const uint32_t* row1 = reinterpret_cast<const uint32_t*>(srcAddr + (XY & 0x3FFF) * rb);
- unsigned subY = y0 & 0xF;
- // ( 0, 0, 0, 0, 0, 0, 0, 16)
- __m128i sixteen = _mm_cvtsi32_si128(16);
- // ( 0, 0, 0, 0, 16, 16, 16, 16)
- sixteen = _mm_shufflelo_epi16(sixteen, 0);
- // ( 0, 0, 0, 0, 0, 0, 0, y)
- __m128i allY = _mm_cvtsi32_si128(subY);
- // ( 0, 0, 0, 0, y, y, y, y)
- allY = _mm_shufflelo_epi16(allY, 0);
- // ( 0, 0, 0, 0, 16-y, 16-y, 16-y, 16-y)
- __m128i negY = _mm_sub_epi16(sixteen, allY);
- // (16-y, 16-y, 16-y, 16-y, y, y, y, y)
- allY = _mm_unpacklo_epi64(allY, negY);
- // (16, 16, 16, 16, 16, 16, 16, 16 )
- sixteen = _mm_shuffle_epi32(sixteen, 0);
- // ( 0, 0, 0, 0, 0, 0, 0, 0)
- __m128i zero = _mm_setzero_si128();
- do {
- uint32_t XX = *xy++; // x0:14 | 4 | x1:14
- unsigned x0 = XX >> 18;
- unsigned x1 = XX & 0x3FFF;
- // (0, 0, 0, 0, 0, 0, 0, x)
- __m128i allX = _mm_cvtsi32_si128((XX >> 14) & 0x0F);
- // (0, 0, 0, 0, x, x, x, x)
- allX = _mm_shufflelo_epi16(allX, 0);
- // (x, x, x, x, x, x, x, x)
- allX = _mm_shuffle_epi32(allX, 0);
- // (16-x, 16-x, 16-x, 16-x, 16-x, 16-x, 16-x)
- __m128i negX = _mm_sub_epi16(sixteen, allX);
- // Load 4 samples (pixels).
- __m128i a00 = _mm_cvtsi32_si128(row0[x0]);
- __m128i a01 = _mm_cvtsi32_si128(row0[x1]);
- __m128i a10 = _mm_cvtsi32_si128(row1[x0]);
- __m128i a11 = _mm_cvtsi32_si128(row1[x1]);
- // (0, 0, a00, a10)
- __m128i a00a10 = _mm_unpacklo_epi32(a10, a00);
- // Expand to 16 bits per component.
- a00a10 = _mm_unpacklo_epi8(a00a10, zero);
- // ((a00 * (16-y)), (a10 * y)).
- a00a10 = _mm_mullo_epi16(a00a10, allY);
- // (a00 * (16-y) * (16-x), a10 * y * (16-x)).
- a00a10 = _mm_mullo_epi16(a00a10, negX);
- // (0, 0, a01, a10)
- __m128i a01a11 = _mm_unpacklo_epi32(a11, a01);
- // Expand to 16 bits per component.
- a01a11 = _mm_unpacklo_epi8(a01a11, zero);
- // (a01 * (16-y)), (a11 * y)
- a01a11 = _mm_mullo_epi16(a01a11, allY);
- // (a01 * (16-y) * x), (a11 * y * x)
- a01a11 = _mm_mullo_epi16(a01a11, allX);
- // (a00*w00 + a01*w01, a10*w10 + a11*w11)
- __m128i sum = _mm_add_epi16(a00a10, a01a11);
- // (DC, a00*w00 + a01*w01)
- __m128i shifted = _mm_shuffle_epi32(sum, 0xEE);
- // (DC, a00*w00 + a01*w01 + a10*w10 + a11*w11)
- sum = _mm_add_epi16(sum, shifted);
- // Divide each 16 bit component by 256.
- sum = _mm_srli_epi16(sum, 8);
- // Pack lower 4 16 bit values of sum into lower 4 bytes.
- sum = _mm_packus_epi16(sum, zero);
- // Extract low int and store.
- *colors++ = _mm_cvtsi128_si32(sum);
- } while (--count > 0);
void S32_alpha_D32_filter_DX_SSE2(const SkBitmapProcState& s,
const uint32_t* xy,
int count, uint32_t* colors) {
SkASSERT(count > 0 && colors != nullptr);
SkASSERT(s.fFilterQuality != kNone_SkFilterQuality);
SkASSERT(kN32_SkColorType == s.fPixmap.colorType());
- SkASSERT(s.fAlphaScale < 256);
+ SkASSERT(s.fAlphaScale <= 256);
const char* srcAddr = static_cast<const char*>(s.fPixmap.addr());
size_t rb = s.fPixmap.rowBytes();
diff --git a/src/opts/SkBitmapProcState_opts_SSE2.h b/src/opts/SkBitmapProcState_opts_SSE2.h
index 42fe80b..7faeab4 100644
--- a/src/opts/SkBitmapProcState_opts_SSE2.h
+++ b/src/opts/SkBitmapProcState_opts_SSE2.h
@@ -10,12 +10,9 @@
#include "SkBitmapProcState.h"
-void S32_opaque_D32_filter_DX_SSE2(const SkBitmapProcState& s,
- const uint32_t* xy,
- int count, uint32_t* colors);
void S32_alpha_D32_filter_DX_SSE2(const SkBitmapProcState& s,
- const uint32_t* xy,
- int count, uint32_t* colors);
+ const uint32_t* xy, int count, uint32_t* colors);
void ClampX_ClampY_filter_scale_SSE2(const SkBitmapProcState& s, uint32_t xy[],
int count, int x, int y);
void ClampX_ClampY_nofilter_scale_SSE2(const SkBitmapProcState& s,
diff --git a/src/opts/SkBitmapProcState_opts_none.cpp b/src/opts/SkBitmapProcState_opts_none.cpp
index 9b3c7b6..44e975b 100644
--- a/src/opts/SkBitmapProcState_opts_none.cpp
+++ b/src/opts/SkBitmapProcState_opts_none.cpp
@@ -7,15 +7,4 @@
#include "SkBitmapProcState.h"
-/* A platform may optionally overwrite any of these with accelerated
- versions. On input, these will already have valid function pointers,
- so a platform need only overwrite the ones it chooses, based on the
- current state (e.g. fBitmap, fInvMatrix, etc.)
- fShaderProc32
- fMatrixProc
- fSampleProc32
- */
-// empty implementation just uses default supplied function pointers
void SkBitmapProcState::platformProcs() {}
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp
index 29debd7..9b63e4a 100644
--- a/src/opts/opts_check_x86.cpp
+++ b/src/opts/opts_check_x86.cpp
@@ -9,7 +9,6 @@
#include "SkBitmapProcState_opts_SSSE3.h"
#include "SkCpu.h"
*********This file is deprecated*********
@@ -21,41 +20,20 @@
-/* This file must *not* be compiled with -msse or any other optional SIMD
- extension, otherwise gcc may generate SIMD instructions even for scalar ops
- (and thus give an invalid instruction on Pentium3 on the code below).
- For example, only files named *_SSE2.cpp in this directory should be
- compiled with -msse2 or higher. */
void SkBitmapProcState::platformProcs() {
- /* Every optimization in the function requires at least SSE2 */
if (!SkCpu::Supports(SkCpu::SSE2)) {
- const bool ssse3 = SkCpu::Supports(SkCpu::SSSE3);
- /* Check fSampleProc32 */
- if (fSampleProc32 == S32_opaque_D32_filter_DX) {
- if (ssse3) {
- fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
- } else {
- fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
- }
- } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
- if (ssse3) {
- fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
- } else {
- fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
- }
+ if (fSampleProc32 == S32_alpha_D32_filter_DX) {
+ fSampleProc32 = SkCpu::Supports(SkCpu::SSSE3) ? S32_alpha_D32_filter_DX_SSSE3
+ : S32_alpha_D32_filter_DX_SSE2;
- /* Check fMatrixProc */
if (fMatrixProc == ClampX_ClampY_filter_scale) {
- fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
- } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
- fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
+ fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
+ }
+ if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
+ fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;