Fix perf regression in Color32.
The regression was caused by calling PlatformColorProc() for every span
(which in turn invokes CPUID, a fairly expensive call). Since we draw
a lot of rects, and rects have 1-pixel-wide spans for the vertical segments,
that's a lot of CPUID calls.
Fixed by caching the result of PlatformColorProc(), as is done for the other
platform-specific blitters.
Review URL: http://codereview.appspot.com/3669042/
git-svn-id: http://skia.googlecode.com/svn/trunk@636 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/Makefile b/Makefile
index 65f0b71..146d11a 100644
--- a/Makefile
+++ b/Makefile
@@ -94,6 +94,7 @@
# For these files, and these files only, compile with -msse2.
SSE2_OBJS := out/src/opts/SkBlitRow_opts_SSE2.o \
+ out/src/opts/SkBitmapProcState_opts_SSE2.o \
out/src/opts/SkUtils_opts_SSE2.o
$(SSE2_OBJS) : CFLAGS := $(CFLAGS_SSE2)
diff --git a/include/core/SkBlitRow.h b/include/core/SkBlitRow.h
index a592167..2b652c2 100644
--- a/include/core/SkBlitRow.h
+++ b/include/core/SkBlitRow.h
@@ -55,24 +55,16 @@
const SkPMColor* SK_RESTRICT src,
int count, U8CPU alpha);
- static void Color32_BlitRow32(SkPMColor dst[], const SkPMColor src[],
- int count, SkPMColor color);
-
static Proc32 Factory32(unsigned flags32);
/** Blend a single color onto a row of S32 pixels, writing the result
into a row of D32 pixels. src and dst may be the same memory, but
if they are not, they may not overlap.
*/
- static void Color32(SkPMColor dst[], const SkPMColor src[], int count,
- SkPMColor color);
+ static void Color32(SkPMColor dst[], const SkPMColor src[],
+ int count, SkPMColor color);
- /** Blend a single color onto a row of 32bit pixels, writing the result
- into the same row.
- */
- static void Color32(SkPMColor row[], int count, SkPMColor color) {
- Color32(row, row, count, color);
- }
+ static ColorProc ColorProcFactory();
/** These static functions are called by the Factory and Factory32
functions, and should return either NULL, or a
diff --git a/src/core/SkBlitRow_D32.cpp b/src/core/SkBlitRow_D32.cpp
index 1f154a4..f1dcb30 100644
--- a/src/core/SkBlitRow_D32.cpp
+++ b/src/core/SkBlitRow_D32.cpp
@@ -86,18 +86,17 @@
return proc;
}
-void SkBlitRow::Color32(SkPMColor dst[], const SkPMColor src[], int count,
- SkPMColor color) {
+SkBlitRow::ColorProc SkBlitRow::ColorProcFactory() {
SkBlitRow::ColorProc proc = PlatformColorProc();
if (NULL == proc) {
- Color32_BlitRow32(dst, src, count, color);
- return;
+ proc = Color32; // no platform-specific proc available: fall back to SkBlitRow::Color32
}
- proc(dst, src, count, color);
+ SkASSERT(proc);
+ return proc;
}
-void SkBlitRow::Color32_BlitRow32(SkPMColor dst[], const SkPMColor src[],
- int count, SkPMColor color) {
+void SkBlitRow::Color32(SkPMColor dst[], const SkPMColor src[],
+ int count, SkPMColor color) {
if (count > 0) {
if (0 == color) {
if (src != dst) {
diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp
index 37bd0da..905ab6b 100644
--- a/src/core/SkBlitter_ARGB32.cpp
+++ b/src/core/SkBlitter_ARGB32.cpp
@@ -51,6 +51,7 @@
fSrcB = SkAlphaMul(SkColorGetB(color), scale);
fPMColor = SkPackARGB32(fSrcA, fSrcR, fSrcG, fSrcB);
+ fColor32Proc = SkBlitRow::ColorProcFactory();
}
const SkBitmap* SkARGB32_Blitter::justAnOpaqueColor(uint32_t* value) {
@@ -69,7 +70,8 @@
void SkARGB32_Blitter::blitH(int x, int y, int width) {
SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width());
- SkBlitRow::Color32(fDevice.getAddr32(x, y), width, fPMColor);
+ uint32_t* device = fDevice.getAddr32(x, y);
+ fColor32Proc(device, device, width, fPMColor);
}
void SkARGB32_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
@@ -94,7 +96,7 @@
sk_memset32(device, color, count);
} else {
uint32_t sc = SkAlphaMulQ(color, SkAlpha255To256(aa));
- SkBlitRow::Color32(device, count, sc);
+ fColor32Proc(device, device, count, sc);
}
}
runs += count;
@@ -286,7 +288,7 @@
size_t rowBytes = fDevice.rowBytes();
while (--height >= 0) {
- SkBlitRow::Color32(device, width, color);
+ fColor32Proc(device, device, width, color);
device = (uint32_t*)((char*)device + rowBytes);
}
}
diff --git a/src/core/SkCoreBlitters.h b/src/core/SkCoreBlitters.h
index 6204b2c..32e8035 100644
--- a/src/core/SkCoreBlitters.h
+++ b/src/core/SkCoreBlitters.h
@@ -101,7 +101,8 @@
virtual const SkBitmap* justAnOpaqueColor(uint32_t*);
protected:
- SkColor fPMColor;
+ SkColor fPMColor;
+ SkBlitRow::ColorProc fColor32Proc;
private:
unsigned fSrcA, fSrcR, fSrcG, fSrcB;
diff --git a/src/effects/SkColorFilters.cpp b/src/effects/SkColorFilters.cpp
index 733e1ae..a396d35 100644
--- a/src/effects/SkColorFilters.cpp
+++ b/src/effects/SkColorFilters.cpp
@@ -75,7 +75,8 @@
class SkSrcOver_XfermodeColorFilter : public Sk_XfermodeColorFilter {
public:
- SkSrcOver_XfermodeColorFilter(SkColor color) : INHERITED(color) {}
+ SkSrcOver_XfermodeColorFilter(SkColor color)
+ : INHERITED(color), fColor32Proc(SkBlitRow::ColorProcFactory()) {}
virtual uint32_t getFlags() {
if (SkGetPackedA32(fPMColor) == 0xFF) {
@@ -87,7 +88,7 @@
virtual void filterSpan(const SkPMColor shader[], int count,
SkPMColor result[]) {
- SkBlitRow::Color32(result, shader, count, fPMColor);
+ fColor32Proc(result, shader, count, fPMColor);
}
virtual void filterSpan16(const uint16_t shader[], int count,
@@ -100,7 +101,7 @@
virtual Factory getFactory() { return CreateProc; }
SkSrcOver_XfermodeColorFilter(SkFlattenableReadBuffer& buffer)
- : INHERITED(buffer) {}
+ : INHERITED(buffer), fColor32Proc(SkBlitRow::ColorProcFactory()) {}
private:
static SkFlattenable* CreateProc(SkFlattenableReadBuffer& buffer) {
@@ -108,6 +109,7 @@
}
typedef Sk_XfermodeColorFilter INHERITED;
+ SkBlitRow::ColorProc fColor32Proc;
};
//////////////////////////////////////////////////////////////////////////////