More SSE2-ification; fix for gcc -msse2.
Review URL: http://codereview.appspot.com/154163
git-svn-id: http://skia.googlecode.com/svn/trunk@428 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/Makefile b/Makefile
index 0bf702c..7973c25 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@
CC := gcc
C_INCLUDES := -Iinclude/config -Iinclude/core -Iinclude/effects -Iinclude/images -Iinclude/utils
CFLAGS := -Wall -g # -O2
+CFLAGS_SSE2 = $(CFLAGS) -msse2
LINKER_OPTS := -lpthread
DEFINES := -DSK_CAN_USE_FLOAT
HIDE = @
@@ -76,6 +77,11 @@
SRC_LIST += src/images/SkScaledBitmapSampler.cpp
endif
+# For these files, and these files only, compile with -msse2.
+SSE2_OBJS := out/src/opts/SkBlitRow_opts_SSE2.o \
+ out/src/opts/SkUtils_opts_SSE2.o
+$(SSE2_OBJS) : CFLAGS := $(CFLAGS_SSE2)
+
out/%.o : %.cpp
@mkdir -p $(dir $@)
$(HIDE)$(CC) $(C_INCLUDES) $(CFLAGS) $(DEFINES) -c $< -o $@
diff --git a/include/core/SkUtils.h b/include/core/SkUtils.h
index 9f3b1d6f..0700aeb 100644
--- a/include/core/SkUtils.h
+++ b/include/core/SkUtils.h
@@ -27,6 +27,8 @@
@param count The number of times value should be copied into the buffer.
*/
void sk_memset16_portable(uint16_t dst[], uint16_t value, int count);
+typedef void (*SkMemset16Proc)(uint16_t dst[], uint16_t value, int count);  // same signature as sk_memset16_portable
+SkMemset16Proc SkMemset16GetPlatformProc();  // per-platform hook: returns an optimized routine, or NULL when there is none
/** Similar to memset(), but it assigns a 32bit value into the buffer.
@param buffer The memory to have value copied into it
@@ -34,6 +36,8 @@
@param count The number of times value should be copied into the buffer.
*/
void sk_memset32_portable(uint32_t dst[], uint32_t value, int count);
+typedef void (*SkMemset32Proc)(uint32_t dst[], uint32_t value, int count);  // same signature as sk_memset32_portable
+SkMemset32Proc SkMemset32GetPlatformProc();  // per-platform hook: returns an optimized routine, or NULL when there is none
#ifdef ANDROID
#include "cutils/memory.h"
@@ -43,14 +47,13 @@
#endif
#ifndef sk_memset16
- #define sk_memset16(dst, value, count) sk_memset16_portable(dst, value, count)
+extern SkMemset16Proc sk_memset16;  // function pointer (defined in SkUtils.cpp); called as sk_memset16(dst, value, count)
#endif
#ifndef sk_memset32
- #define sk_memset32(dst, value, count) sk_memset32_portable(dst, value, count)
+extern SkMemset32Proc sk_memset32;  // function pointer (defined in SkUtils.cpp); called as sk_memset32(dst, value, count)
#endif
-
///////////////////////////////////////////////////////////////////////////
#define kMaxBytesInUTF8Sequence 4
diff --git a/src/core/SkUtils.cpp b/src/core/SkUtils.cpp
index edc5b74..a88233f 100644
--- a/src/core/SkUtils.cpp
+++ b/src/core/SkUtils.cpp
@@ -124,6 +124,27 @@
}
}
+#ifndef ANDROID
+static void sk_memset16_stub(uint16_t dst[], uint16_t value, int count)  // first-call trampoline: selects the real memset16 routine, then forwards to it
+{
+ SkMemset16Proc proc = SkMemset16GetPlatformProc();  // NULL means no platform-specific routine is available
+ sk_memset16 = proc ? proc : sk_memset16_portable;  // rebind the global so later calls bypass this stub; NOTE(review): write is unsynchronized - confirm concurrent first calls are acceptable
+ sk_memset16(dst, value, count);  // complete the current call through the routine just selected
+}
+
+SkMemset16Proc sk_memset16 = sk_memset16_stub;  // starts out pointing at the stub; replaced on first use
+
+static void sk_memset32_stub(uint32_t dst[], uint32_t value, int count)  // first-call trampoline: selects the real memset32 routine, then forwards to it
+{
+ SkMemset32Proc proc = SkMemset32GetPlatformProc();  // NULL means no platform-specific routine is available
+ sk_memset32 = proc ? proc : sk_memset32_portable;  // rebind the global so later calls bypass this stub; NOTE(review): write is unsynchronized - confirm concurrent first calls are acceptable
+ sk_memset32(dst, value, count);  // complete the current call through the routine just selected
+}
+
+SkMemset32Proc sk_memset32 = sk_memset32_stub;  // starts out pointing at the stub; replaced on first use
+
+#endif
+
//////////////////////////////////////////////////////////////////////////////
/* 0xxxxxxx 1 total
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
index 7428584..8983093 100644
--- a/src/opts/SkBlitRow_opts_SSE2.cpp
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp
@@ -15,47 +15,17 @@
** limitations under the License.
*/
-#include "SkBlitRow.h"
+#include "SkBlitRow_opts_SSE2.h"
#include "SkColorPriv.h"
-#include "SkDither.h"
#include <emmintrin.h>
-#ifdef _MSC_VER
-static void getcpuid(int info_type, int info[4])
-{
- __asm {
- mov eax, [info_type]
- cpuid
- mov edi, [info]
- mov [edi], eax
- mov [edi+4], ebx
- mov [edi+8], ecx
- mov [edi+12], edx
- }
-}
-#else
-static void getcpuid(int info_type, int info[4])
-{
- // We save and restore ebx, so this code can be compatible with -fPIC
- asm volatile (
- "pushl %%ebx \n\t"
- "cpuid \n\t"
- "movl %%ebx, %1 \n\t"
- "popl %%ebx \n\t"
- : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
- : "a"(info_type)
- :
- );
-}
-#endif
-
/* SSE2 version of S32_Blend_BlitRow32()
* portable version is in core/SkBlitRow_D32.cpp
*/
-static void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src,
- int count, U8CPU alpha) {
+void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
+ const SkPMColor* SK_RESTRICT src,
+ int count, U8CPU alpha) {
SkASSERT(alpha <= 255);
if (count <= 0) {
return;
@@ -108,7 +78,7 @@
src = reinterpret_cast<const SkPMColor*>(s);
dst = reinterpret_cast<SkPMColor*>(d);
- while (count > 0) {
+ while (count > 0) {
*dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale);
src++;
dst++;
@@ -116,9 +86,9 @@
}
}
-static void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src,
- int count, U8CPU alpha) {
+void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
+ const SkPMColor* SK_RESTRICT src,
+ int count, U8CPU alpha) {
SkASSERT(alpha == 255);
if (count <= 0) {
return;
@@ -228,9 +198,9 @@
}
}
-static void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
- const SkPMColor* SK_RESTRICT src,
- int count, U8CPU alpha) {
+void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
+ const SkPMColor* SK_RESTRICT src,
+ int count, U8CPU alpha) {
SkASSERT(alpha <= 255);
if (count <= 0) {
return;
@@ -307,36 +277,3 @@
count--;
}
}
-
-///////////////////////////////////////////////////////////////////////////////
-
-static const SkBlitRow::Proc32 platform_32_procs[] = {
- NULL, // S32_Opaque,
- S32_Blend_BlitRow32_SSE2, // S32_Blend,
- S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
- S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
-};
-
-SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
- return NULL;
-}
-
-SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
- return NULL;
-}
-
-SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
- static bool once;
- static bool hasSSE2;
- if (!once) {
- int cpu_info[4] = { 0 };
- getcpuid(1, cpu_info);
- hasSSE2 = (cpu_info[3] & (1<<26)) != 0;
- once = true;
- }
- if (hasSSE2) {
- return platform_32_procs[flags];
- } else {
- return NULL;
- }
-}
diff --git a/src/opts/SkBlitRow_opts_SSE2.h b/src/opts/SkBlitRow_opts_SSE2.h
new file mode 100644
index 0000000..c22edd8
--- /dev/null
+++ b/src/opts/SkBlitRow_opts_SSE2.h
@@ -0,0 +1,30 @@
+/*
+ **
+ ** Copyright 2009, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#include "SkBlitRow.h"
+
+void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,  // S32_Blend entry of the SSE2 Proc32 table; defined in SkBlitRow_opts_SSE2.cpp
+ const SkPMColor* SK_RESTRICT src,
+ int count, U8CPU alpha);
+
+void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,  // S32A_Opaque entry of the SSE2 Proc32 table; defined in SkBlitRow_opts_SSE2.cpp
+ const SkPMColor* SK_RESTRICT src,
+ int count, U8CPU alpha);
+
+void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,  // S32A_Blend entry of the SSE2 Proc32 table; defined in SkBlitRow_opts_SSE2.cpp
+ const SkPMColor* SK_RESTRICT src,
+ int count, U8CPU alpha);
diff --git a/src/opts/SkUtils_opts_SSE2.cpp b/src/opts/SkUtils_opts_SSE2.cpp
new file mode 100644
index 0000000..0537033
--- /dev/null
+++ b/src/opts/SkUtils_opts_SSE2.cpp
@@ -0,0 +1,77 @@
+/*
+ **
+ ** Copyright 2009, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#include <emmintrin.h>
+#include "SkUtils_opts_SSE2.h"
+
+void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count)  // writes value into dst[0..count) using 16-byte SSE2 stores for the bulk
+{
+ SkASSERT(dst != NULL && count >= 0);
+
+ // dst must be 2-byte aligned.
+ SkASSERT((((size_t) dst) & 0x01) == 0);
+
+ if (count >= 32) {  // shorter fills are handled entirely by the scalar loop at the bottom
+ while (((size_t)dst) & 0x0F) {  // scalar stores until dst is 16-byte aligned; count is not re-checked here, so termination relies on the alignment asserted above
+ *dst++ = value;
+ --count;
+ }
+ __m128i *d = reinterpret_cast<__m128i*>(dst);
+ __m128i value_wide = _mm_set1_epi16(value);  // value replicated into every 16-bit lane of the 128-bit register
+ while (count >= 32) {  // four 16-byte stores cover 32 uint16_t elements per iteration
+ _mm_store_si128(d++, value_wide);  // aligned store: d was brought to a 16-byte boundary above
+ _mm_store_si128(d++, value_wide);
+ _mm_store_si128(d++, value_wide);
+ _mm_store_si128(d++, value_wide);
+ count -= 32;
+ }
+ dst = reinterpret_cast<uint16_t*>(d);  // resume scalar writes where the wide stores stopped
+ }
+ while (count > 0) {  // tail: whatever is left after the wide loop (or the whole fill when count < 32)
+ *dst++ = value;
+ --count;
+ }
+}
+
+void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)  // writes value into dst[0..count) using 16-byte SSE2 stores for the bulk
+{
+ SkASSERT(dst != NULL && count >= 0);
+
+ // dst must be 4-byte aligned.
+ SkASSERT((((size_t) dst) & 0x03) == 0);
+
+ if (count >= 16) {  // shorter fills are handled entirely by the scalar loop at the bottom
+ while (((size_t)dst) & 0x0F) {  // scalar stores until dst is 16-byte aligned; count is not re-checked here, so termination relies on the alignment asserted above
+ *dst++ = value;
+ --count;
+ }
+ __m128i *d = reinterpret_cast<__m128i*>(dst);
+ __m128i value_wide = _mm_set1_epi32(value);  // value replicated into every 32-bit lane of the 128-bit register
+ while (count >= 16) {  // four 16-byte stores cover 16 uint32_t elements per iteration
+ _mm_store_si128(d++, value_wide);  // aligned store: d was brought to a 16-byte boundary above
+ _mm_store_si128(d++, value_wide);
+ _mm_store_si128(d++, value_wide);
+ _mm_store_si128(d++, value_wide);
+ count -= 16;
+ }
+ dst = reinterpret_cast<uint32_t*>(d);  // resume scalar writes where the wide stores stopped
+ }
+ while (count > 0) {  // tail: whatever is left after the wide loop (or the whole fill when count < 16)
+ *dst++ = value;
+ --count;
+ }
+}
diff --git a/src/opts/SkUtils_opts_SSE2.h b/src/opts/SkUtils_opts_SSE2.h
new file mode 100644
index 0000000..a54e82f
--- /dev/null
+++ b/src/opts/SkUtils_opts_SSE2.h
@@ -0,0 +1,21 @@
+/*
+ **
+ ** Copyright 2009, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#include "SkTypes.h"
+
+void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count);  // SSE2 16-bit fill; defined in SkUtils_opts_SSE2.cpp
+void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count);  // SSE2 32-bit fill; defined in SkUtils_opts_SSE2.cpp
diff --git a/src/opts/SkUtils_opts_none.cpp b/src/opts/SkUtils_opts_none.cpp
new file mode 100644
index 0000000..108ce9c
--- /dev/null
+++ b/src/opts/SkUtils_opts_none.cpp
@@ -0,0 +1,26 @@
+/*
+ **
+ ** Copyright 2009, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#include "SkUtils.h"
+
+SkMemset16Proc SkMemset16GetPlatformProc() {  // build without platform optimizations: no specialized memset16
+ return NULL;  // the stub in SkUtils.cpp substitutes sk_memset16_portable for NULL
+}
+
+SkMemset32Proc SkMemset32GetPlatformProc() {  // build without platform optimizations: no specialized memset32
+ return NULL;  // the stub in SkUtils.cpp substitutes sk_memset32_portable for NULL
+}
diff --git a/src/opts/opts_check_SSE2.cpp b/src/opts/opts_check_SSE2.cpp
new file mode 100644
index 0000000..4757ed8
--- /dev/null
+++ b/src/opts/opts_check_SSE2.cpp
@@ -0,0 +1,104 @@
+/*
+ **
+ ** Copyright 2009, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#include "SkBlitRow_opts_SSE2.h"
+#include "SkUtils_opts_SSE2.h"
+#include "SkUtils.h"
+
+/* This file must *not* be compiled with -msse or -msse2; otherwise gcc
+ may emit SSE2 instructions even for scalar ops, and the detection code
+ below would hit an invalid instruction on a Pentium 3. Only the
+ *_opts_SSE2.cpp files in this directory should be compiled with -msse2. */
+
+#if defined(__x86_64__) || defined(_M_X64)
+/* All x86_64 machines have SSE2, so don't even bother checking. MSVC spells the 64-bit target macro _M_X64. */
+static inline bool hasSSE2() {  // 64-bit build: no CPUID probe (and no 32-bit inline asm) is needed
+    return true;
+}
+#else
+#ifdef _MSC_VER
+static inline void getcpuid(int info_type, int info[4]) {  // MSVC inline-asm variant: runs CPUID for leaf info_type and stores eax, ebx, ecx, edx into info[0..3]
+ __asm {
+ mov eax, [info_type]  // eax selects the CPUID leaf
+ cpuid
+ mov edi, [info]  // edi = address of the caller's output array
+ mov [edi], eax
+ mov [edi+4], ebx
+ mov [edi+8], ecx
+ mov [edi+12], edx
+ }
+}
+#else
+static inline void getcpuid(int info_type, int info[4]) {  // GCC inline-asm variant: runs CPUID with eax = info_type; results go to info[0..3]
+ // We save and restore ebx, so this code can be compatible with -fPIC
+ asm volatile (
+ "pushl %%ebx \n\t"
+ "cpuid \n\t"
+ "movl %%ebx, %1 \n\t"  // copy CPUID's ebx result into the register chosen for info[1]
+ "popl %%ebx \n\t"
+ : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])  // NOTE(review): "=r" could presumably be assigned ebx in non-PIC builds, which popl would then overwrite - confirm
+ : "a"(info_type)
+ :
+ );
+}
+#endif
+
+static inline bool hasSSE2() {  // 32-bit build: probes the CPU at run time; note that CPUID is executed on every call (no caching)
+ int cpu_info[4] = { 0 };
+ getcpuid(1, cpu_info);  // leaf 1; cpu_info[3] receives edx
+ return (cpu_info[3] & (1<<26)) != 0;  // bit 26 of edx is tested as the SSE2 feature flag
+}
+#endif
+
+static const SkBlitRow::Proc32 platform_32_procs[] = {  // read-only table indexed by the flags passed to PlatformProcs32
+    NULL,                       // S32_Opaque,
+    S32_Blend_BlitRow32_SSE2,   // S32_Blend,
+    S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
+    S32A_Blend_BlitRow32_SSE2,  // S32A_Blend,
+};
+
+SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {  // no platform-specific 4444 blitters here; flags is ignored
+ return NULL;
+}
+
+SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {  // no platform-specific 565 blitters here; flags is ignored
+ return NULL;
+}
+
+SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {  // returns the SSE2 32-bit blitter for flags, or NULL when the CPU lacks SSE2
+ if (hasSSE2()) {
+ return platform_32_procs[flags];  // flags indexes the table directly, with no range check; entry 0 is itself NULL
+ } else {
+ return NULL;
+ }
+}
+
+SkMemset16Proc SkMemset16GetPlatformProc() {  // returns the SSE2 memset16 when the CPU supports SSE2, otherwise NULL
+ if (hasSSE2()) {
+ return sk_memset16_SSE2;
+ } else {
+ return NULL;  // the stub in SkUtils.cpp substitutes sk_memset16_portable for NULL
+ }
+}
+
+SkMemset32Proc SkMemset32GetPlatformProc() {  // returns the SSE2 memset32 when the CPU supports SSE2, otherwise NULL
+ if (hasSSE2()) {
+ return sk_memset32_SSE2;
+ } else {
+ return NULL;  // the stub in SkUtils.cpp substitutes sk_memset32_portable for NULL
+ }
+}
diff --git a/src/opts/opts_files.mk b/src/opts/opts_files.mk
index d756f68..ae8fd77 100644
--- a/src/opts/opts_files.mk
+++ b/src/opts/opts_files.mk
@@ -1,4 +1,4 @@
SOURCE := \
SkBlitRow_opts_none.cpp \
- SkBitmapProcState_opts_none.cpp
-
+ SkBitmapProcState_opts_none.cpp \
+ SkUtils_opts_none.cpp