blob: 6370058f0db7bfa4bab85af1e3604bca7651cfed [file] [log] [blame]
senorblanco@chromium.org4e753552009-11-16 21:09:00 +00001/*
epoger@google.comec3ed6a2011-07-28 14:26:00 +00002 * Copyright 2009 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
senorblanco@chromium.org4e753552009-11-16 21:09:00 +00006 */
7
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +00008#include "SkBitmapProcState_opts_SSE2.h"
tomhudson@google.com95ad1552012-02-14 18:28:54 +00009#include "SkBitmapProcState_opts_SSSE3.h"
reed@google.com58af9a62011-10-12 13:43:52 +000010#include "SkBlitMask.h"
tomhudson@google.com8dd90a92012-03-19 13:49:50 +000011#include "SkBlitRow.h"
12#include "SkBlitRect_opts_SSE2.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000013#include "SkBlitRow_opts_SSE2.h"
14#include "SkUtils_opts_SSE2.h"
15#include "SkUtils.h"
16
tomhudson@google.comea854942012-05-17 15:09:17 +000017#if defined(_MSC_VER) && defined(_WIN64)
18#include <intrin.h>
19#endif
20
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000021/* This file must *not* be compiled with -msse or -msse2, otherwise
22 gcc may generate sse2 even for scalar ops (and thus give an invalid
23 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp
24 in this directory should be compiled with -msse2. */
25
tomhudson@google.com95ad1552012-02-14 18:28:54 +000026
/*
 *  getcpuid(info_type, info)
 *
 *  Executes the CPUID instruction for leaf `info_type` with the sub-leaf
 *  register (ECX) cleared, and stores the resulting EAX, EBX, ECX and EDX
 *  values in info[0], info[1], info[2] and info[3] respectively.
 *
 *  One implementation is selected per compiler/target below; all of them
 *  follow the same contract.
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // 64-bit builds go through the compiler intrinsic from <intrin.h>.
    __cpuid(info, info_type);
#else
    __asm {
        mov    eax, [info_type]
        xor    ecx, ecx          // sub-leaf 0, matching the __cpuid intrinsic
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#else
#if defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // EBX must survive this asm so the code stays compatible with -fPIC.
    // The caller's EBX is parked in EDI across CPUID and swapped back
    // afterwards, which leaves CPUID's EBX result in EDI. The output is
    // pinned to EDI ("=D") rather than a free-choice register: with "=r" the
    // compiler could select EBX itself, and restoring EBX would then
    // overwrite the result.
    asm volatile (
        "movl %%ebx, %%edi \n\t"
        "cpuid \n\t"
        "xchgl %%ebx, %%edi \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
#endif
#endif
66
#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
/* Either the target is 64-bit x86 (where SSE2 is always present) or the build
   configuration already guarantees SSE2, so no runtime probe is needed. */
static inline bool hasSSE2() {
    return true;
}
#else

/* Runtime probe: queries CPUID leaf 1 and tests bit 26 of the EDX word. */
static inline bool hasSSE2() {
    const int kSSE2Mask = 1 << 26;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    return 0 != (regs[3] & kSSE2Mask);
}
#endif
80
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
/* The build configuration already guarantees SSSE3, so no runtime probe is
   needed. */
static inline bool hasSSSE3() {
    return true;
}
#else

/* Runtime probe: queries CPUID leaf 1 and tests bit 9 (0x200) of the ECX word. */
static inline bool hasSSSE3() {
    const int kSSSE3Mask = 0x200;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    return 0 != (regs[2] & kSSSE3Mask);
}
#endif
tomhudson@google.com95ad1552012-02-14 18:28:54 +000094
reed@google.comedb606c2011-10-18 13:56:50 +000095static bool cachedHasSSE2() {
96 static bool gHasSSE2 = hasSSE2();
97 return gHasSSE2;
98}
99
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000100static bool cachedHasSSSE3() {
101 static bool gHasSSSE3 = hasSSSE3();
102 return gHasSSSE3;
103}
104
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000105void SkBitmapProcState::platformProcs() {
tomhudson@google.com06a73132012-02-22 18:30:43 +0000106 if (cachedHasSSSE3()) {
tomhudson@google.comd4caa2b2012-05-16 14:19:46 +0000107#if !defined(SK_BUILD_FOR_ANDROID)
108 // Disable SSSE3 optimization for Android x86
tomhudson@google.com06a73132012-02-22 18:30:43 +0000109 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
110 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
111 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
112 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
113 }
tomhudson@google.comae29b882012-03-06 14:59:04 +0000114
115 if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
116 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
117 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
118 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
119 }
tomhudson@google.comd4caa2b2012-05-16 14:19:46 +0000120#endif
tomhudson@google.com06a73132012-02-22 18:30:43 +0000121 } else if (cachedHasSSE2()) {
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000122 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
123 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
senorblanco@chromium.orgf3f0bd72009-12-10 22:46:31 +0000124 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
125 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000126 }
reed@google.com78662282012-07-24 13:53:23 +0000127
128 if (fSampleProc16 == S32_D16_filter_DX) {
129 fSampleProc16 = S32_D16_filter_DX_SSE2;
130 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000131 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000132
133 if (cachedHasSSSE3() || cachedHasSSE2()) {
134 if (fMatrixProc == ClampX_ClampY_filter_scale) {
135 fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
136 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
137 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
138 }
tomhudson@google.com5efaf262012-02-28 15:41:49 +0000139
140 if (fMatrixProc == ClampX_ClampY_filter_affine) {
141 fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
142 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
143 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
144 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000145 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000146}
147
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000148static SkBlitRow::Proc32 platform_32_procs[] = {
149 NULL, // S32_Opaque,
150 S32_Blend_BlitRow32_SSE2, // S32_Blend,
151 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
152 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
153};
154
155SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
156 return NULL;
157}
158
159SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
reed@google.com7329dc92012-07-27 13:29:59 +0000160 return NULL;
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000161}
162
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000163SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000164 if (cachedHasSSE2()) {
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000165 return Color32_SSE2;
166 } else {
167 return NULL;
168 }
169}
170
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000171SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
reed@google.comedb606c2011-10-18 13:56:50 +0000172 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000173 return platform_32_procs[flags];
174 } else {
175 return NULL;
176 }
177}
178
reed@google.com981d4792011-03-09 12:55:47 +0000179
reed@google.come901b4c2011-11-14 21:56:45 +0000180SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
181 SkMask::Format maskFormat,
182 SkColor color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000183 if (SkMask::kA8_Format != maskFormat) {
184 return NULL;
185 }
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000186
reed@google.come901b4c2011-11-14 21:56:45 +0000187 ColorProc proc = NULL;
reed@google.comedb606c2011-10-18 13:56:50 +0000188 if (cachedHasSSE2()) {
reed@google.com981d4792011-03-09 12:55:47 +0000189 switch (dstConfig) {
190 case SkBitmap::kARGB_8888_Config:
reed@google.come6ea6062011-07-07 19:12:50 +0000191 // The SSE2 version is not (yet) faster for black, so we check
192 // for that.
193 if (SK_ColorBLACK != color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000194 proc = SkARGB32_A8_BlitMask_SSE2;
reed@google.come6ea6062011-07-07 19:12:50 +0000195 }
reed@google.com981d4792011-03-09 12:55:47 +0000196 break;
197 default:
reed@google.come901b4c2011-11-14 21:56:45 +0000198 break;
reed@google.com981d4792011-03-09 12:55:47 +0000199 }
200 }
201 return proc;
202}
203
tomhudson@google.comd6770e62012-02-14 16:01:15 +0000204SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
205 if (cachedHasSSE2()) {
206 if (isOpaque) {
207 return SkBlitLCD16OpaqueRow_SSE2;
208 } else {
209 return SkBlitLCD16Row_SSE2;
210 }
211 } else {
212 return NULL;
213 }
214
215}
reed@google.come901b4c2011-11-14 21:56:45 +0000216SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
reed@google.com1750bf12011-11-15 19:51:02 +0000217 SkMask::Format maskFormat,
218 RowFlags flags) {
reed@google.come901b4c2011-11-14 21:56:45 +0000219 return NULL;
220}
221
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000222SkMemset16Proc SkMemset16GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000223 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000224 return sk_memset16_SSE2;
225 } else {
226 return NULL;
227 }
228}
229
230SkMemset32Proc SkMemset32GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000231 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000232 return sk_memset32_SSE2;
233 } else {
234 return NULL;
235 }
236}
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000237
caryclark@google.com83ecdc32012-06-06 12:10:26 +0000238SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
239
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000240SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
241 if (cachedHasSSE2()) {
242 return ColorRect32_SSE2;
243 } else {
244 return NULL;
245 }
246}