blob: 8f0bdac8fd88b647bb88b3c1fca6f640526914b1 [file] [log] [blame]
/*
 * Copyright 2009 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +00008#include "SkBitmapProcState_opts_SSE2.h"
tomhudson@google.com95ad1552012-02-14 18:28:54 +00009#include "SkBitmapProcState_opts_SSSE3.h"
humper@google.comb0889472013-07-09 21:37:14 +000010#include "SkBitmapFilter_opts_SSE2.h"
reed@google.com58af9a62011-10-12 13:43:52 +000011#include "SkBlitMask.h"
tomhudson@google.com8dd90a92012-03-19 13:49:50 +000012#include "SkBlitRow.h"
13#include "SkBlitRect_opts_SSE2.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000014#include "SkBlitRow_opts_SSE2.h"
15#include "SkUtils_opts_SSE2.h"
16#include "SkUtils.h"
17
humper@google.comb0889472013-07-09 21:37:14 +000018#include "SkRTConf.h"
19
tomhudson@google.comea854942012-05-17 15:09:17 +000020#if defined(_MSC_VER) && defined(_WIN64)
21#include <intrin.h>
22#endif
23
/* This file must *not* be compiled with -msse or -msse2, otherwise
   gcc may generate sse2 even for scalar ops (and thus give an invalid
   instruction on Pentium3 on the code below).  Only files named *_SSE2.cpp
   in this directory should be compiled with -msse2. */
28
tomhudson@google.com95ad1552012-02-14 18:28:54 +000029
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000030#ifdef _MSC_VER
31static inline void getcpuid(int info_type, int info[4]) {
tomhudson@google.comea854942012-05-17 15:09:17 +000032#if defined(_WIN64)
33 __cpuid(info, info_type);
34#else
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000035 __asm {
36 mov eax, [info_type]
37 cpuid
38 mov edi, [info]
39 mov [edi], eax
40 mov [edi+4], ebx
41 mov [edi+8], ecx
42 mov [edi+12], edx
43 }
tomhudson@google.comea854942012-05-17 15:09:17 +000044#endif
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000045}
46#else
/**
 *  Executes the CPUID instruction (GCC/Clang flavour).
 *
 *  @param info_type  CPUID leaf, loaded into EAX before the instruction.
 *  @param info       Receives EAX, EBX, ECX, EDX in info[0..3] respectively.
 *
 *  ECX is zeroed on entry so that leaves which consult a sub-leaf index
 *  return deterministic data.
 */
#if defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // EBX may be reserved as the PIC/GOT register, so it must hold its
    // original value when the asm statement ends.  The EBX result is handed
    // back through EDI, which is pinned with the "D" constraint: a generic
    // "r" output could itself be allocated to EBX in non-PIC builds, in which
    // case restoring EBX would overwrite the value being returned.
    asm volatile (
        "movl  %%ebx, %%edi \n\t"   // stash the caller's EBX
        "cpuid              \n\t"
        "xchgl %%ebx, %%edi \n\t"   // EDI <- CPUID's EBX, EBX <- caller's EBX
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
#endif
tomhudson@google.com95ad1552012-02-14 18:28:54 +000068#endif
69
/**
 *  Reports whether SSE2 instructions may be used.
 *
 *  On x86_64 / Win64 targets, or when the build already guarantees SSE2
 *  (SK_CPU_SSE_LEVEL), the answer is fixed at compile time.  Otherwise the
 *  CPU is queried: CPUID leaf 1, bit 26 of EDX (info[3]).
 */
static inline bool hasSSE2() {
#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
    return true;
#else
    int regs[4] = { 0 };
    getcpuid(1, regs);
    const int kSSE2Bit = 1 << 26;
    return 0 != (regs[3] & kSSE2Bit);
#endif
}
83
/**
 *  Reports whether SSSE3 instructions may be used.
 *
 *  When the build already guarantees SSSE3 (SK_CPU_SSE_LEVEL) the answer is
 *  fixed at compile time.  Otherwise the CPU is queried: CPUID leaf 1,
 *  bit 9 (0x200) of ECX (info[2]).
 */
static inline bool hasSSSE3() {
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
    return true;
#else
    int regs[4] = { 0 };
    getcpuid(1, regs);
    const int kSSSE3Bit = 1 << 9;
    return 0 != (regs[2] & kSSSE3Bit);
#endif
}
tomhudson@google.com95ad1552012-02-14 18:28:54 +000097
reed@google.comedb606c2011-10-18 13:56:50 +000098static bool cachedHasSSE2() {
99 static bool gHasSSE2 = hasSSE2();
100 return gHasSSE2;
101}
102
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000103static bool cachedHasSSSE3() {
104 static bool gHasSSSE3 = hasSSSE3();
105 return gHasSSSE3;
106}
107
humper@google.comb0889472013-07-09 21:37:14 +0000108SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
109
reed@google.comfed04b32013-09-05 20:31:17 +0000110void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
humper@google.com138ebc32013-07-19 20:20:04 +0000111 if (cachedHasSSE2()) {
reed@google.comfed04b32013-09-05 20:31:17 +0000112 procs->fExtraHorizontalReads = 3;
113 procs->fConvolveVertically = &convolveVertically_SSE2;
114 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
115 procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
116 procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
humper@google.com138ebc32013-07-19 20:20:04 +0000117 }
118}
119
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000120void SkBitmapProcState::platformProcs() {
tomhudson@google.com06a73132012-02-22 18:30:43 +0000121 if (cachedHasSSSE3()) {
122 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
123 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
124 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
125 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
126 }
tomhudson@google.comae29b882012-03-06 14:59:04 +0000127
128 if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
129 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
130 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
131 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
132 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000133 } else if (cachedHasSSE2()) {
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000134 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
135 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
senorblanco@chromium.orgf3f0bd72009-12-10 22:46:31 +0000136 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
137 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000138 }
reed@google.com78662282012-07-24 13:53:23 +0000139
140 if (fSampleProc16 == S32_D16_filter_DX) {
141 fSampleProc16 = S32_D16_filter_DX_SSE2;
142 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000143 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000144
145 if (cachedHasSSSE3() || cachedHasSSE2()) {
146 if (fMatrixProc == ClampX_ClampY_filter_scale) {
147 fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
148 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
149 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
150 }
tomhudson@google.com5efaf262012-02-28 15:41:49 +0000151
152 if (fMatrixProc == ClampX_ClampY_filter_affine) {
153 fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
154 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
155 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
156 }
humper@google.comb0889472013-07-09 21:37:14 +0000157 if (c_hqfilter_sse) {
mtklein@google.com0dc546c2013-08-26 16:21:35 +0000158 if (fShaderProc32 == highQualityFilter32) {
humper@google.comb0889472013-07-09 21:37:14 +0000159 fShaderProc32 = highQualityFilter_SSE2;
160 }
humper@google.comb0889472013-07-09 21:37:14 +0000161 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000162 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000163}
164
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000165static SkBlitRow::Proc32 platform_32_procs[] = {
166 NULL, // S32_Opaque,
167 S32_Blend_BlitRow32_SSE2, // S32_Blend,
168 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
169 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
170};
171
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000172SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
reed@google.com7329dc92012-07-27 13:29:59 +0000173 return NULL;
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000174}
175
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000176SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000177 if (cachedHasSSE2()) {
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000178 return Color32_SSE2;
179 } else {
180 return NULL;
181 }
182}
183
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000184SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
reed@google.comedb606c2011-10-18 13:56:50 +0000185 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000186 return platform_32_procs[flags];
187 } else {
188 return NULL;
189 }
190}
191
reed@google.com981d4792011-03-09 12:55:47 +0000192
reed@google.come901b4c2011-11-14 21:56:45 +0000193SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
194 SkMask::Format maskFormat,
195 SkColor color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000196 if (SkMask::kA8_Format != maskFormat) {
197 return NULL;
198 }
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000199
reed@google.come901b4c2011-11-14 21:56:45 +0000200 ColorProc proc = NULL;
reed@google.comedb606c2011-10-18 13:56:50 +0000201 if (cachedHasSSE2()) {
reed@google.com981d4792011-03-09 12:55:47 +0000202 switch (dstConfig) {
203 case SkBitmap::kARGB_8888_Config:
reed@google.come6ea6062011-07-07 19:12:50 +0000204 // The SSE2 version is not (yet) faster for black, so we check
205 // for that.
206 if (SK_ColorBLACK != color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000207 proc = SkARGB32_A8_BlitMask_SSE2;
reed@google.come6ea6062011-07-07 19:12:50 +0000208 }
reed@google.com981d4792011-03-09 12:55:47 +0000209 break;
210 default:
reed@google.come901b4c2011-11-14 21:56:45 +0000211 break;
reed@google.com981d4792011-03-09 12:55:47 +0000212 }
213 }
214 return proc;
215}
216
tomhudson@google.comd6770e62012-02-14 16:01:15 +0000217SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
218 if (cachedHasSSE2()) {
219 if (isOpaque) {
220 return SkBlitLCD16OpaqueRow_SSE2;
221 } else {
222 return SkBlitLCD16Row_SSE2;
223 }
224 } else {
225 return NULL;
226 }
227
228}
reed@google.come901b4c2011-11-14 21:56:45 +0000229SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
reed@google.com1750bf12011-11-15 19:51:02 +0000230 SkMask::Format maskFormat,
231 RowFlags flags) {
reed@google.come901b4c2011-11-14 21:56:45 +0000232 return NULL;
233}
234
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000235SkMemset16Proc SkMemset16GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000236 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000237 return sk_memset16_SSE2;
238 } else {
239 return NULL;
240 }
241}
242
243SkMemset32Proc SkMemset32GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000244 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000245 return sk_memset32_SSE2;
246 } else {
247 return NULL;
248 }
249}
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000250
caryclark@google.com83ecdc32012-06-06 12:10:26 +0000251SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
252
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000253SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
254 if (cachedHasSSE2()) {
255 return ColorRect32_SSE2;
256 } else {
257 return NULL;
258 }
259}