blob: cc5adf1e2af856720bacc0d2f3f06f982792d2e7 [file] [log] [blame]
/*
 * Copyright 2009 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +00008#include "SkBitmapProcState_opts_SSE2.h"
tomhudson@google.com95ad1552012-02-14 18:28:54 +00009#include "SkBitmapProcState_opts_SSSE3.h"
humper@google.comb0889472013-07-09 21:37:14 +000010#include "SkBitmapFilter_opts_SSE2.h"
reed@google.com58af9a62011-10-12 13:43:52 +000011#include "SkBlitMask.h"
tomhudson@google.com8dd90a92012-03-19 13:49:50 +000012#include "SkBlitRow.h"
13#include "SkBlitRect_opts_SSE2.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000014#include "SkBlitRow_opts_SSE2.h"
senorblanco@chromium.org27eec462013-11-08 20:49:04 +000015#include "SkBlurImage_opts_SSE2.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000016#include "SkUtils_opts_SSE2.h"
17#include "SkUtils.h"
senorblanco@chromium.org7a47ad32013-10-30 21:57:04 +000018#include "SkMorphology_opts.h"
19#include "SkMorphology_opts_SSE2.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000020
humper@google.comb0889472013-07-09 21:37:14 +000021#include "SkRTConf.h"
22
tomhudson@google.comea854942012-05-17 15:09:17 +000023#if defined(_MSC_VER) && defined(_WIN64)
24#include <intrin.h>
25#endif
26
/* This file must *not* be compiled with -msse or -msse2, otherwise
   gcc may generate sse2 even for scalar ops (and thus give an invalid
   instruction on Pentium3 on the code below).  Only files named *_SSE2.cpp
   in this directory should be compiled with -msse2. */
31
tomhudson@google.com95ad1552012-02-14 18:28:54 +000032
/**
 *  Executes the CPUID instruction for leaf |info_type| and stores the
 *  resulting registers in |info| as { eax, ebx, ecx, edx }.
 *
 *  One of three implementations is selected at compile time: MSVC (intrinsic
 *  on Win64, inline assembly on Win32), GCC-style x86_64, and GCC-style
 *  32-bit x86.
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // Use the compiler intrinsic from <intrin.h> on 64-bit Windows.
    __cpuid(info, info_type);
#else
    __asm {
        mov    eax, [info_type]
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#else
#if defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // cpuid overwrites ebx, which must keep its value across the asm statement
    // so this code stays compatible with -fPIC. Park ebx in edi, run cpuid,
    // then swap the two: ebx gets its old value back and edi ends up holding
    // cpuid's ebx result.
    //
    // The second output is pinned to edi ("=D") on purpose. With a generic
    // "=r" constraint the compiler is free to pick ebx itself in non-PIC
    // builds, in which case restoring ebx would throw the result away. This
    // sequence also avoids touching the stack pointer inside the asm.
    asm volatile (
        "movl %%ebx, %%edi  \n\t"
        "cpuid              \n\t"
        "xchgl %%edi, %%ebx \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#endif
#endif
72
#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
/* SSE2 is guaranteed here: either the target is x86_64 (where it is always
   present) or the build already requires it, so no runtime probe is needed. */
static inline bool hasSSE2() {
    return true;
}
#else

/* Runtime probe: CPUID leaf 1 reports SSE2 in bit 26 of EDX. */
static inline bool hasSSE2() {
    const int kSSE2BitInEDX = 1 << 26;
    int regs[4] = { 0, 0, 0, 0 };
    getcpuid(1, regs);
    return 0 != (regs[3] & kSSE2BitInEDX);
}
#endif
86
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
/* The build already requires SSSE3, so there is nothing to probe at runtime. */
static inline bool hasSSSE3() {
    return true;
}
#else

/* Runtime probe: CPUID leaf 1 reports SSSE3 in bit 9 (mask 0x200) of ECX. */
static inline bool hasSSSE3() {
    const int kSSSE3BitInECX = 0x200;
    int regs[4] = { 0, 0, 0, 0 };
    getcpuid(1, regs);
    return 0 != (regs[2] & kSSSE3BitInECX);
}
#endif
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000100
reed@google.comedb606c2011-10-18 13:56:50 +0000101static bool cachedHasSSE2() {
102 static bool gHasSSE2 = hasSSE2();
103 return gHasSSE2;
104}
105
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000106static bool cachedHasSSSE3() {
107 static bool gHasSSSE3 = hasSSSE3();
108 return gHasSSSE3;
109}
110
humper@google.comb0889472013-07-09 21:37:14 +0000111SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
112
reed@google.comfed04b32013-09-05 20:31:17 +0000113void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
humper@google.com138ebc32013-07-19 20:20:04 +0000114 if (cachedHasSSE2()) {
reed@google.comfed04b32013-09-05 20:31:17 +0000115 procs->fExtraHorizontalReads = 3;
116 procs->fConvolveVertically = &convolveVertically_SSE2;
117 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
118 procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
119 procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
humper@google.com138ebc32013-07-19 20:20:04 +0000120 }
121}
122
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000123void SkBitmapProcState::platformProcs() {
tomhudson@google.com06a73132012-02-22 18:30:43 +0000124 if (cachedHasSSSE3()) {
125 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
126 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
127 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
128 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
129 }
tomhudson@google.comae29b882012-03-06 14:59:04 +0000130
131 if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
132 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
133 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
134 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
135 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000136 } else if (cachedHasSSE2()) {
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000137 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
138 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
senorblanco@chromium.orgf3f0bd72009-12-10 22:46:31 +0000139 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
140 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000141 }
reed@google.com78662282012-07-24 13:53:23 +0000142
143 if (fSampleProc16 == S32_D16_filter_DX) {
144 fSampleProc16 = S32_D16_filter_DX_SSE2;
145 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000146 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000147
148 if (cachedHasSSSE3() || cachedHasSSE2()) {
149 if (fMatrixProc == ClampX_ClampY_filter_scale) {
150 fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
151 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
152 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
153 }
tomhudson@google.com5efaf262012-02-28 15:41:49 +0000154
155 if (fMatrixProc == ClampX_ClampY_filter_affine) {
156 fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
157 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
158 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
159 }
humper@google.comb0889472013-07-09 21:37:14 +0000160 if (c_hqfilter_sse) {
mtklein@google.com0dc546c2013-08-26 16:21:35 +0000161 if (fShaderProc32 == highQualityFilter32) {
humper@google.comb0889472013-07-09 21:37:14 +0000162 fShaderProc32 = highQualityFilter_SSE2;
163 }
humper@google.comb0889472013-07-09 21:37:14 +0000164 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000165 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000166}
167
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000168static SkBlitRow::Proc platform_16_procs[] = {
169 NULL, // S32_D565_Opaque
170 NULL, // S32_D565_Blend
171 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque
172 NULL, // S32A_D565_Blend
173 NULL, // S32_D565_Opaque_Dither
174 NULL, // S32_D565_Blend_Dither
175 NULL, // S32A_D565_Opaque_Dither
176 NULL, // S32A_D565_Blend_Dither
177};
178
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000179static SkBlitRow::Proc32 platform_32_procs[] = {
180 NULL, // S32_Opaque,
181 S32_Blend_BlitRow32_SSE2, // S32_Blend,
182 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
183 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
184};
185
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000186SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000187 if (cachedHasSSE2()) {
188 return platform_16_procs[flags];
189 } else {
190 return NULL;
191 }
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000192}
193
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000194SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000195 if (cachedHasSSE2()) {
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000196 return Color32_SSE2;
197 } else {
198 return NULL;
199 }
200}
201
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000202SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
reed@google.comedb606c2011-10-18 13:56:50 +0000203 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000204 return platform_32_procs[flags];
205 } else {
206 return NULL;
207 }
208}
209
reed@google.com981d4792011-03-09 12:55:47 +0000210
reed@google.come901b4c2011-11-14 21:56:45 +0000211SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
212 SkMask::Format maskFormat,
213 SkColor color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000214 if (SkMask::kA8_Format != maskFormat) {
215 return NULL;
216 }
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000217
reed@google.come901b4c2011-11-14 21:56:45 +0000218 ColorProc proc = NULL;
reed@google.comedb606c2011-10-18 13:56:50 +0000219 if (cachedHasSSE2()) {
reed@google.com981d4792011-03-09 12:55:47 +0000220 switch (dstConfig) {
221 case SkBitmap::kARGB_8888_Config:
reed@google.come6ea6062011-07-07 19:12:50 +0000222 // The SSE2 version is not (yet) faster for black, so we check
223 // for that.
224 if (SK_ColorBLACK != color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000225 proc = SkARGB32_A8_BlitMask_SSE2;
reed@google.come6ea6062011-07-07 19:12:50 +0000226 }
reed@google.com981d4792011-03-09 12:55:47 +0000227 break;
228 default:
reed@google.come901b4c2011-11-14 21:56:45 +0000229 break;
reed@google.com981d4792011-03-09 12:55:47 +0000230 }
231 }
232 return proc;
233}
234
tomhudson@google.comd6770e62012-02-14 16:01:15 +0000235SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
236 if (cachedHasSSE2()) {
237 if (isOpaque) {
238 return SkBlitLCD16OpaqueRow_SSE2;
239 } else {
240 return SkBlitLCD16Row_SSE2;
241 }
242 } else {
243 return NULL;
244 }
245
246}
reed@google.come901b4c2011-11-14 21:56:45 +0000247SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
reed@google.com1750bf12011-11-15 19:51:02 +0000248 SkMask::Format maskFormat,
249 RowFlags flags) {
reed@google.come901b4c2011-11-14 21:56:45 +0000250 return NULL;
251}
252
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000253SkMemset16Proc SkMemset16GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000254 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000255 return sk_memset16_SSE2;
256 } else {
257 return NULL;
258 }
259}
260
261SkMemset32Proc SkMemset32GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000262 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000263 return sk_memset32_SSE2;
264 } else {
265 return NULL;
266 }
267}
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000268
senorblanco@chromium.org0ded88d2014-01-24 15:43:50 +0000269SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
senorblanco@chromium.org7a47ad32013-10-30 21:57:04 +0000270 if (!cachedHasSSE2()) {
271 return NULL;
272 }
273 switch (type) {
274 case kDilateX_SkMorphologyProcType:
275 return SkDilateX_SSE2;
276 case kDilateY_SkMorphologyProcType:
277 return SkDilateY_SSE2;
278 case kErodeX_SkMorphologyProcType:
279 return SkErodeX_SSE2;
280 case kErodeY_SkMorphologyProcType:
281 return SkErodeY_SSE2;
282 default:
283 return NULL;
284 }
285}
286
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000287bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
288 SkBoxBlurProc* boxBlurY,
senorblanco@chromium.org05edd022013-11-11 20:12:34 +0000289 SkBoxBlurProc* boxBlurXY,
290 SkBoxBlurProc* boxBlurYX) {
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000291#ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
292 return false;
293#else
294 if (!cachedHasSSE2()) {
295 return false;
296 }
senorblanco@chromium.org05edd022013-11-11 20:12:34 +0000297 return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000298#endif
299}
300
caryclark@google.com83ecdc32012-06-06 12:10:26 +0000301SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
302
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000303SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
304 if (cachedHasSSE2()) {
305 return ColorRect32_SSE2;
306 } else {
307 return NULL;
308 }
309}