blob: e7677220cb9326480e4fea631e2249398547e1a9 [file] [log] [blame]
senorblanco@chromium.org4e753552009-11-16 21:09:00 +00001/*
epoger@google.comec3ed6a2011-07-28 14:26:00 +00002 * Copyright 2009 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
senorblanco@chromium.org4e753552009-11-16 21:09:00 +00006 */
7
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +00008#include "SkBitmapProcState_opts_SSE2.h"
tomhudson@google.com95ad1552012-02-14 18:28:54 +00009#include "SkBitmapProcState_opts_SSSE3.h"
humper@google.comb0889472013-07-09 21:37:14 +000010#include "SkBitmapFilter_opts_SSE2.h"
reed@google.com58af9a62011-10-12 13:43:52 +000011#include "SkBlitMask.h"
tomhudson@google.com8dd90a92012-03-19 13:49:50 +000012#include "SkBlitRow.h"
13#include "SkBlitRect_opts_SSE2.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000014#include "SkBlitRow_opts_SSE2.h"
senorblanco@chromium.org27eec462013-11-08 20:49:04 +000015#include "SkBlurImage_opts_SSE2.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000016#include "SkUtils_opts_SSE2.h"
17#include "SkUtils.h"
senorblanco@chromium.org7a47ad32013-10-30 21:57:04 +000018#include "SkMorphology_opts.h"
19#include "SkMorphology_opts_SSE2.h"
commit-bot@chromium.orgc524e982014-04-09 15:43:46 +000020#include "SkXfermode.h"
21#include "SkXfermode_proccoeff.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000022
humper@google.comb0889472013-07-09 21:37:14 +000023#include "SkRTConf.h"
24
tomhudson@google.comea854942012-05-17 15:09:17 +000025#if defined(_MSC_VER) && defined(_WIN64)
26#include <intrin.h>
27#endif
28
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000029/* This file must *not* be compiled with -msse or -msse2, otherwise
30 gcc may generate sse2 even for scalar ops (and thus give an invalid
31 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp
32 in this directory should be compiled with -msse2. */
33
tomhudson@google.com95ad1552012-02-14 18:28:54 +000034
/* getcpuid(info_type, info)
 *
 * Executes the CPUID instruction for leaf |info_type| with sub-leaf 0 and
 * stores the resulting EAX, EBX, ECX and EDX values in info[0], info[1],
 * info[2] and info[3] respectively.
 *
 * ECX is explicitly cleared before CPUID on every hand-written path so that
 * all variants query the same sub-leaf as the __cpuid() intrinsic used on
 * 64-bit MSVC builds.
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // 64-bit MSVC builds go through the compiler intrinsic from <intrin.h>.
    __cpuid(info, info_type);
#else
    __asm {
        mov    eax, [info_type]
        xor    ecx, ecx           // always query sub-leaf 0
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#else
#if defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // ebx may be reserved (it is the GOT pointer under -fPIC), so it must be
    // left intact.  Park it in edi across CPUID and swap it back afterwards;
    // the CPUID result for ebx is then handed to the compiler in edi ("=D").
    // Pinning the output to edi, rather than letting the compiler pick any
    // register, guarantees the chosen register can never be ebx itself.
    asm volatile (
        "movl  %%ebx, %%edi \n\t"
        "cpuid              \n\t"
        "xchgl %%ebx, %%edi \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
#endif
#endif
74
/* Reports whether SSE2 instructions may be used on this machine. */
static inline bool hasSSE2() {
#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
    // Either this is a 64-bit x86 build or the compile-time SSE level already
    // guarantees SSE2, so no runtime probe is needed.
    return true;
#else
    // Runtime probe: CPUID leaf 1, EDX bit 26.
    const int kSSE2Mask = 1 << 26;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    return 0 != (regs[3] & kSSE2Mask);
#endif
}
88
/* Reports whether SSSE3 instructions may be used on this machine. */
static inline bool hasSSSE3() {
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
    // The compile-time SSE level already guarantees SSSE3; skip the probe.
    return true;
#else
    // Runtime probe: CPUID leaf 1, ECX mask 0x200 (bit 9).
    const int kSSSE3Mask = 0x200;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    return 0 != (regs[2] & kSSSE3Mask);
#endif
}
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000102
reed@google.comedb606c2011-10-18 13:56:50 +0000103static bool cachedHasSSE2() {
104 static bool gHasSSE2 = hasSSE2();
105 return gHasSSE2;
106}
107
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000108static bool cachedHasSSSE3() {
109 static bool gHasSSSE3 = hasSSSE3();
110 return gHasSSSE3;
111}
112
humper@google.comb0889472013-07-09 21:37:14 +0000113SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
114
reed@google.comfed04b32013-09-05 20:31:17 +0000115void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
humper@google.com138ebc32013-07-19 20:20:04 +0000116 if (cachedHasSSE2()) {
reed@google.comfed04b32013-09-05 20:31:17 +0000117 procs->fExtraHorizontalReads = 3;
118 procs->fConvolveVertically = &convolveVertically_SSE2;
119 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
120 procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
121 procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
humper@google.com138ebc32013-07-19 20:20:04 +0000122 }
123}
124
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000125void SkBitmapProcState::platformProcs() {
tomhudson@google.com06a73132012-02-22 18:30:43 +0000126 if (cachedHasSSSE3()) {
127 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
128 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
129 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
130 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
131 }
tomhudson@google.comae29b882012-03-06 14:59:04 +0000132
133 if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
134 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
135 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
136 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
137 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000138 } else if (cachedHasSSE2()) {
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000139 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
140 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
senorblanco@chromium.orgf3f0bd72009-12-10 22:46:31 +0000141 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
142 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000143 }
reed@google.com78662282012-07-24 13:53:23 +0000144
145 if (fSampleProc16 == S32_D16_filter_DX) {
146 fSampleProc16 = S32_D16_filter_DX_SSE2;
147 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000148 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000149
150 if (cachedHasSSSE3() || cachedHasSSE2()) {
151 if (fMatrixProc == ClampX_ClampY_filter_scale) {
152 fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
153 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
154 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
155 }
tomhudson@google.com5efaf262012-02-28 15:41:49 +0000156
157 if (fMatrixProc == ClampX_ClampY_filter_affine) {
158 fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
159 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
160 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
161 }
humper@google.comb0889472013-07-09 21:37:14 +0000162 if (c_hqfilter_sse) {
mtklein@google.com0dc546c2013-08-26 16:21:35 +0000163 if (fShaderProc32 == highQualityFilter32) {
humper@google.comb0889472013-07-09 21:37:14 +0000164 fShaderProc32 = highQualityFilter_SSE2;
165 }
humper@google.comb0889472013-07-09 21:37:14 +0000166 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000167 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000168}
169
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000170static SkBlitRow::Proc platform_16_procs[] = {
commit-bot@chromium.org39ce33a2014-02-24 04:23:39 +0000171 S32_D565_Opaque_SSE2, // S32_D565_Opaque
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000172 NULL, // S32_D565_Blend
173 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque
174 NULL, // S32A_D565_Blend
commit-bot@chromium.org27580472014-03-07 03:25:32 +0000175 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000176 NULL, // S32_D565_Blend_Dither
commit-bot@chromium.orgfe089b32014-03-07 13:24:42 +0000177 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000178 NULL, // S32A_D565_Blend_Dither
179};
180
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000181static SkBlitRow::Proc32 platform_32_procs[] = {
182 NULL, // S32_Opaque,
183 S32_Blend_BlitRow32_SSE2, // S32_Blend,
184 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
185 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
186};
187
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000188SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000189 if (cachedHasSSE2()) {
190 return platform_16_procs[flags];
191 } else {
192 return NULL;
193 }
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000194}
195
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000196SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000197 if (cachedHasSSE2()) {
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000198 return Color32_SSE2;
199 } else {
200 return NULL;
201 }
202}
203
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000204SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
reed@google.comedb606c2011-10-18 13:56:50 +0000205 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000206 return platform_32_procs[flags];
207 } else {
208 return NULL;
209 }
210}
211
reed@google.com981d4792011-03-09 12:55:47 +0000212
reed@google.come901b4c2011-11-14 21:56:45 +0000213SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
214 SkMask::Format maskFormat,
215 SkColor color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000216 if (SkMask::kA8_Format != maskFormat) {
217 return NULL;
218 }
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000219
reed@google.come901b4c2011-11-14 21:56:45 +0000220 ColorProc proc = NULL;
reed@google.comedb606c2011-10-18 13:56:50 +0000221 if (cachedHasSSE2()) {
reed@google.com981d4792011-03-09 12:55:47 +0000222 switch (dstConfig) {
223 case SkBitmap::kARGB_8888_Config:
reed@google.come6ea6062011-07-07 19:12:50 +0000224 // The SSE2 version is not (yet) faster for black, so we check
225 // for that.
226 if (SK_ColorBLACK != color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000227 proc = SkARGB32_A8_BlitMask_SSE2;
reed@google.come6ea6062011-07-07 19:12:50 +0000228 }
reed@google.com981d4792011-03-09 12:55:47 +0000229 break;
230 default:
reed@google.come901b4c2011-11-14 21:56:45 +0000231 break;
reed@google.com981d4792011-03-09 12:55:47 +0000232 }
233 }
234 return proc;
235}
236
tomhudson@google.comd6770e62012-02-14 16:01:15 +0000237SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
238 if (cachedHasSSE2()) {
239 if (isOpaque) {
240 return SkBlitLCD16OpaqueRow_SSE2;
241 } else {
242 return SkBlitLCD16Row_SSE2;
243 }
244 } else {
245 return NULL;
246 }
247
248}
reed@google.come901b4c2011-11-14 21:56:45 +0000249SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
reed@google.com1750bf12011-11-15 19:51:02 +0000250 SkMask::Format maskFormat,
251 RowFlags flags) {
reed@google.come901b4c2011-11-14 21:56:45 +0000252 return NULL;
253}
254
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000255SkMemset16Proc SkMemset16GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000256 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000257 return sk_memset16_SSE2;
258 } else {
259 return NULL;
260 }
261}
262
263SkMemset32Proc SkMemset32GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000264 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000265 return sk_memset32_SSE2;
266 } else {
267 return NULL;
268 }
269}
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000270
senorblanco@chromium.org0ded88d2014-01-24 15:43:50 +0000271SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
senorblanco@chromium.org7a47ad32013-10-30 21:57:04 +0000272 if (!cachedHasSSE2()) {
273 return NULL;
274 }
275 switch (type) {
276 case kDilateX_SkMorphologyProcType:
277 return SkDilateX_SSE2;
278 case kDilateY_SkMorphologyProcType:
279 return SkDilateY_SSE2;
280 case kErodeX_SkMorphologyProcType:
281 return SkErodeX_SSE2;
282 case kErodeY_SkMorphologyProcType:
283 return SkErodeY_SSE2;
284 default:
285 return NULL;
286 }
287}
288
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000289bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
290 SkBoxBlurProc* boxBlurY,
senorblanco@chromium.org05edd022013-11-11 20:12:34 +0000291 SkBoxBlurProc* boxBlurXY,
292 SkBoxBlurProc* boxBlurYX) {
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000293#ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
294 return false;
295#else
296 if (!cachedHasSSE2()) {
297 return false;
298 }
senorblanco@chromium.org05edd022013-11-11 20:12:34 +0000299 return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000300#endif
301}
302
caryclark@google.com83ecdc32012-06-06 12:10:26 +0000303SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
304
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000305SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
306 if (cachedHasSSE2()) {
307 return ColorRect32_SSE2;
308 } else {
309 return NULL;
310 }
311}
commit-bot@chromium.orgc524e982014-04-09 15:43:46 +0000312
313extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
314 SkXfermode::Mode mode);
315
316SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
317 SkXfermode::Mode mode);
318
319SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
320 SkXfermode::Mode mode) {
321 return NULL;
322}
323
324SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
325 SkXfermode::Mode mode);
326
327SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
328 SkXfermode::Mode mode) {
329 if (cachedHasSSE2()) {
330 return SkPlatformXfermodeFactory_impl_SSE2(rec, mode);
331 } else {
332 return SkPlatformXfermodeFactory_impl(rec, mode);
333 }
334}
335
336SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
337
338SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
339 return NULL;
340}