blob: 0b0debb2708a80bd00cdc8e68b377917935f508b [file] [log] [blame]
senorblanco@chromium.org4e753552009-11-16 21:09:00 +00001/*
epoger@google.comec3ed6a2011-07-28 14:26:00 +00002 * Copyright 2009 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
senorblanco@chromium.org4e753552009-11-16 21:09:00 +00006 */
7
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +00008#include "SkBitmapFilter_opts_SSE2.h"
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +00009#include "SkBitmapProcState_opts_SSE2.h"
tomhudson@google.com95ad1552012-02-14 18:28:54 +000010#include "SkBitmapProcState_opts_SSSE3.h"
reed@google.com58af9a62011-10-12 13:43:52 +000011#include "SkBlitMask.h"
tomhudson@google.com8dd90a92012-03-19 13:49:50 +000012#include "SkBlitRect_opts_SSE2.h"
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +000013#include "SkBlitRow.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000014#include "SkBlitRow_opts_SSE2.h"
senorblanco@chromium.org27eec462013-11-08 20:49:04 +000015#include "SkBlurImage_opts_SSE2.h"
senorblanco@chromium.org7a47ad32013-10-30 21:57:04 +000016#include "SkMorphology_opts.h"
17#include "SkMorphology_opts_SSE2.h"
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +000018#include "SkRTConf.h"
19#include "SkUtils.h"
20#include "SkUtils_opts_SSE2.h"
commit-bot@chromium.orgc524e982014-04-09 15:43:46 +000021#include "SkXfermode.h"
22#include "SkXfermode_proccoeff.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000023
tomhudson@google.comea854942012-05-17 15:09:17 +000024#if defined(_MSC_VER) && defined(_WIN64)
25#include <intrin.h>
26#endif
27
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000028/* This file must *not* be compiled with -msse or -msse2, otherwise
29 gcc may generate sse2 even for scalar ops (and thus give an invalid
30 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp
31 in this directory should be compiled with -msse2. */
32
tomhudson@google.com95ad1552012-02-14 18:28:54 +000033
/* Helper that executes the CPUID instruction, used to detect the CPU's SSE level at runtime; one variant per compiler/architecture. */
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000035#ifdef _MSC_VER
36static inline void getcpuid(int info_type, int info[4]) {
tomhudson@google.comea854942012-05-17 15:09:17 +000037#if defined(_WIN64)
38 __cpuid(info, info_type);
39#else
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000040 __asm {
41 mov eax, [info_type]
42 cpuid
43 mov edi, [info]
44 mov [edi], eax
45 mov [edi+4], ebx
46 mov [edi+8], ecx
47 mov [edi+12], edx
48 }
tomhudson@google.comea854942012-05-17 15:09:17 +000049#endif
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000050}
51#else
tomhudson@google.com95ad1552012-02-14 18:28:54 +000052#if defined(__x86_64__)
/* GCC-style x86_64 variant: executes CPUID for leaf `info_type` and stores
 * EAX, EBX, ECX and EDX into info[0..3], in that order. */
static inline void getcpuid(int info_type, int info[4]) {
    int reg_a, reg_b, reg_c, reg_d;
    __asm__ __volatile__ (
        "cpuid \n\t"
        : "=a"(reg_a), "=b"(reg_b), "=c"(reg_c), "=d"(reg_d)
        : "a"(info_type)
    );
    info[0] = reg_a;
    info[1] = reg_b;
    info[2] = reg_c;
    info[3] = reg_d;
}
60#else
/* GCC-style 32-bit x86 variant: executes CPUID for leaf `info_type` and stores
 * EAX, EBX, ECX and EDX into info[0..3], in that order.
 *
 * EBX cannot be named as an output or clobber when building with -fPIC, so it
 * is parked in a scratch register around the instruction instead:
 *   - "movl %%ebx, %1" saves the caller's EBX in the scratch register,
 *   - "xchgl %%ebx, %1" restores EBX and leaves CPUID's EBX in the scratch.
 * The scratch operand is early-clobber ("=&r") because it is written before
 * the asm has finished with its inputs. If the compiler happens to pick EBX
 * itself as the scratch register (non-PIC builds), both moves degenerate to
 * no-ops and EBX simply carries the result, which is still correct. Unlike a
 * pushl/popl pair, this does not move the stack pointer inside the asm.
 */
static inline void getcpuid(int info_type, int info[4]) {
    asm volatile (
        "movl  %%ebx, %1 \n\t"
        "cpuid           \n\t"
        "xchgl %%ebx, %1 \n\t"
        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
72#endif
tomhudson@google.com95ad1552012-02-14 18:28:54 +000073#endif
74
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +000075////////////////////////////////////////////////////////////////////////////////
76
#if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
/* Either this is a 64-bit x86 build (every x86_64 machine has SSE2) or the
 * build configuration already promises SSE2, so no runtime probe is needed. */
static inline bool hasSSE2() {
    return true;
}
#else

/* Runtime probe: run CPUID leaf 1 and test bit 26 of EDX (info[3]). */
static inline bool hasSSE2() {
    const int kSSE2Mask = 1 << 26;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    return 0 != (regs[3] & kSSE2Mask);
}
#endif
90
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
/* The build configuration already promises SSSE3; skip the runtime probe. */
static inline bool hasSSSE3() {
    return true;
}
#elif defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
/* In the Android framework the SSSE3 capability of the target device should
 * always be known when compiling. The exception is the emulator: it is built
 * without the SSSE3 procs, yet may run on a host whose CPU reports SSSE3.
 * Answering "no" here keeps those missing procs from being selected.
 */
static inline bool hasSSSE3() {
    return false;
}
#else

/* Runtime probe: run CPUID leaf 1 and test bit 9 (0x200) of ECX (info[2]). */
static inline bool hasSSSE3() {
    const int kSSSE3Mask = 0x200;
    int regs[4] = { 0 };
    getcpuid(1, regs);
    return 0 != (regs[2] & kSSSE3Mask);
}
#endif
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000114
reed@google.comedb606c2011-10-18 13:56:50 +0000115static bool cachedHasSSE2() {
116 static bool gHasSSE2 = hasSSE2();
117 return gHasSSE2;
118}
119
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000120static bool cachedHasSSSE3() {
121 static bool gHasSSSE3 = hasSSSE3();
122 return gHasSSSE3;
123}
124
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000125////////////////////////////////////////////////////////////////////////////////
126
humper@google.comb0889472013-07-09 21:37:14 +0000127SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
128
reed@google.comfed04b32013-09-05 20:31:17 +0000129void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
humper@google.com138ebc32013-07-19 20:20:04 +0000130 if (cachedHasSSE2()) {
reed@google.comfed04b32013-09-05 20:31:17 +0000131 procs->fExtraHorizontalReads = 3;
132 procs->fConvolveVertically = &convolveVertically_SSE2;
133 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
134 procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
135 procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
humper@google.com138ebc32013-07-19 20:20:04 +0000136 }
137}
138
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000139////////////////////////////////////////////////////////////////////////////////
140
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000141void SkBitmapProcState::platformProcs() {
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000142 /* Every optimization in the function requires at least SSE2 */
143 if (!cachedHasSSE2()) {
144 return;
145 }
commit-bot@chromium.orgc398f712014-04-23 20:07:19 +0000146
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000147 /* Check fSampleProc32 */
148 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
149 if (cachedHasSSSE3()) {
150 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
151 } else {
commit-bot@chromium.orgc398f712014-04-23 20:07:19 +0000152 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000153 }
154 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
155 if (cachedHasSSSE3()) {
156 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
157 }
158 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
159 if (cachedHasSSSE3()) {
160 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
161 } else {
commit-bot@chromium.orgc398f712014-04-23 20:07:19 +0000162 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
163 }
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000164 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
165 if (cachedHasSSSE3()) {
166 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
commit-bot@chromium.orgc398f712014-04-23 20:07:19 +0000167 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000168 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000169
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000170 /* Check fSampleProc16 */
171 if (fSampleProc16 == S32_D16_filter_DX) {
172 fSampleProc16 = S32_D16_filter_DX_SSE2;
173 }
tomhudson@google.com5efaf262012-02-28 15:41:49 +0000174
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000175 /* Check fMatrixProc */
176 if (fMatrixProc == ClampX_ClampY_filter_scale) {
177 fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
178 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
179 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
180 } else if (fMatrixProc == ClampX_ClampY_filter_affine) {
181 fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
182 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
183 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
184 }
185
186 /* Check fShaderProc32 */
187 if (c_hqfilter_sse) {
188 if (fShaderProc32 == highQualityFilter32) {
189 fShaderProc32 = highQualityFilter_SSE2;
humper@google.comb0889472013-07-09 21:37:14 +0000190 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000191 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000192}
193
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000194////////////////////////////////////////////////////////////////////////////////
195
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000196static SkBlitRow::Proc platform_16_procs[] = {
commit-bot@chromium.org39ce33a2014-02-24 04:23:39 +0000197 S32_D565_Opaque_SSE2, // S32_D565_Opaque
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000198 NULL, // S32_D565_Blend
199 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque
200 NULL, // S32A_D565_Blend
commit-bot@chromium.org27580472014-03-07 03:25:32 +0000201 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000202 NULL, // S32_D565_Blend_Dither
commit-bot@chromium.orgfe089b32014-03-07 13:24:42 +0000203 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000204 NULL, // S32A_D565_Blend_Dither
205};
206
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000207SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
208 if (cachedHasSSE2()) {
209 return platform_16_procs[flags];
210 } else {
211 return NULL;
212 }
213}
214
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000215static SkBlitRow::Proc32 platform_32_procs[] = {
216 NULL, // S32_Opaque,
217 S32_Blend_BlitRow32_SSE2, // S32_Blend,
218 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
219 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
220};
221
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000222SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000223 if (cachedHasSSE2()) {
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000224 return platform_32_procs[flags];
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000225 } else {
226 return NULL;
227 }
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000228}
229
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000230SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000231 if (cachedHasSSE2()) {
senorblanco@chromium.orgc3856382010-12-13 15:27:20 +0000232 return Color32_SSE2;
233 } else {
234 return NULL;
235 }
236}
237
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000238SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
239
240SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
241/* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled.
reed@google.comedb606c2011-10-18 13:56:50 +0000242 if (cachedHasSSE2()) {
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000243 return ColorRect32_SSE2;
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000244 } else {
245 return NULL;
246 }
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000247*/
248 return NULL;
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000249}
250
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000251////////////////////////////////////////////////////////////////////////////////
reed@google.com981d4792011-03-09 12:55:47 +0000252
reed@google.come901b4c2011-11-14 21:56:45 +0000253SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
254 SkMask::Format maskFormat,
255 SkColor color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000256 if (SkMask::kA8_Format != maskFormat) {
257 return NULL;
258 }
rmistry@google.comfbfcd562012-08-23 18:09:54 +0000259
reed@google.come901b4c2011-11-14 21:56:45 +0000260 ColorProc proc = NULL;
reed@google.comedb606c2011-10-18 13:56:50 +0000261 if (cachedHasSSE2()) {
reed@google.com981d4792011-03-09 12:55:47 +0000262 switch (dstConfig) {
263 case SkBitmap::kARGB_8888_Config:
reed@google.come6ea6062011-07-07 19:12:50 +0000264 // The SSE2 version is not (yet) faster for black, so we check
265 // for that.
266 if (SK_ColorBLACK != color) {
reed@google.comedb606c2011-10-18 13:56:50 +0000267 proc = SkARGB32_A8_BlitMask_SSE2;
reed@google.come6ea6062011-07-07 19:12:50 +0000268 }
reed@google.com981d4792011-03-09 12:55:47 +0000269 break;
270 default:
reed@google.come901b4c2011-11-14 21:56:45 +0000271 break;
reed@google.com981d4792011-03-09 12:55:47 +0000272 }
273 }
274 return proc;
275}
276
tomhudson@google.comd6770e62012-02-14 16:01:15 +0000277SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
278 if (cachedHasSSE2()) {
279 if (isOpaque) {
280 return SkBlitLCD16OpaqueRow_SSE2;
281 } else {
282 return SkBlitLCD16Row_SSE2;
283 }
284 } else {
285 return NULL;
286 }
287
288}
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000289
reed@google.come901b4c2011-11-14 21:56:45 +0000290SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
reed@google.com1750bf12011-11-15 19:51:02 +0000291 SkMask::Format maskFormat,
292 RowFlags flags) {
reed@google.come901b4c2011-11-14 21:56:45 +0000293 return NULL;
294}
295
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000296////////////////////////////////////////////////////////////////////////////////
297
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000298SkMemset16Proc SkMemset16GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000299 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000300 return sk_memset16_SSE2;
301 } else {
302 return NULL;
303 }
304}
305
306SkMemset32Proc SkMemset32GetPlatformProc() {
reed@google.comedb606c2011-10-18 13:56:50 +0000307 if (cachedHasSSE2()) {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000308 return sk_memset32_SSE2;
309 } else {
310 return NULL;
311 }
312}
tomhudson@google.com8dd90a92012-03-19 13:49:50 +0000313
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000314////////////////////////////////////////////////////////////////////////////////
315
senorblanco@chromium.org0ded88d2014-01-24 15:43:50 +0000316SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
senorblanco@chromium.org7a47ad32013-10-30 21:57:04 +0000317 if (!cachedHasSSE2()) {
318 return NULL;
319 }
320 switch (type) {
321 case kDilateX_SkMorphologyProcType:
322 return SkDilateX_SSE2;
323 case kDilateY_SkMorphologyProcType:
324 return SkDilateY_SSE2;
325 case kErodeX_SkMorphologyProcType:
326 return SkErodeX_SSE2;
327 case kErodeY_SkMorphologyProcType:
328 return SkErodeY_SSE2;
329 default:
330 return NULL;
331 }
332}
333
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000334////////////////////////////////////////////////////////////////////////////////
335
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000336bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
337 SkBoxBlurProc* boxBlurY,
senorblanco@chromium.org05edd022013-11-11 20:12:34 +0000338 SkBoxBlurProc* boxBlurXY,
339 SkBoxBlurProc* boxBlurYX) {
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000340#ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
341 return false;
342#else
343 if (!cachedHasSSE2()) {
344 return false;
345 }
senorblanco@chromium.org05edd022013-11-11 20:12:34 +0000346 return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
senorblanco@chromium.org27eec462013-11-08 20:49:04 +0000347#endif
348}
349
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000350////////////////////////////////////////////////////////////////////////////////
commit-bot@chromium.orgc524e982014-04-09 15:43:46 +0000351
352extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
353 SkXfermode::Mode mode);
354
355SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
356 SkXfermode::Mode mode);
357
358SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
359 SkXfermode::Mode mode) {
360 return NULL;
361}
362
363SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
364 SkXfermode::Mode mode);
365
366SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
367 SkXfermode::Mode mode) {
368 if (cachedHasSSE2()) {
369 return SkPlatformXfermodeFactory_impl_SSE2(rec, mode);
370 } else {
371 return SkPlatformXfermodeFactory_impl(rec, mode);
372 }
373}
374
375SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
376
377SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
378 return NULL;
379}