blob: 2e140e8df61f40e7b596c00eff5b43e39b07be9e [file] [log] [blame]
senorblanco@chromium.org4e753552009-11-16 21:09:00 +00001/*
epoger@google.comec3ed6a2011-07-28 14:26:00 +00002 * Copyright 2009 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
senorblanco@chromium.org4e753552009-11-16 21:09:00 +00006 */
7
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +00008#include "SkBitmapFilter_opts_SSE2.h"
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +00009#include "SkBitmapProcState_opts_SSE2.h"
tomhudson@google.com95ad1552012-02-14 18:28:54 +000010#include "SkBitmapProcState_opts_SSSE3.h"
humper4f96ab32014-06-27 11:27:03 -070011#include "SkBitmapScaler.h"
reed@google.com58af9a62011-10-12 13:43:52 +000012#include "SkBlitMask.h"
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +000013#include "SkBlitRow.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000014#include "SkBlitRow_opts_SSE2.h"
henrik.smiding3bb195e2014-06-27 08:03:17 -070015#include "SkBlitRow_opts_SSE4.h"
mtkleinc09e2af2014-10-13 12:48:16 -070016#include "SkLazyPtr.h"
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +000017#include "SkRTConf.h"
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000018
tomhudson@google.comea854942012-05-17 15:09:17 +000019#if defined(_MSC_VER) && defined(_WIN64)
20#include <intrin.h>
21#endif
22
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +000023/* This file must *not* be compiled with -msse or any other optional SIMD
24 extension, otherwise gcc may generate SIMD instructions even for scalar ops
25 (and thus give an invalid instruction on Pentium3 on the code below).
26 For example, only files named *_SSE2.cpp in this directory should be
27 compiled with -msse2 or higher. */
senorblanco@chromium.org4e753552009-11-16 21:09:00 +000028
tomhudson@google.com95ad1552012-02-14 18:28:54 +000029
/* Execute the CPUID instruction for leaf `info_type` and store the resulting
 * EAX, EBX, ECX and EDX values in info[0], info[1], info[2] and info[3].
 * One implementation per compiler / target.
 *
 * The inline-assembly variants do not load ECX before issuing CPUID, so they
 * should only be used for leaves that ignore the sub-leaf index (leaf 1, the
 * only one requested in this file, is such a leaf).
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // 64-bit MSVC build: use the __cpuid compiler intrinsic from <intrin.h>.
    __cpuid(info, info_type);
#else
    __asm {
        mov    eax, [info_type]
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#elif defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    __asm__ __volatile__ (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // EBX must be preserved so this code stays compatible with -fPIC, where
    // it holds the GOT pointer.  Swap it with a scratch register around CPUID
    // instead of push/pop + mov: if the compiler happens to pick EBX itself
    // for operand %1 (possible in non-PIC builds), the exchanges degrade to
    // no-ops and the value produced by CPUID is still returned, whereas the
    // push/mov/pop sequence would hand back the stale, pre-CPUID EBX.
    // The output is early-clobber because it is written before CPUID reads
    // its input.
    __asm__ __volatile__ (
        "xchgl %%ebx, %1 \n\t"
        "cpuid           \n\t"
        "xchgl %%ebx, %1 \n\t"
        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type)
    );
}
#endif
tomhudson@google.com95ad1552012-02-14 18:28:54 +000068
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +000069////////////////////////////////////////////////////////////////////////////////
70
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +000071/* Fetch the SIMD level directly from the CPU, at run-time.
72 * Only checks the levels needed by the optimizations in this file.
commit-bot@chromium.org443c0a62014-05-08 15:27:52 +000073 */
mtkleinc09e2af2014-10-13 12:48:16 -070074namespace { // get_SIMD_level() technically must have external linkage, so no static.
75int* get_SIMD_level() {
76 int cpu_info[4] = { 0, 0, 0, 0 };
commit-bot@chromium.org443c0a62014-05-08 15:27:52 +000077 getcpuid(1, cpu_info);
mtkleinc09e2af2014-10-13 12:48:16 -070078
halcanary385fe4d2015-08-26 13:07:48 -070079 int* level = new int;
mtkleinc09e2af2014-10-13 12:48:16 -070080
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +000081 if ((cpu_info[2] & (1<<20)) != 0) {
mtkleinc09e2af2014-10-13 12:48:16 -070082 *level = SK_CPU_SSE_LEVEL_SSE42;
henrik.smiding3bb195e2014-06-27 08:03:17 -070083 } else if ((cpu_info[2] & (1<<19)) != 0) {
mtkleinc09e2af2014-10-13 12:48:16 -070084 *level = SK_CPU_SSE_LEVEL_SSE41;
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +000085 } else if ((cpu_info[2] & (1<<9)) != 0) {
mtkleinc09e2af2014-10-13 12:48:16 -070086 *level = SK_CPU_SSE_LEVEL_SSSE3;
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +000087 } else if ((cpu_info[3] & (1<<26)) != 0) {
mtkleinc09e2af2014-10-13 12:48:16 -070088 *level = SK_CPU_SSE_LEVEL_SSE2;
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +000089 } else {
mtkleinc09e2af2014-10-13 12:48:16 -070090 *level = 0;
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +000091 }
mtkleinc09e2af2014-10-13 12:48:16 -070092 return level;
commit-bot@chromium.org443c0a62014-05-08 15:27:52 +000093}
mtkleinc09e2af2014-10-13 12:48:16 -070094} // namespace
95
96SK_DECLARE_STATIC_LAZY_PTR(int, gSIMDLevel, get_SIMD_level);
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +000097
98/* Verify that the requested SIMD level is supported in the build.
99 * If not, check if the platform supports it.
100 */
101static inline bool supports_simd(int minLevel) {
102#if defined(SK_CPU_SSE_LEVEL)
103 if (minLevel <= SK_CPU_SSE_LEVEL) {
104 return true;
105 } else
commit-bot@chromium.org443c0a62014-05-08 15:27:52 +0000106#endif
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +0000107 {
108#if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
109 /* For the Android framework we should always know at compile time if the device
110 * we are building for supports SSSE3. The one exception to this rule is on the
111 * emulator where we are compiled without the -mssse3 option (so we have no
112 * SSSE3 procs) but can be run on a host machine that supports SSSE3
113 * instructions. So for that particular case we disable our SSSE3 options.
114 */
115 return false;
116#else
mtkleinc09e2af2014-10-13 12:48:16 -0700117 return minLevel <= *gSIMDLevel.get();
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +0000118#endif
119 }
tomhudson@google.com95ad1552012-02-14 18:28:54 +0000120}
121
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000122////////////////////////////////////////////////////////////////////////////////
123
humper4f96ab32014-06-27 11:27:03 -0700124void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +0000125 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
reed@google.comfed04b32013-09-05 20:31:17 +0000126 procs->fExtraHorizontalReads = 3;
127 procs->fConvolveVertically = &convolveVertically_SSE2;
128 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
129 procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
130 procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
humper@google.com138ebc32013-07-19 20:20:04 +0000131 }
132}
133
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000134////////////////////////////////////////////////////////////////////////////////
135
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000136void SkBitmapProcState::platformProcs() {
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000137 /* Every optimization in the function requires at least SSE2 */
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +0000138 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000139 return;
140 }
qiankun.miao60f3c652014-12-04 06:27:03 -0800141 const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3);
commit-bot@chromium.orgc398f712014-04-23 20:07:19 +0000142
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000143 /* Check fSampleProc32 */
144 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
qiankun.miao60f3c652014-12-04 06:27:03 -0800145 if (ssse3) {
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000146 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
147 } else {
commit-bot@chromium.orgc398f712014-04-23 20:07:19 +0000148 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000149 }
150 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
qiankun.miao60f3c652014-12-04 06:27:03 -0800151 if (ssse3) {
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000152 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
153 }
154 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
qiankun.miao60f3c652014-12-04 06:27:03 -0800155 if (ssse3) {
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000156 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
157 } else {
commit-bot@chromium.orgc398f712014-04-23 20:07:19 +0000158 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
159 }
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000160 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
qiankun.miao60f3c652014-12-04 06:27:03 -0800161 if (ssse3) {
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000162 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
commit-bot@chromium.orgc398f712014-04-23 20:07:19 +0000163 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000164 }
tomhudson@google.com06a73132012-02-22 18:30:43 +0000165
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000166 /* Check fSampleProc16 */
167 if (fSampleProc16 == S32_D16_filter_DX) {
qiankun.miao72b0c052014-12-10 07:21:35 -0800168 if (ssse3) {
169 fSampleProc16 = S32_D16_filter_DX_SSSE3;
170 } else {
171 fSampleProc16 = S32_D16_filter_DX_SSE2;
172 }
qiankun.miao60f3c652014-12-04 06:27:03 -0800173 } else if (ssse3 && fSampleProc16 == S32_D16_filter_DXDY) {
174 fSampleProc16 = S32_D16_filter_DXDY_SSSE3;
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000175 }
tomhudson@google.com5efaf262012-02-28 15:41:49 +0000176
commit-bot@chromium.org4b9b4562014-04-28 15:07:50 +0000177 /* Check fMatrixProc */
178 if (fMatrixProc == ClampX_ClampY_filter_scale) {
179 fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
180 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
181 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
182 } else if (fMatrixProc == ClampX_ClampY_filter_affine) {
183 fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
184 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
185 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
186 }
senorblanco@chromium.orgdc7de742009-11-30 20:00:29 +0000187}
188
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000189////////////////////////////////////////////////////////////////////////////////
190
henrik.smiding4e654732015-02-10 09:42:33 -0800191static const SkBlitRow::Proc16 platform_16_procs[] = {
commit-bot@chromium.org39ce33a2014-02-24 04:23:39 +0000192 S32_D565_Opaque_SSE2, // S32_D565_Opaque
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000193 NULL, // S32_D565_Blend
194 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque
195 NULL, // S32A_D565_Blend
commit-bot@chromium.org27580472014-03-07 03:25:32 +0000196 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000197 NULL, // S32_D565_Blend_Dither
commit-bot@chromium.orgfe089b32014-03-07 13:24:42 +0000198 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000199 NULL, // S32A_D565_Blend_Dither
200};
201
reeda7f11912015-01-13 13:51:00 -0800202SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +0000203 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000204 return platform_16_procs[flags];
205 } else {
206 return NULL;
207 }
208}
209
henrik.smiding70840cb2015-03-20 09:20:46 -0700210static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = {
211 Color32A_D565_SSE2, // Color32A_D565,
henrik.smiding4e654732015-02-10 09:42:33 -0800212 NULL, // Color32A_D565_Dither
213};
214
reeda7f11912015-01-13 13:51:00 -0800215SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
henrik.smiding70840cb2015-03-20 09:20:46 -0700216/* If you're thinking about writing an SSE4 version of this, do check it's
217 * actually faster on Atom. Our original SSE4 version was slower than this
218 * SSE2 version on Silvermont, and only marginally faster on a Core i7,
219 * mainly due to the MULLD timings.
220 */
221 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
222 return platform_565_colorprocs_SSE2[flags];
henrik.smiding4e654732015-02-10 09:42:33 -0800223 } else {
224 return NULL;
225 }
reeda7f11912015-01-13 13:51:00 -0800226}
227
henrik.smiding4e654732015-02-10 09:42:33 -0800228static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = {
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000229 NULL, // S32_Opaque,
230 S32_Blend_BlitRow32_SSE2, // S32_Blend,
231 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
232 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
233};
234
henrik.smiding4e654732015-02-10 09:42:33 -0800235static const SkBlitRow::Proc32 platform_32_procs_SSE4[] = {
henrik.smiding3bb195e2014-06-27 08:03:17 -0700236 NULL, // S32_Opaque,
237 S32_Blend_BlitRow32_SSE2, // S32_Blend,
stephana4bf1ce22015-02-02 10:02:48 -0800238 S32A_Opaque_BlitRow32_SSE4, // S32A_Opaque
henrik.smiding3bb195e2014-06-27 08:03:17 -0700239 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
240};
henrik.smiding3bb195e2014-06-27 08:03:17 -0700241
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000242SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
henrik.smiding3bb195e2014-06-27 08:03:17 -0700243 if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) {
244 return platform_32_procs_SSE4[flags];
245 } else
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +0000246 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
henrik.smiding3bb195e2014-06-27 08:03:17 -0700247 return platform_32_procs_SSE2[flags];
commit-bot@chromium.org47591072014-02-19 03:09:52 +0000248 } else {
249 return NULL;
250 }
senorblanco@chromium.org4e753552009-11-16 21:09:00 +0000251}
252
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000253////////////////////////////////////////////////////////////////////////////////
reed@google.com981d4792011-03-09 12:55:47 +0000254
tomhudson@google.comd6770e62012-02-14 16:01:15 +0000255SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
commit-bot@chromium.orgce4402c2014-05-12 14:16:19 +0000256 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
tomhudson@google.comd6770e62012-02-14 16:01:15 +0000257 if (isOpaque) {
258 return SkBlitLCD16OpaqueRow_SSE2;
259 } else {
260 return SkBlitLCD16Row_SSE2;
261 }
262 } else {
263 return NULL;
264 }
265
266}
commit-bot@chromium.org8c4953c2014-04-30 14:58:46 +0000267
commit-bot@chromium.orgcba73782014-05-29 15:57:47 +0000268SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) {
reed@google.come901b4c2011-11-14 21:56:45 +0000269 return NULL;
270}