senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 1 | /* |
epoger@google.com | ec3ed6a | 2011-07-28 14:26:00 +0000 | [diff] [blame] | 2 | * Copyright 2009 The Android Open Source Project |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 6 | */ |
| 7 | |
senorblanco@chromium.org | dc7de74 | 2009-11-30 20:00:29 +0000 | [diff] [blame] | 8 | #include "SkBitmapProcState_opts_SSE2.h" |
reed@google.com | 58af9a6 | 2011-10-12 13:43:52 +0000 | [diff] [blame] | 9 | #include "SkBlitMask.h" |
senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 10 | #include "SkBlitRow_opts_SSE2.h" |
| 11 | #include "SkUtils_opts_SSE2.h" |
| 12 | #include "SkUtils.h" |
| 13 | |
| 14 | /* This file must *not* be compiled with -msse or -msse2, otherwise |
| 15 | gcc may generate sse2 even for scalar ops (and thus give an invalid |
| 16 | instruction on Pentium3 on the code below). Only files named *_SSE2.cpp |
| 17 | in this directory should be compiled with -msse2. */ |
| 18 | |
senorblanco@chromium.org | 0c547bc | 2009-11-20 18:55:44 +0000 | [diff] [blame] | 19 | #if defined(__x86_64__) || defined(_WIN64) |
/* SSE2 is present on every x86_64 machine, so no runtime probe is needed. */
static inline bool hasSSE2() {
    const bool kSSE2IsBaseline = true;
    return kSSE2IsBaseline;
}
| 24 | #else |
| 25 | #ifdef _MSC_VER |
// MSVC inline-assembly version.
// Executes the CPUID instruction with info_type loaded into EAX and stores
// the resulting EAX, EBX, ECX and EDX, in that order, into info[0..3].
static inline void getcpuid(int info_type, int info[4]) {
    __asm {
        mov    eax, [info_type]   // requested CPUID leaf goes in EAX
        cpuid
        mov    edi, [info]        // EDI = address of the output array
        mov    [edi], eax         // info[0]
        mov    [edi+4], ebx       // info[1]
        mov    [edi+8], ecx       // info[2]
        mov    [edi+12], edx      // info[3]
    }
}
| 37 | #else |
// GCC/Clang inline-assembly version (32-bit x86 only).
// Executes CPUID for leaf `info_type`, sub-leaf 0, and stores the resulting
// EAX, EBX, ECX and EDX, in that order, into info[0..3].
static inline void getcpuid(int info_type, int info[4]) {
    // CPUID overwrites EBX, which must be preserved so this code stays
    // compatible with -fPIC. EBX is therefore parked in EDI across the
    // instruction and the EBX result is handed back through EDI ("=D").
    // The operand is pinned to EDI on purpose: with a generic "=r" the
    // compiler is free to choose EBX itself in non-PIC builds, in which case
    // restoring EBX would also destroy the value being returned.
    // ECX is zeroed on entry so that leaves which take a sub-leaf index in
    // ECX see a defined value.
    __asm__ __volatile__ (
        "movl  %%ebx, %%edi \n\t"
        "cpuid              \n\t"
        "xchgl %%edi, %%ebx \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
| 49 | #endif |
| 50 | |
| 51 | static inline bool hasSSE2() { |
| 52 | int cpu_info[4] = { 0 }; |
| 53 | getcpuid(1, cpu_info); |
| 54 | return (cpu_info[3] & (1<<26)) != 0; |
| 55 | } |
| 56 | #endif |
| 57 | |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 58 | static bool cachedHasSSE2() { |
| 59 | static bool gHasSSE2 = hasSSE2(); |
| 60 | return gHasSSE2; |
| 61 | } |
| 62 | |
senorblanco@chromium.org | dc7de74 | 2009-11-30 20:00:29 +0000 | [diff] [blame] | 63 | void SkBitmapProcState::platformProcs() { |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 64 | if (cachedHasSSE2()) { |
senorblanco@chromium.org | dc7de74 | 2009-11-30 20:00:29 +0000 | [diff] [blame] | 65 | if (fSampleProc32 == S32_opaque_D32_filter_DX) { |
| 66 | fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; |
senorblanco@chromium.org | f3f0bd7 | 2009-12-10 22:46:31 +0000 | [diff] [blame] | 67 | } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { |
| 68 | fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; |
senorblanco@chromium.org | dc7de74 | 2009-11-30 20:00:29 +0000 | [diff] [blame] | 69 | } |
| 70 | } |
| 71 | } |
| 72 | |
senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 73 | static SkBlitRow::Proc32 platform_32_procs[] = { |
| 74 | NULL, // S32_Opaque, |
| 75 | S32_Blend_BlitRow32_SSE2, // S32_Blend, |
| 76 | S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque |
| 77 | S32A_Blend_BlitRow32_SSE2, // S32A_Blend, |
| 78 | }; |
| 79 | |
| 80 | SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) { |
| 81 | return NULL; |
| 82 | } |
| 83 | |
| 84 | SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { |
| 85 | return NULL; |
| 86 | } |
| 87 | |
senorblanco@chromium.org | c385638 | 2010-12-13 15:27:20 +0000 | [diff] [blame] | 88 | SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 89 | if (cachedHasSSE2()) { |
senorblanco@chromium.org | c385638 | 2010-12-13 15:27:20 +0000 | [diff] [blame] | 90 | return Color32_SSE2; |
| 91 | } else { |
| 92 | return NULL; |
| 93 | } |
| 94 | } |
| 95 | |
senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 96 | SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 97 | if (cachedHasSSE2()) { |
senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 98 | return platform_32_procs[flags]; |
| 99 | } else { |
| 100 | return NULL; |
| 101 | } |
| 102 | } |
| 103 | |
reed@google.com | 981d479 | 2011-03-09 12:55:47 +0000 | [diff] [blame] | 104 | |
reed@google.com | e901b4c | 2011-11-14 21:56:45 +0000 | [diff] [blame^] | 105 | SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, |
| 106 | SkMask::Format maskFormat, |
| 107 | SkColor color) { |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 108 | if (SkMask::kA8_Format != maskFormat) { |
| 109 | return NULL; |
| 110 | } |
reed@google.com | e901b4c | 2011-11-14 21:56:45 +0000 | [diff] [blame^] | 111 | |
| 112 | ColorProc proc = NULL; |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 113 | if (cachedHasSSE2()) { |
reed@google.com | 981d479 | 2011-03-09 12:55:47 +0000 | [diff] [blame] | 114 | switch (dstConfig) { |
| 115 | case SkBitmap::kARGB_8888_Config: |
reed@google.com | e6ea606 | 2011-07-07 19:12:50 +0000 | [diff] [blame] | 116 | // The SSE2 version is not (yet) faster for black, so we check |
| 117 | // for that. |
| 118 | if (SK_ColorBLACK != color) { |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 119 | proc = SkARGB32_A8_BlitMask_SSE2; |
reed@google.com | e6ea606 | 2011-07-07 19:12:50 +0000 | [diff] [blame] | 120 | } |
reed@google.com | 981d479 | 2011-03-09 12:55:47 +0000 | [diff] [blame] | 121 | break; |
| 122 | default: |
reed@google.com | e901b4c | 2011-11-14 21:56:45 +0000 | [diff] [blame^] | 123 | break; |
reed@google.com | 981d479 | 2011-03-09 12:55:47 +0000 | [diff] [blame] | 124 | } |
| 125 | } |
| 126 | return proc; |
| 127 | } |
| 128 | |
reed@google.com | e901b4c | 2011-11-14 21:56:45 +0000 | [diff] [blame^] | 129 | SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, |
| 130 | SkMask::Format maskFormat) { |
| 131 | return NULL; |
| 132 | } |
| 133 | |
senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 134 | SkMemset16Proc SkMemset16GetPlatformProc() { |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 135 | if (cachedHasSSE2()) { |
senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 136 | return sk_memset16_SSE2; |
| 137 | } else { |
| 138 | return NULL; |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | SkMemset32Proc SkMemset32GetPlatformProc() { |
reed@google.com | edb606c | 2011-10-18 13:56:50 +0000 | [diff] [blame] | 143 | if (cachedHasSSE2()) { |
senorblanco@chromium.org | 4e75355 | 2009-11-16 21:09:00 +0000 | [diff] [blame] | 144 | return sk_memset32_SSE2; |
| 145 | } else { |
| 146 | return NULL; |
| 147 | } |
| 148 | } |