mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2015 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "SkOnce.h" |
| 9 | #include "SkOpts.h" |
Mike Klein | 8caa5af | 2015-08-04 16:48:43 -0400 | [diff] [blame] | 10 | |
mtklein | b2a3270 | 2015-08-18 10:00:29 -0700 | [diff] [blame] | 11 | #define SK_OPTS_NS sk_default |
mtklein | 4977983 | 2015-08-10 12:58:17 -0700 | [diff] [blame] | 12 | #include "SkBlitMask_opts.h" |
mtklein | 4a37d08 | 2015-09-10 10:38:02 -0700 | [diff] [blame] | 13 | #include "SkBlitRow_opts.h" |
mtklein | dce5ce4 | 2015-08-04 08:49:21 -0700 | [diff] [blame] | 14 | #include "SkBlurImageFilter_opts.h" |
mtklein | 2d141ba | 2015-08-18 09:43:28 -0700 | [diff] [blame] | 15 | #include "SkColorCubeFilter_opts.h" |
Mike Klein | 8caa5af | 2015-08-04 16:48:43 -0400 | [diff] [blame] | 16 | #include "SkFloatingPoint_opts.h" |
mtklein | 4e8a09d | 2015-09-10 11:18:31 -0700 | [diff] [blame] | 17 | #include "SkMatrix_opts.h" |
mtklein | d029ded | 2015-08-04 14:09:09 -0700 | [diff] [blame] | 18 | #include "SkMorphologyImageFilter_opts.h" |
mtklein | b639474 | 2015-08-06 08:17:16 -0700 | [diff] [blame] | 19 | #include "SkTextureCompressor_opts.h" |
Mike Klein | 8caa5af | 2015-08-04 16:48:43 -0400 | [diff] [blame] | 20 | #include "SkUtils_opts.h" |
mtklein | 490b615 | 2015-07-31 11:50:27 -0700 | [diff] [blame] | 21 | #include "SkXfermode_opts.h" |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 22 | |
thakis | 2d0e37a | 2016-01-03 12:44:43 -0800 | [diff] [blame] | 23 | #if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS) |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 24 | #if defined(SK_BUILD_FOR_WIN32) |
| 25 | #include <intrin.h> |
// Windows flavor: runs CPUID leaf 1 through the MSVC intrinsic.
// The intrinsic fills the four ints with EAX, EBX, ECX, EDX, in that order.
static void cpuid(uint32_t abcd[4]) {
    int* regs = reinterpret_cast<int*>(abcd);
    __cpuid(regs, 1);
}
// Windows flavor: runs CPUID leaf 7, sub-leaf 0 through the MSVC intrinsic.
// The intrinsic fills the four ints with EAX, EBX, ECX, EDX, in that order.
static void cpuid7(uint32_t abcd[4]) {
    int* regs = reinterpret_cast<int*>(abcd);
    __cpuidex(regs, 7, 0);
}
// Windows flavor: reads extended control register `xcr` through the MSVC intrinsic.
static uint64_t xgetbv(uint32_t xcr) {
    const uint64_t value = _xgetbv(xcr);
    return value;
}
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 29 | #else |
| 30 | #include <cpuid.h> |
#if !defined(__cpuid_count)  // Old Mac Clang doesn't have this defined.
    // Fallback definition: executes CPUID with EAX=leaf and ECX=subleaf, then stores
    // the resulting EAX, EBX, ECX, EDX into out_a, out_b, out_c, out_d respectively.
    #define __cpuid_count(leaf, subleaf, out_a, out_b, out_c, out_d)        \
        __asm__("cpuid"                                                     \
                : "=a"(out_a), "=b"(out_b), "=c"(out_c), "=d"(out_d)        \
                : "0"(leaf), "2"(subleaf))
#endif
// GCC/Clang flavor: runs CPUID leaf 1 via <cpuid.h>, writing EAX, EBX, ECX, EDX
// to abcd[0..3].  The helper's success/failure return value is ignored.
static void cpuid(uint32_t abcd[4]) {
    __get_cpuid(1, &abcd[0], &abcd[1], &abcd[2], &abcd[3]);
}
// GCC/Clang flavor: runs CPUID leaf 7, sub-leaf 0, writing EAX, EBX, ECX, EDX
// to abcd[0..3].
static void cpuid7(uint32_t abcd[4]) {
    uint32_t& eax = abcd[0];
    uint32_t& ebx = abcd[1];
    uint32_t& ecx = abcd[2];
    uint32_t& edx = abcd[3];
    __cpuid_count(7, 0, eax, ebx, ecx, edx);
}
// GCC/Clang flavor: executes XGETBV with ECX=xcr and returns the 64-bit result,
// which the instruction delivers split across EDX (high half) and EAX (low half).
static uint64_t xgetbv(uint32_t xcr) {
    uint32_t lo, hi;
    __asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(xcr));
    const uint64_t upper = static_cast<uint64_t>(hi) << 32;
    return upper | lo;
}
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 44 | #endif |
mtklein | 7de63d5 | 2015-10-28 13:45:59 -0700 | [diff] [blame] | 45 | #elif !defined(SK_ARM_HAS_NEON) && \ |
| 46 | defined(SK_CPU_ARM32) && \ |
| 47 | defined(SK_BUILD_FOR_ANDROID) && \ |
| 48 | !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 49 | #include <cpu-features.h> |
| 50 | #endif |
| 51 | |
namespace sk_default {

// Portable scalar versions of the 8888 premultiply / swap-R-and-B routines.
//
// A pixel is one uint32_t: alpha in bits 24-31 and three color channels in bits 0-23.
// The local names pretend the layout is BGRA (r in bits 16-23, g in 8-15, b in 0-7),
// but nothing depends on which outer channel is really red, so RGBA works just as well.
//
// Every routine processes `count` pixels (count <= 0 does nothing) and reads a pixel
// completely before storing its result, so dst and src may be the same array.

// Pulls out the 8-bit channel that starts at bit `shift` of pixel `p`.
static inline uint8_t xxxa_channel(uint32_t p, int shift) {
    return static_cast<uint8_t>(p >> shift);
}

// Scales channel c by alpha a: (c*a + 127) / 255 in integer math, which stays in [0,255].
static inline uint8_t xxxa_premul_channel(uint8_t c, uint8_t a) {
    return static_cast<uint8_t>((c*a + 127) / 255);
}

// Reassembles a pixel from four bytes, listed from the top byte (bits 24-31) down.
static inline uint32_t xxxa_pack(uint8_t b3, uint8_t b2, uint8_t b1, uint8_t b0) {
    return (uint32_t)b3 << 24
         | (uint32_t)b2 << 16
         | (uint32_t)b1 <<  8
         | (uint32_t)b0 <<  0;
}

// dst[i] = src[i] with each color channel multiplied by alpha; channel order is kept.
static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) {
    for (int i = 0; i < count; i++) {
        const uint32_t p = src[i];
        const uint8_t a = xxxa_channel(p, 24),
                      r = xxxa_channel(p, 16),
                      g = xxxa_channel(p,  8),
                      b = xxxa_channel(p,  0);
        dst[i] = xxxa_pack(a, xxxa_premul_channel(r, a),
                              xxxa_premul_channel(g, a),
                              xxxa_premul_channel(b, a));
    }
}

// dst[i] = src[i] with the channels in bits 16-23 and bits 0-7 exchanged.
static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
    for (int i = 0; i < count; i++) {
        const uint32_t p = src[i];
        const uint8_t a = xxxa_channel(p, 24),
                      r = xxxa_channel(p, 16),
                      g = xxxa_channel(p,  8),
                      b = xxxa_channel(p,  0);
        dst[i] = xxxa_pack(a, b, g, r);
    }
}

// Both of the above at once: premultiply by alpha, then exchange the outer two channels.
static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) {
    for (int i = 0; i < count; i++) {
        const uint32_t p = src[i];
        const uint8_t a = xxxa_channel(p, 24),
                      r = xxxa_channel(p, 16),
                      g = xxxa_channel(p,  8),
                      b = xxxa_channel(p,  0);
        dst[i] = xxxa_pack(a, xxxa_premul_channel(b, a),
                              xxxa_premul_channel(g, a),
                              xxxa_premul_channel(r, a));
    }
}

}  // namespace sk_default
| 103 | |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 104 | namespace SkOpts { |
mtklein | f684a78 | 2015-07-30 09:29:37 -0700 | [diff] [blame] | 105 | // Define default function pointer values here... |
mtklein | b2a3270 | 2015-08-18 10:00:29 -0700 | [diff] [blame] | 106 | // If our global compile options are set high enough, these defaults might even be |
| 107 | // CPU-specialized, e.g. a typical x86-64 machine might start with SSE2 defaults. |
Mike Klein | 8caa5af | 2015-08-04 16:48:43 -0400 | [diff] [blame] | 108 | // They'll still get a chance to be replaced with even better ones, e.g. using SSE4.1. |
mtklein | b2a3270 | 2015-08-18 10:00:29 -0700 | [diff] [blame] | 109 | decltype(rsqrt) rsqrt = sk_default::rsqrt; |
| 110 | decltype(memset16) memset16 = sk_default::memset16; |
| 111 | decltype(memset32) memset32 = sk_default::memset32; |
mtklein | 08f9234 | 2015-08-18 12:23:28 -0700 | [diff] [blame] | 112 | decltype(create_xfermode) create_xfermode = sk_default::create_xfermode; |
mtklein | b2a3270 | 2015-08-18 10:00:29 -0700 | [diff] [blame] | 113 | decltype(color_cube_filter_span) color_cube_filter_span = sk_default::color_cube_filter_span; |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 114 | |
mtklein | b2a3270 | 2015-08-18 10:00:29 -0700 | [diff] [blame] | 115 | decltype(box_blur_xx) box_blur_xx = sk_default::box_blur_xx; |
| 116 | decltype(box_blur_xy) box_blur_xy = sk_default::box_blur_xy; |
| 117 | decltype(box_blur_yx) box_blur_yx = sk_default::box_blur_yx; |
mtklein | d029ded | 2015-08-04 14:09:09 -0700 | [diff] [blame] | 118 | |
mtklein | b2a3270 | 2015-08-18 10:00:29 -0700 | [diff] [blame] | 119 | decltype(dilate_x) dilate_x = sk_default::dilate_x; |
| 120 | decltype(dilate_y) dilate_y = sk_default::dilate_y; |
| 121 | decltype( erode_x) erode_x = sk_default::erode_x; |
| 122 | decltype( erode_y) erode_y = sk_default::erode_y; |
mtklein | dce5ce4 | 2015-08-04 08:49:21 -0700 | [diff] [blame] | 123 | |
mtklein | b2a3270 | 2015-08-18 10:00:29 -0700 | [diff] [blame] | 124 | decltype(texture_compressor) texture_compressor = sk_default::texture_compressor; |
| 125 | decltype(fill_block_dimensions) fill_block_dimensions = sk_default::fill_block_dimensions; |
mtklein | b639474 | 2015-08-06 08:17:16 -0700 | [diff] [blame] | 126 | |
mtklein | b2a3270 | 2015-08-18 10:00:29 -0700 | [diff] [blame] | 127 | decltype(blit_mask_d32_a8) blit_mask_d32_a8 = sk_default::blit_mask_d32_a8; |
mtklein | 4977983 | 2015-08-10 12:58:17 -0700 | [diff] [blame] | 128 | |
mtklein | 4a37d08 | 2015-09-10 10:38:02 -0700 | [diff] [blame] | 129 | decltype(blit_row_color32) blit_row_color32 = sk_default::blit_row_color32; |
| 130 | |
mtklein | 4e8a09d | 2015-09-10 11:18:31 -0700 | [diff] [blame] | 131 | decltype(matrix_translate) matrix_translate = sk_default::matrix_translate; |
| 132 | decltype(matrix_scale_translate) matrix_scale_translate = sk_default::matrix_scale_translate; |
| 133 | decltype(matrix_affine) matrix_affine = sk_default::matrix_affine; |
| 134 | |
mtklein | a1bfaad | 2016-01-07 13:48:21 -0800 | [diff] [blame^] | 135 | decltype( premul_xxxa) premul_xxxa = sk_default:: premul_xxxa; |
| 136 | decltype( swaprb_xxxa) swaprb_xxxa = sk_default:: swaprb_xxxa; |
| 137 | decltype(premul_swaprb_xxxa) premul_swaprb_xxxa = sk_default::premul_swaprb_xxxa; |
| 138 | |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 139 | // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 140 | void Init_ssse3(); |
| 141 | void Init_sse41(); |
mtklein | ee9fe1e | 2015-11-16 17:28:55 -0800 | [diff] [blame] | 142 | void Init_sse42() {} |
mtklein | 084db25 | 2015-11-11 11:39:09 -0800 | [diff] [blame] | 143 | void Init_avx(); |
mtklein | ee9fe1e | 2015-11-16 17:28:55 -0800 | [diff] [blame] | 144 | void Init_avx2() {} |
mtklein | 9535492 | 2015-11-09 07:08:32 -0800 | [diff] [blame] | 145 | void Init_neon(); |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 146 | |
| 147 | static void init() { |
mtklein | f96bee3 | 2015-07-31 14:47:25 -0700 | [diff] [blame] | 148 | // TODO: Chrome's not linking _sse* opts on iOS simulator builds. Bug or feature? |
| 149 | #if defined(SK_CPU_X86) && !defined(SK_BUILD_FOR_IOS) |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 150 | uint32_t abcd[] = {0,0,0,0}; |
| 151 | cpuid(abcd); |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 152 | if (abcd[2] & (1<< 9)) { Init_ssse3(); } |
| 153 | if (abcd[2] & (1<<19)) { Init_sse41(); } |
mtklein | 9535492 | 2015-11-09 07:08:32 -0800 | [diff] [blame] | 154 | if (abcd[2] & (1<<20)) { Init_sse42(); } |
mtklein | 844a0b4 | 2015-11-06 19:48:19 -0800 | [diff] [blame] | 155 | |
| 156 | // AVX detection's kind of a pain. This is cribbed from Chromium. |
| 157 | if ( ( abcd[2] & (7<<26)) == (7<<26) && // Check bits 26-28 of ecx are all set, |
| 158 | (xgetbv(0) & 6 ) == 6 ){ // and check the OS supports XSAVE. |
| 159 | Init_avx(); |
| 160 | |
| 161 | // AVX2 additionally needs bit 5 set on ebx after calling cpuid(7). |
| 162 | uint32_t abcd7[] = {0,0,0,0}; |
| 163 | cpuid7(abcd7); |
| 164 | if (abcd7[1] & (1<<5)) { Init_avx2(); } |
| 165 | } |
| 166 | |
mtklein | 7de63d5 | 2015-10-28 13:45:59 -0700 | [diff] [blame] | 167 | #elif !defined(SK_ARM_HAS_NEON) && \ |
| 168 | defined(SK_CPU_ARM32) && \ |
| 169 | defined(SK_BUILD_FOR_ANDROID) && \ |
| 170 | !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) |
mtklein | 8317a18 | 2015-07-30 07:30:16 -0700 | [diff] [blame] | 171 | if (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) { Init_neon(); } |
| 172 | #endif |
| 173 | } |
| 174 | |
| 175 | SK_DECLARE_STATIC_ONCE(gInitOnce); |
| 176 | void Init() { SkOnce(&gInitOnce, init); } |
| 177 | |
| 178 | #if SK_ALLOW_STATIC_GLOBAL_INITIALIZERS |
| 179 | static struct AutoInit { |
| 180 | AutoInit() { Init(); } |
| 181 | } gAutoInit; |
| 182 | #endif |
| 183 | } |