Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 1 | /* |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 2 | * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include "libyuv/cpu_id.h" |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 12 | |
| 13 | #ifdef _MSC_VER |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 14 | #include <intrin.h> // For __cpuid() |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 15 | #endif |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 16 | #if !defined(__CLR_VER) && defined(_M_X64) && \ |
| 17 | defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) |
| 18 | #include <immintrin.h> // For _xgetbv() |
| 19 | #endif |
| 20 | |
| 21 | #include <stdlib.h> // For getenv() |
| 22 | |
| 23 | // For ArmCpuCaps() but unittested on all platforms |
| 24 | #include <stdio.h> |
| 25 | #include <string.h> |
| 26 | |
| 27 | #include "libyuv/basic_types.h" // For CPU_X86 |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 28 | |
| 29 | // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux. |
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
// Executes CPUID for leaf |info_type| and stores the resulting
// EAX, EBX, ECX, EDX in cpu_info[0..3].
// EBX is never named as an operand here: its value is parked in EDI before
// the instruction and swapped back afterwards, so the EBX result is returned
// through EDI (presumably because EBX is reserved as the PIC base register in
// this configuration).
// ECX is zeroed on entry because CPUID leaves that have sub-leaves (leaf 7 is
// one) select the sub-leaf from ECX; without this the result would depend on
// whatever value happened to be in ECX.
static __inline void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile (  // NOLINT
    "mov %%ebx, %%edi                          \n"
    "cpuid                                     \n"
    "xchg %%edi, %%ebx                         \n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
#elif defined(__i386__) || defined(__x86_64__)
// Executes CPUID for leaf |info_type| and stores the resulting
// EAX, EBX, ECX, EDX in cpu_info[0..3].
// ECX is zeroed on entry because CPUID leaves that have sub-leaves (leaf 7 is
// one) select the sub-leaf from ECX; without this the result would depend on
// whatever value happened to be in ECX.
static __inline void __cpuid(int cpu_info[4], int info_type) {
  __asm__ volatile (  // NOLINT
    "cpuid                                     \n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
#endif
| 47 | |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 48 | #ifdef __cplusplus |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 49 | namespace libyuv { |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 50 | extern "C" { |
| 51 | #endif |
| 52 | |
| 53 | // Low level cpuid for X86. Returns zeros on other CPUs. |
| 54 | #if !defined(__CLR_VER) && (defined(_M_IX86) || defined(_M_X64) || \ |
| 55 | defined(__i386__) || defined(__x86_64__)) |
| 56 | LIBYUV_API |
| 57 | void CpuId(int cpu_info[4], int info_type) { |
| 58 | __cpuid(cpu_info, info_type); |
| 59 | } |
| 60 | #else |
| 61 | LIBYUV_API |
| 62 | void CpuId(int cpu_info[4], int) { |
| 63 | cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; |
| 64 | } |
| 65 | #endif |
| 66 | |
| 67 | // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. |
| 68 | #if !defined(__CLR_VER) && defined(_M_X64) && \ |
| 69 | defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) |
| 70 | #define HAS_XGETBV |
| 71 | static uint32 XGetBV(unsigned int xcr) { |
| 72 | return static_cast<uint32>(_xgetbv(xcr)); |
| 73 | } |
| 74 | #elif !defined(__CLR_VER) && defined(_M_IX86) |
| 75 | #define HAS_XGETBV |
| 76 | __declspec(naked) __declspec(align(16)) |
| 77 | static uint32 XGetBV(unsigned int xcr) { |
| 78 | __asm { |
| 79 | mov ecx, [esp + 4] // xcr |
| 80 | _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // xgetbv for vs2005. |
| 81 | ret |
| 82 | } |
| 83 | } |
| 84 | #elif defined(__i386__) || defined(__x86_64__) |
| 85 | #define HAS_XGETBV |
| 86 | static uint32 XGetBV(unsigned int xcr) { |
| 87 | uint32 xcr_feature_mask; |
| 88 | asm volatile ( // NOLINT |
| 89 | ".byte 0x0f, 0x01, 0xd0\n" |
| 90 | : "=a"(xcr_feature_mask) |
| 91 | : "c"(xcr) |
| 92 | : "memory", "cc", "edx"); // edx unused. |
| 93 | return xcr_feature_mask; |
| 94 | } |
| 95 | #endif |
| 96 | #ifdef HAS_XGETBV |
| 97 | static const int kXCR_XFEATURE_ENABLED_MASK = 0; |
| 98 | #endif |
| 99 | |
| 100 | // based on libvpx arm_cpudetect.c |
| 101 | // For Arm, but public to allow testing on any CPU |
| 102 | LIBYUV_API |
| 103 | int ArmCpuCaps(const char* cpuinfo_name) { |
| 104 | int flags = 0; |
| 105 | FILE* fin = fopen(cpuinfo_name, "r"); |
| 106 | if (fin) { |
| 107 | char buf[512]; |
| 108 | while (fgets(buf, 511, fin)) { |
| 109 | if (memcmp(buf, "Features", 8) == 0) { |
| 110 | flags |= kCpuInitialized; |
| 111 | char* p = strstr(buf, " neon"); |
| 112 | if (p && (p[5] == ' ' || p[5] == '\n')) { |
| 113 | flags |= kCpuHasNEON; |
| 114 | break; |
| 115 | } |
| 116 | } |
| 117 | } |
| 118 | fclose(fin); |
| 119 | } |
| 120 | return flags; |
| 121 | } |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 122 | |
// Cached CPU feature flags for SIMD instruction sets; starts at 0 and is
// assigned by InitCpuFlags() and MaskCpuFlags().
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 124 | LIBYUV_API |
| 125 | int cpu_info_ = 0; |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 126 | |
// Test environment variable for disabling CPU features.
// Returns true when |name| is set and its value does not begin with the
// character '0'; returns false when it is unset or begins with '0'. Only the
// first character is examined, so a variable set to an empty value counts as
// true. Treating "0" as off makes it easy to toggle the variable.
static bool TestEnv(const char* name) {
  const char* value = getenv(name);
  return value != NULL && value[0] != '0';
}
| 138 | |
| 139 | LIBYUV_API |
| 140 | int InitCpuFlags(void) { |
| 141 | #if !defined(__CLR_VER) && defined(CPU_X86) |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 142 | int cpu_info[4]; |
| 143 | __cpuid(cpu_info, 1); |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 144 | cpu_info_ = ((cpu_info[3] & 0x04000000) ? kCpuHasSSE2 : 0) | |
| 145 | ((cpu_info[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | |
| 146 | ((cpu_info[2] & 0x00080000) ? kCpuHasSSE41 : 0) | |
| 147 | ((cpu_info[2] & 0x00100000) ? kCpuHasSSE42 : 0) | |
| 148 | (((cpu_info[2] & 0x18000000) == 0x18000000) ? kCpuHasAVX : 0) | |
| 149 | kCpuInitialized | kCpuHasX86; |
| 150 | #ifdef HAS_XGETBV |
| 151 | if (cpu_info_ & kCpuHasAVX) { |
| 152 | __cpuid(cpu_info, 7); |
| 153 | if ((cpu_info[1] & 0x00000020) && |
| 154 | ((XGetBV(kXCR_XFEATURE_ENABLED_MASK) & 0x06) == 0x06)) { |
| 155 | cpu_info_ |= kCpuHasAVX2; |
| 156 | } |
| 157 | } |
| 158 | #endif |
| 159 | // environment variable overrides for testing. |
| 160 | if (TestEnv("LIBYUV_DISABLE_X86")) { |
| 161 | cpu_info_ &= ~kCpuHasX86; |
| 162 | } |
| 163 | if (TestEnv("LIBYUV_DISABLE_SSE2")) { |
| 164 | cpu_info_ &= ~kCpuHasSSE2; |
| 165 | } |
| 166 | if (TestEnv("LIBYUV_DISABLE_SSSE3")) { |
| 167 | cpu_info_ &= ~kCpuHasSSSE3; |
| 168 | } |
| 169 | if (TestEnv("LIBYUV_DISABLE_SSE41")) { |
| 170 | cpu_info_ &= ~kCpuHasSSE41; |
| 171 | } |
| 172 | if (TestEnv("LIBYUV_DISABLE_SSE42")) { |
| 173 | cpu_info_ &= ~kCpuHasSSE42; |
| 174 | } |
| 175 | if (TestEnv("LIBYUV_DISABLE_AVX")) { |
| 176 | cpu_info_ &= ~kCpuHasAVX; |
| 177 | } |
| 178 | if (TestEnv("LIBYUV_DISABLE_AVX2")) { |
| 179 | cpu_info_ &= ~kCpuHasAVX2; |
| 180 | } |
| 181 | if (TestEnv("LIBYUV_DISABLE_ASM")) { |
| 182 | cpu_info_ = kCpuInitialized; |
| 183 | } |
| 184 | #elif defined(__arm__) |
| 185 | #if defined(__linux__) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) |
| 186 | // linux arm parse text file for neon detect. |
| 187 | cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 188 | #elif defined(__ARM_NEON__) |
| 189 | // gcc -mfpu=neon defines __ARM_NEON__ |
| 190 | // Enable Neon if you want support for Neon and Arm, and use MaskCpuFlags |
| 191 | // to disable Neon on devices that do not have it. |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 192 | cpu_info_ = kCpuHasNEON; |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 193 | #endif |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 194 | cpu_info_ |= kCpuInitialized | kCpuHasARM; |
| 195 | if (TestEnv("LIBYUV_DISABLE_NEON")) { |
| 196 | cpu_info_ &= ~kCpuHasNEON; |
| 197 | } |
| 198 | if (TestEnv("LIBYUV_DISABLE_ASM")) { |
| 199 | cpu_info_ = kCpuInitialized; |
| 200 | } |
| 201 | #endif // __arm__ |
| 202 | return cpu_info_; |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 203 | } |
| 204 | |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 205 | LIBYUV_API |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 206 | void MaskCpuFlags(int enable_flags) { |
| 207 | InitCpuFlags(); |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 208 | cpu_info_ = (cpu_info_ & enable_flags) | kCpuInitialized; |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 209 | } |
| 210 | |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 211 | #ifdef __cplusplus |
| 212 | } // extern "C" |
Shri Borde | 7cd8149 | 2011-11-02 13:20:24 -0700 | [diff] [blame] | 213 | } // namespace libyuv |
Hendrik Dahlkamp | 33cfdeb | 2013-01-23 18:27:37 -0800 | [diff] [blame^] | 214 | #endif |