Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 1 | /* libFLAC - Free Lossless Audio Codec library |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 2 | * Copyright (C) 2001-2009 Josh Coalson |
| 3 | * Copyright (C) 2011-2014 Xiph.Org Foundation |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * |
| 9 | * - Redistributions of source code must retain the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer. |
| 11 | * |
| 12 | * - Redistributions in binary form must reproduce the above copyright |
| 13 | * notice, this list of conditions and the following disclaimer in the |
| 14 | * documentation and/or other materials provided with the distribution. |
| 15 | * |
| 16 | * - Neither the name of the Xiph.org Foundation nor the names of its |
| 17 | * contributors may be used to endorse or promote products derived from |
| 18 | * this software without specific prior written permission. |
| 19 | * |
| 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 24 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 27 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 28 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 29 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 30 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | */ |
| 32 | |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 33 | #ifdef HAVE_CONFIG_H |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 34 | # include <config.h> |
| 35 | #endif |
| 36 | |
| 37 | #include "private/cpu.h" |
| 38 | #include <stdlib.h> |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 39 | #include <memory.h> |
| 40 | #ifdef DEBUG |
| 41 | # include <stdio.h> |
| 42 | #endif |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 43 | |
| 44 | #if defined FLAC__CPU_IA32 |
| 45 | # include <signal.h> |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 46 | |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 47 | static void disable_sse(FLAC__CPUInfo *info) |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 48 | { |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 49 | info->ia32.sse = false; |
| 50 | info->ia32.sse2 = false; |
| 51 | info->ia32.sse3 = false; |
| 52 | info->ia32.ssse3 = false; |
| 53 | info->ia32.sse41 = false; |
| 54 | info->ia32.sse42 = false; |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 55 | } |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 56 | |
| 57 | static void disable_avx(FLAC__CPUInfo *info) |
| 58 | { |
| 59 | info->ia32.avx = false; |
| 60 | info->ia32.avx2 = false; |
| 61 | info->ia32.fma = false; |
| 62 | } |
| 63 | |
| 64 | #elif defined FLAC__CPU_X86_64 |
| 65 | |
| 66 | static void disable_avx(FLAC__CPUInfo *info) |
| 67 | { |
| 68 | info->x86.avx = false; |
| 69 | info->x86.avx2 = false; |
| 70 | info->x86.fma = false; |
| 71 | } |
| 72 | #endif |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 73 | |
| 74 | #if defined (__NetBSD__) || defined(__OpenBSD__) |
| 75 | #include <sys/param.h> |
| 76 | #include <sys/sysctl.h> |
| 77 | #include <machine/cpu.h> |
| 78 | #endif |
| 79 | |
| 80 | #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) |
| 81 | #include <sys/types.h> |
| 82 | #include <sys/sysctl.h> |
| 83 | #endif |
| 84 | |
| 85 | #if defined(__APPLE__) |
| 86 | /* how to get sysctlbyname()? */ |
| 87 | #endif |
| 88 | |
#ifdef FLAC__CPU_IA32
/* feature bits tested against EDX of CPUID AX=00000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 1u << 15;
static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 1u << 23;
static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 1u << 24;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 1u << 25;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 1u << 26;
#endif

/* feature bits tested against ECX of CPUID AX=00000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 1u << 0;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 1u << 9;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE41 = 1u << 19;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE42 = 1u << 20;

#if defined FLAC__AVX_SUPPORTED
/* feature bits tested against ECX of CPUID AX=00000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_OSXSAVE = 1u << 27;
static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX = 1u << 28;
static const unsigned FLAC__CPUINFO_IA32_CPUID_FMA = 1u << 12;
/* feature bit tested against EBX of CPUID AX=00000007 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX2 = 1u << 5;
#endif
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 112 | |
| 113 | /* |
| 114 | * Extra stuff needed for detection of OS support for SSE on IA-32 |
| 115 | */ |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 116 | #if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 117 | # if defined(__linux__) |
| 118 | /* |
| 119 | * If the OS doesn't support SSE, we will get here with a SIGILL. We |
| 120 | * modify the return address to jump over the offending SSE instruction |
| 121 | * and also the operation following it that indicates the instruction |
| 122 | * executed successfully. In this way we use no global variables and |
| 123 | * stay thread-safe. |
| 124 | * |
| 125 | * 3 + 3 + 6: |
| 126 | * 3 bytes for "xorps xmm0,xmm0" |
| 127 | * 3 bytes for estimate of how long the following "inc var" instruction is |
| 128 | * 6 bytes extra in case our estimate is wrong |
| 129 | * 12 bytes puts us in the NOP "landing zone" |
| 130 | */ |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 131 | # include <sys/ucontext.h> |
| 132 | static void sigill_handler_sse_os(int signal, siginfo_t *si, void *uc) |
| 133 | { |
| 134 | (void)signal, (void)si; |
| 135 | ((ucontext_t*)uc)->uc_mcontext.gregs[14/*REG_EIP*/] += 3 + 3 + 6; |
| 136 | } |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 137 | # elif defined(_MSC_VER) |
| 138 | # include <windows.h> |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 139 | # endif |
| 140 | #endif |
| 141 | |
| 142 | |
| 143 | void FLAC__cpu_info(FLAC__CPUInfo *info) |
| 144 | { |
| 145 | /* |
| 146 | * IA32-specific |
| 147 | */ |
| 148 | #ifdef FLAC__CPU_IA32 |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 149 | FLAC__bool ia32_fxsr = false; |
| 150 | FLAC__bool ia32_osxsave = false; |
| 151 | (void) ia32_fxsr; (void) ia32_osxsave; /* to avoid warnings about unused variables */ |
| 152 | memset(info, 0, sizeof(*info)); |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 153 | info->type = FLAC__CPUINFO_TYPE_IA32; |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 154 | #if !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 155 | info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */ |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 156 | #ifdef FLAC__HAS_X86INTRIN |
| 157 | if(!FLAC__cpu_have_cpuid_x86()) |
| 158 | return; |
worker | a3d7a6d | 2015-10-30 17:30:39 +0100 | [diff] [blame] | 159 | #else |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 160 | if(!FLAC__cpu_have_cpuid_asm_ia32()) |
| 161 | return; |
dingjf0506 | 787abc6 | 2015-10-30 10:36:08 +0800 | [diff] [blame] | 162 | #endif |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 163 | { |
| 164 | /* http://www.sandpile.org/x86/cpuid.htm */ |
| 165 | #ifdef FLAC__HAS_X86INTRIN |
| 166 | FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; |
| 167 | FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); |
| 168 | info->ia32.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */ |
| 169 | FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); |
| 170 | #else |
| 171 | FLAC__uint32 flags_ecx, flags_edx; |
| 172 | FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx); |
| 173 | #endif |
| 174 | info->ia32.cmov = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false; |
| 175 | info->ia32.mmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX )? true : false; |
| 176 | ia32_fxsr = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false; |
| 177 | info->ia32.sse = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE )? true : false; |
| 178 | info->ia32.sse2 = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false; |
| 179 | info->ia32.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false; |
| 180 | info->ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false; |
| 181 | info->ia32.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false; |
| 182 | info->ia32.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false; |
| 183 | #if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED |
| 184 | ia32_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false; |
| 185 | info->ia32.avx = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX )? true : false; |
| 186 | info->ia32.fma = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA )? true : false; |
| 187 | FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); |
| 188 | info->ia32.avx2 = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2 )? true : false; |
| 189 | #endif |
| 190 | } |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 191 | |
| 192 | #ifdef DEBUG |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 193 | fprintf(stderr, "CPU info (IA-32):\n"); |
| 194 | fprintf(stderr, " CMOV ....... %c\n", info->ia32.cmov ? 'Y' : 'n'); |
| 195 | fprintf(stderr, " MMX ........ %c\n", info->ia32.mmx ? 'Y' : 'n'); |
| 196 | fprintf(stderr, " SSE ........ %c\n", info->ia32.sse ? 'Y' : 'n'); |
| 197 | fprintf(stderr, " SSE2 ....... %c\n", info->ia32.sse2 ? 'Y' : 'n'); |
| 198 | fprintf(stderr, " SSE3 ....... %c\n", info->ia32.sse3 ? 'Y' : 'n'); |
| 199 | fprintf(stderr, " SSSE3 ...... %c\n", info->ia32.ssse3 ? 'Y' : 'n'); |
| 200 | fprintf(stderr, " SSE41 ...... %c\n", info->ia32.sse41 ? 'Y' : 'n'); |
| 201 | fprintf(stderr, " SSE42 ...... %c\n", info->ia32.sse42 ? 'Y' : 'n'); |
| 202 | # if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED |
| 203 | fprintf(stderr, " AVX ........ %c\n", info->ia32.avx ? 'Y' : 'n'); |
| 204 | fprintf(stderr, " FMA ........ %c\n", info->ia32.fma ? 'Y' : 'n'); |
| 205 | fprintf(stderr, " AVX2 ....... %c\n", info->ia32.avx2 ? 'Y' : 'n'); |
| 206 | # endif |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 207 | #endif |
| 208 | |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 209 | /* |
| 210 | * now have to check for OS support of SSE instructions |
| 211 | */ |
| 212 | if(info->ia32.sse) { |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 213 | #if defined FLAC__NO_SSE_OS |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 214 | /* assume user knows better than us; turn it off */ |
| 215 | disable_sse(info); |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 216 | #elif defined FLAC__SSE_OS |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 217 | /* assume user knows better than us; leave as detected above */ |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 218 | #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__) |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 219 | int sse = 0; |
| 220 | size_t len; |
| 221 | /* at least one of these must work: */ |
| 222 | len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse); |
| 223 | len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse" , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */ |
| 224 | if(!sse) |
| 225 | disable_sse(info); |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 226 | #elif defined(__NetBSD__) || defined (__OpenBSD__) |
| 227 | # if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__) |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 228 | int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE }; |
| 229 | size_t len = sizeof(val); |
| 230 | if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) |
| 231 | disable_sse(info); |
| 232 | else { /* double-check SSE2 */ |
| 233 | mib[1] = CPU_SSE2; |
| 234 | len = sizeof(val); |
| 235 | if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) { |
| 236 | disable_sse(info); |
| 237 | info->ia32.sse = true; |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 238 | } |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 239 | } |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 240 | # else |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 241 | disable_sse(info); |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 242 | # endif |
| 243 | #elif defined(__linux__) |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 244 | int sse = 0; |
| 245 | struct sigaction sigill_save; |
| 246 | struct sigaction sigill_sse; |
| 247 | sigill_sse.sa_sigaction = sigill_handler_sse_os; |
| 248 | __sigemptyset(&sigill_sse.sa_mask); |
| 249 | sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */ |
| 250 | if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save)) |
| 251 | { |
| 252 | /* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */ |
| 253 | /* see sigill_handler_sse_os() for an explanation of the following: */ |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 254 | asm volatile ( |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 255 | "xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */ |
| 256 | "incl %0\n\t" /* SIGILL handler will jump over this */ |
| 257 | /* landing zone */ |
| 258 | "nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */ |
| 259 | "nop\n\t" |
| 260 | "nop\n\t" |
| 261 | "nop\n\t" |
| 262 | "nop\n\t" |
| 263 | "nop\n\t" |
| 264 | "nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */ |
| 265 | "nop\n\t" |
| 266 | "nop" /* SIGILL jump lands here if "inc" is 1 byte */ |
| 267 | : "=r"(sse) |
| 268 | : "0"(sse) |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 269 | ); |
| 270 | |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 271 | sigaction(SIGILL, &sigill_save, NULL); |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 272 | } |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 273 | |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 274 | if(!sse) |
| 275 | disable_sse(info); |
| 276 | #elif defined(_MSC_VER) |
| 277 | __try { |
| 278 | __asm { |
| 279 | xorps xmm0,xmm0 |
| 280 | } |
dingjf0506 | 787abc6 | 2015-10-30 10:36:08 +0800 | [diff] [blame] | 281 | } |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 282 | __except(EXCEPTION_EXECUTE_HANDLER) { |
| 283 | if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION) |
| 284 | disable_sse(info); |
| 285 | } |
| 286 | #elif defined(__GNUC__) /* MinGW goes here */ |
| 287 | int sse = 0; |
| 288 | /* Based on the idea described in Agner Fog's manual "Optimizing subroutines in assembly language" */ |
| 289 | /* In theory, not guaranteed to detect lack of OS SSE support on some future Intel CPUs, but in practice works (see the aforementioned manual) */ |
| 290 | if (ia32_fxsr) { |
| 291 | struct { |
| 292 | FLAC__uint32 buff[128]; |
| 293 | } __attribute__((aligned(16))) fxsr; |
| 294 | FLAC__uint32 old_val, new_val; |
| 295 | |
| 296 | asm volatile ("fxsave %0" : "=m" (fxsr) : "m" (fxsr)); |
| 297 | old_val = fxsr.buff[50]; |
| 298 | fxsr.buff[50] ^= 0x0013c0de; /* change value in the buffer */ |
| 299 | asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr)); /* try to change SSE register */ |
| 300 | fxsr.buff[50] = old_val; /* restore old value in the buffer */ |
| 301 | asm volatile ("fxsave %0 " : "=m" (fxsr) : "m" (fxsr)); /* old value will be overwritten if SSE register was changed */ |
| 302 | new_val = fxsr.buff[50]; /* == old_val if FXRSTOR didn't change SSE register and (old_val ^ 0x0013c0de) otherwise */ |
| 303 | fxsr.buff[50] = old_val; /* again restore old value in the buffer */ |
| 304 | asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr)); /* restore old values of registers */ |
| 305 | |
| 306 | if ((old_val^new_val) == 0x0013c0de) |
| 307 | sse = 1; |
| 308 | } |
| 309 | if(!sse) |
| 310 | disable_sse(info); |
| 311 | #else |
| 312 | /* no way to test, disable to be safe */ |
| 313 | disable_sse(info); |
| 314 | #endif |
| 315 | #ifdef DEBUG |
| 316 | fprintf(stderr, " SSE OS sup . %c\n", info->ia32.sse ? 'Y' : 'n'); |
| 317 | #endif |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 318 | } |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 319 | else /* info->ia32.sse == false */ |
| 320 | disable_sse(info); |
| 321 | |
| 322 | /* |
| 323 | * now have to check for OS support of AVX instructions |
| 324 | */ |
| 325 | if(info->ia32.avx && ia32_osxsave) { |
| 326 | FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86(); |
| 327 | if ((ecr & 0x6) != 0x6) |
| 328 | disable_avx(info); |
| 329 | #ifdef DEBUG |
| 330 | fprintf(stderr, " AVX OS sup . %c\n", info->ia32.avx ? 'Y' : 'n'); |
| 331 | #endif |
| 332 | } |
| 333 | else /* no OS AVX support*/ |
| 334 | disable_avx(info); |
| 335 | #else |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 336 | info->use_asm = false; |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 337 | #endif |
dingjf0506 | 787abc6 | 2015-10-30 10:36:08 +0800 | [diff] [blame] | 338 | |
| 339 | /* |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 340 | * x86-64-specific |
| 341 | */ |
| 342 | #elif defined FLAC__CPU_X86_64 |
| 343 | FLAC__bool x86_osxsave = false; |
| 344 | (void) x86_osxsave; /* to avoid warnings about unused variables */ |
| 345 | memset(info, 0, sizeof(*info)); |
| 346 | info->type = FLAC__CPUINFO_TYPE_X86_64; |
| 347 | #if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN |
| 348 | info->use_asm = true; |
| 349 | { |
| 350 | /* http://www.sandpile.org/x86/cpuid.htm */ |
| 351 | FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; |
| 352 | FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); |
| 353 | info->x86.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */ |
| 354 | FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); |
| 355 | info->x86.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false; |
| 356 | info->x86.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false; |
| 357 | info->x86.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false; |
| 358 | info->x86.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false; |
| 359 | #if defined FLAC__AVX_SUPPORTED |
| 360 | x86_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false; |
| 361 | info->x86.avx = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX )? true : false; |
| 362 | info->x86.fma = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA )? true : false; |
| 363 | FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); |
| 364 | info->x86.avx2 = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2 )? true : false; |
| 365 | #endif |
| 366 | } |
| 367 | #ifdef DEBUG |
| 368 | fprintf(stderr, "CPU info (x86-64):\n"); |
| 369 | fprintf(stderr, " SSE3 ....... %c\n", info->x86.sse3 ? 'Y' : 'n'); |
| 370 | fprintf(stderr, " SSSE3 ...... %c\n", info->x86.ssse3 ? 'Y' : 'n'); |
| 371 | fprintf(stderr, " SSE41 ...... %c\n", info->x86.sse41 ? 'Y' : 'n'); |
| 372 | fprintf(stderr, " SSE42 ...... %c\n", info->x86.sse42 ? 'Y' : 'n'); |
| 373 | # if defined FLAC__AVX_SUPPORTED |
| 374 | fprintf(stderr, " AVX ........ %c\n", info->x86.avx ? 'Y' : 'n'); |
| 375 | fprintf(stderr, " FMA ........ %c\n", info->x86.fma ? 'Y' : 'n'); |
| 376 | fprintf(stderr, " AVX2 ....... %c\n", info->x86.avx2 ? 'Y' : 'n'); |
| 377 | # endif |
| 378 | #endif |
| 379 | |
| 380 | /* |
| 381 | * now have to check for OS support of AVX instructions |
| 382 | */ |
| 383 | if(info->x86.avx && x86_osxsave) { |
| 384 | FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86(); |
| 385 | if ((ecr & 0x6) != 0x6) |
| 386 | disable_avx(info); |
| 387 | #ifdef DEBUG |
| 388 | fprintf(stderr, " AVX OS sup . %c\n", info->x86.avx ? 'Y' : 'n'); |
| 389 | #endif |
| 390 | } |
| 391 | else /* no OS AVX support*/ |
| 392 | disable_avx(info); |
| 393 | #else |
| 394 | info->use_asm = false; |
| 395 | #endif |
| 396 | |
| 397 | /* |
| 398 | * unknown CPU |
Kenny Root | c746637 | 2009-11-08 12:46:32 -0600 | [diff] [blame] | 399 | */ |
| 400 | #else |
| 401 | info->type = FLAC__CPUINFO_TYPE_UNKNOWN; |
| 402 | info->use_asm = false; |
| 403 | #endif |
| 404 | } |
Robert Shih | 9e64f14 | 2015-01-05 17:35:54 -0800 | [diff] [blame] | 405 | |
| 406 | #if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN |
| 407 | |
| 408 | #if defined _MSC_VER |
| 409 | #include <intrin.h> /* for __cpuid() and _xgetbv() */ |
| 410 | #elif defined __GNUC__ && defined HAVE_CPUID_H |
| 411 | #include <cpuid.h> /* for __get_cpuid() and __get_cpuid_max() */ |
| 412 | #endif |
| 413 | |
| 414 | FLAC__uint32 FLAC__cpu_have_cpuid_x86(void) |
| 415 | { |
| 416 | #ifdef FLAC__CPU_X86_64 |
| 417 | return 1; |
| 418 | #else |
| 419 | # if defined _MSC_VER || defined __INTEL_COMPILER /* Do they support CPUs w/o CPUID support (or OSes that work on those CPUs)? */ |
| 420 | FLAC__uint32 flags1, flags2; |
| 421 | __asm { |
| 422 | pushfd |
| 423 | pushfd |
| 424 | pop eax |
| 425 | mov flags1, eax |
| 426 | xor eax, 0x200000 |
| 427 | push eax |
| 428 | popfd |
| 429 | pushfd |
| 430 | pop eax |
| 431 | mov flags2, eax |
| 432 | popfd |
| 433 | } |
| 434 | if (((flags1^flags2) & 0x200000) != 0) |
| 435 | return 1; |
| 436 | else |
| 437 | return 0; |
| 438 | # elif defined __GNUC__ && defined HAVE_CPUID_H |
| 439 | if (__get_cpuid_max(0, 0) != 0) |
| 440 | return 1; |
| 441 | else |
| 442 | return 0; |
| 443 | # else |
| 444 | return 0; |
| 445 | # endif |
| 446 | #endif |
| 447 | } |
| 448 | |
| 449 | void FLAC__cpu_info_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx) |
| 450 | { |
| 451 | #if defined _MSC_VER || defined __INTEL_COMPILER |
| 452 | int cpuinfo[4]; |
| 453 | int ext = level & 0x80000000; |
| 454 | __cpuid(cpuinfo, ext); |
| 455 | if((unsigned)cpuinfo[0] < level) { |
| 456 | *eax = *ebx = *ecx = *edx = 0; |
| 457 | return; |
| 458 | } |
| 459 | #if defined FLAC__AVX_SUPPORTED |
| 460 | __cpuidex(cpuinfo, level, 0); /* for AVX2 detection */ |
| 461 | #else |
| 462 | __cpuid(cpuinfo, level); /* some old compilers don't support __cpuidex */ |
| 463 | #endif |
| 464 | *eax = cpuinfo[0]; *ebx = cpuinfo[1]; *ecx = cpuinfo[2]; *edx = cpuinfo[3]; |
| 465 | #elif defined __GNUC__ && defined HAVE_CPUID_H |
| 466 | FLAC__uint32 ext = level & 0x80000000; |
| 467 | __cpuid(ext, *eax, *ebx, *ecx, *edx); |
| 468 | if (*eax < level) { |
| 469 | *eax = *ebx = *ecx = *edx = 0; |
| 470 | return; |
| 471 | } |
| 472 | __cpuid_count(level, 0, *eax, *ebx, *ecx, *edx); |
| 473 | #else |
| 474 | *eax = *ebx = *ecx = *edx = 0; |
| 475 | #endif |
| 476 | } |
| 477 | |
| 478 | FLAC__uint32 FLAC__cpu_xgetbv_x86(void) |
| 479 | { |
| 480 | #if (defined _MSC_VER || defined __INTEL_COMPILER) && defined FLAC__AVX_SUPPORTED |
| 481 | return (FLAC__uint32)_xgetbv(0); |
| 482 | #elif defined __GNUC__ |
| 483 | FLAC__uint32 lo, hi; |
| 484 | asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c" (0)); |
| 485 | return lo; |
| 486 | #else |
| 487 | return 0; |
| 488 | #endif |
| 489 | } |
| 490 | |
| 491 | #endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ |