blob: c42c4ad62956f559f0d69606ba5fa6dfde8b22d9 [file] [log] [blame]
;
; jsimdcpu.asm - SIMD instruction support check
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler); it
; can *not* be assembled with Microsoft's MASM or any compatible
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
;
; [TAB8]

%include "jsimdext.inc"

; --------------------------------------------------------------------------
        SECTION SEG_TEXT
        BITS    32

;
; Report which SIMD instruction sets the CPU advertises through CPUID.
;
; GLOBAL(unsigned int)
; jpeg_simd_cpu_support (void)
;
; Out:   eax = bitwise OR of JSIMD_MMX, JSIMD_SSE, JSIMD_SSE2 and JSIMD_3DNOW
;              (constants defined outside this file) for each feature bit
;              that CPUID reports as set; 0 if CPUID cannot be executed or
;              its highest standard leaf is 0.
; Saved: ebx, edi (pushed on entry, popped before returning)
; Clobb: ecx, edx, flags
;

        align   16
        global  EXTN(jpeg_simd_cpu_support)

EXTN(jpeg_simd_cpu_support):
        push    edi
        push    ebx                     ; cpuid overwrites ebx

        xor     edi, edi                ; edi = accumulated JSIMD_* flags

        ; CPUID is usable only if the ID bit (bit 21) of EFLAGS can be toggled.
        pushfd
        pop     ecx                     ; ecx = EFLAGS as found
        mov     eax, ecx
        xor     eax, 1<<21              ; eax = EFLAGS with the ID bit flipped
        push    eax
        popfd
        pushfd
        pop     eax                     ; eax = EFLAGS as read back
        cmp     eax, ecx
        je      short .done             ; nothing changed: CPUID is not supported

        ; Leaf 0: eax = highest standard leaf; leaf 1 must exist to go on.
        xor     eax, eax
        cpuid
        test    eax, eax
        jz      short .done

        ; Leaf 1: edx = standard feature flags.
        mov     eax, 1
        cpuid

        test    edx, 1<<23              ; bit23:MMX
        jz      short .mmx_done
        or      edi, byte JSIMD_MMX
.mmx_done:
        test    edx, 1<<25              ; bit25:SSE
        jz      short .sse_done
        or      edi, byte JSIMD_SSE
.sse_done:
        test    edx, 1<<26              ; bit26:SSE2
        jz      short .sse2_done
        or      edi, byte JSIMD_SSE2
.sse2_done:

        ; Leaf 0x80000000: eax = highest extended leaf.  The 3DNow! bit lives
        ; in leaf 0x80000001, so stop here if that leaf is not available.
        mov     eax, 0x80000000
        cpuid
        cmp     eax, 0x80000001
        jb      short .done             ; unsigned: highest leaf < 0x80000001

        ; Leaf 0x80000001: edx = extended feature flags.
        mov     eax, 0x80000001
        cpuid

        test    edx, 1<<31              ; bit31:3DNow!(vendor independent)
        jz      short .done
        or      edi, byte JSIMD_3DNOW

.done:
        mov     eax, edi                ; return the accumulated flags

        pop     ebx
        pop     edi
        ret

; Without this trailing alignment directive, the OS X linker does not honor
; the request to align the segment.
        align   16