;
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library - version 1.02
;
; Copyright (C) 1999-2006, MIYASAKA Masaru.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
;    claim that you wrote the original software. If you use this software
;    in a product, an acknowledgment in the product documentation would be
;    appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
;    misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; [TAB8]

; ==========================================================================
;  System-dependent configurations
;
;  The object-format symbol given on the NASM command line (-DWIN32,
;  -DOBJ32, -DELF, -DAOUT or -DMACHO) selects one branch below; with none
;  of them defined the final %else branch is used.
;
;  Every branch defines:
;    SEG_TEXT   - section name and attributes for code
;    SEG_CONST  - section name and attributes for constant data
;  The ELF, a.out and Mach-O branches also define GOT_SYMBOL; PIC support
;  is switched off later in this file when GOT_SYMBOL is not defined.

%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
; * Microsoft Visual C++
; * MinGW (Minimalist GNU for Windows)
; * CygWin
; * LCC-Win32

; -- segment definition --
;
%define SEG_TEXT    .text   align=16 public use32 class=CODE
%define SEG_CONST   .rdata  align=16 public use32 class=CONST

%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
; * Borland C++ (Win32)

; -- segment definition --
;
%define SEG_TEXT    .text   align=16 public use32 class=CODE
%define SEG_CONST   .data   align=16 public use32 class=DATA

%elifdef ELF    ; ----(nasm -felf -DELF ...)------------
; * Linux
; * *BSD family Unix using elf format
; * Unix System V, including Solaris x86, UnixWare and SCO Unix

; -- segment definition --
;
%define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_       ; ELF supports PIC
%define EXTN(name)  name                        ; foo() -> foo

%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_      ; BSD-style a.out supports PIC

%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)

; -- segment definition --
;
%define SEG_TEXT    .text  ;align=16    ; nasm doesn't accept align=16. why?
%define SEG_CONST   .rodata align=16

; The generation of position-independent code (PIC) is the default on Darwin.
;
%define PIC
%define GOT_SYMBOL  _MACHO_PIC_         ; Mach-O style code-relative addressing

%else           ; ----(Other case)----------------------

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

%endif          ; ----------------------------------------------

; ==========================================================================

; --------------------------------------------------------------------------
;  Common types
;
;  Each type name expands to a NASM operand-size keyword and comes with a
;  SIZEOF_<type> (bytes) and <type>_BIT (bits) companion.  The companions
;  refer to SIZEOF_DWORD, DWORD_BIT, etc., which are defined at the end of
;  this block; single-line macros are expanded where they are used, so the
;  forward references are resolved by then.
;
%define POINTER         dword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_DWORD    ; sizeof(POINTER)
%define POINTER_BIT     DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT

%define INT             dword           ; signed integer type
%define SIZEOF_INT      SIZEOF_DWORD    ; sizeof(INT)
%define INT_BIT         DWORD_BIT       ; sizeof(INT)*BYTE_BIT

%define FP32            dword           ; IEEE754 single
%define SIZEOF_FP32     SIZEOF_DWORD    ; sizeof(FP32)
%define FP32_BIT        DWORD_BIT       ; sizeof(FP32)*BYTE_BIT

%define MMWORD          qword           ; int64  (MMX register)
%define SIZEOF_MMWORD   SIZEOF_QWORD    ; sizeof(MMWORD)
%define MMWORD_BIT      QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT

; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now we have to define XMMWORD as blank.
%define XMMWORD                         ; int128 (SSE register)
%define SIZEOF_XMMWORD  SIZEOF_OWORD    ; sizeof(XMMWORD)
%define XMMWORD_BIT     OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT

; Similar hacks for when we load a dword or MMWORD into an xmm# register
; (both expand to nothing, i.e. no explicit operand size is emitted)
%define XMM_DWORD
%define XMM_MMWORD

%define SIZEOF_BYTE     1               ; sizeof(BYTE)
%define SIZEOF_WORD     2               ; sizeof(WORD)
%define SIZEOF_DWORD    4               ; sizeof(DWORD)
%define SIZEOF_QWORD    8               ; sizeof(QWORD)
%define SIZEOF_OWORD    16              ; sizeof(OWORD)

%define BYTE_BIT        8               ; CHAR_BIT in C
%define WORD_BIT        16              ; sizeof(WORD)*BYTE_BIT
%define DWORD_BIT       32              ; sizeof(DWORD)*BYTE_BIT
%define QWORD_BIT       64              ; sizeof(QWORD)*BYTE_BIT
%define OWORD_BIT       128             ; sizeof(OWORD)*BYTE_BIT

; --------------------------------------------------------------------------
;  External Symbol Name
;
;  EXTN(name) yields the symbol name as seen by the C side.  This default,
;  which prepends an underscore, only applies when the system-dependent
;  configuration has not already defined EXTN.
;
%ifndef EXTN
%define EXTN(name)   _ %+ name          ; foo() -> _foo
%endif

; --------------------------------------------------------------------------
;  Macros for position-independent code (PIC) support
;
;  PIC can only be honoured when the selected object format provided a
;  GOT_SYMBOL; otherwise a -DPIC given on the command line is discarded.
;
%ifndef GOT_SYMBOL
%undef  PIC
%endif

; PIC helper interface (the same names exist in every configuration):
;
;   GOTOFF(got,sym)  - address expression for 'sym', given the register
;                      'got' that was set up by get_GOT
;   get_GOT reg      - load the PIC base into 'reg'
;   pushpic reg      - push 'reg'      (expands to nothing without PIC)
;   poppic  reg      - pop  'reg'      (expands to nothing without PIC)
;   movpic  dst,src  - mov  dst,src    (expands to nothing without PIC)
;
%ifdef PIC      ; -------------------------------------------

%ifidn GOT_SYMBOL,_MACHO_PIC_   ; --------------------

; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.
;
; Including this file switches to SEG_CONST and places the label const_base
; there; get_GOT loads the run-time address of const_base, and GOTOFF
; addresses symbols relative to it.

        SECTION SEG_CONST
const_base:

%define GOTOFF(got,sym) (got) + (sym) - const_base

%imacro get_GOT 1
        ; NOTE: this macro destroys ecx register.
        ;       ebp is used as a zero index and is saved/restored around it.
        call    %%geteip                ; ecx = address of the 'add' below
        add     ecx, byte (%%ref - $)   ; ecx = run-time address of %%ref
        jmp     short %%adjust
%%geteip:
        mov     ecx, POINTER [esp]      ; fetch the return address
        ret
%%adjust:
        push    ebp
        xor     ebp,ebp                 ; ebp = 0
%ifidni %1,ebx  ; (%1 == ebx)
        ; db 0x8D,0x9C + jmp near const_base =
        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
        ; The assembler computes the rel32 of the jmp, which doubles as the
        ; displacement of the hand-encoded lea.
        db      0x8D,0x9C               ; 8D,9C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:
%else           ; (%1 != ebx)
        ; db 0x8D,0x8C + jmp near const_base =
        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
        db      0x8D,0x8C               ; 8D,8C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:  mov     %1, ecx
%endif          ; (%1 == ebx)
        pop     ebp
%endmacro

%else           ; GOT_SYMBOL != _MACHO_PIC_ ----------------

%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff

%imacro get_GOT 1
        ; Declares GOT_SYMBOL extern, fetches the return address of the
        ; call into %1 and adds the ..gotpc-relative offset of GOT_SYMBOL.
        extern  GOT_SYMBOL
        call    %%geteip
        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
        jmp     short %%done
%%geteip:
        mov     %1, POINTER [esp]       ; %1 = address of the 'add' above
        ret
%%done:
%endmacro

%endif          ; GOT_SYMBOL == _MACHO_PIC_ ----------------

%imacro pushpic 1.nolist
        push    %1
%endmacro
%imacro poppic  1.nolist
        pop     %1
%endmacro
%imacro movpic  2.nolist
        mov     %1,%2
%endmacro

%else           ; !PIC -----------------------------------------

; Without PIC, symbols are addressed directly and the helper macros expand
; to nothing.

%define GOTOFF(got,sym) (sym)

%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic  1.nolist
%endmacro
%imacro movpic  2.nolist
%endmacro

%endif          ;  PIC -----------------------------------------

; --------------------------------------------------------------------------
;  Align the next instruction on {2,4,8,16,..}-byte boundary.
;  ".balign n,,m" in GNU as
;
;  alignx n [,m]
;    n - alignment boundary in bytes
;    m - largest amount of padding to emit (default 0xFFFF); when reaching
;        the boundary would take more than m bytes, nothing is emitted
;
;  MSKLE(x,y) - a mask with its low 16 bits all set when x <= y, otherwise 0
;               (both operands are truncated to 16 bits first)
;  FILLB(b,n) - number of bytes from position b up to the next n-byte
;               boundary, counted from the start of the section ($$)
;
;  Every 'times' count below is the remaining gap, FILLB($,n), divided by a
;  threshold and ANDed with the "gap <= m" mask taken at the macro start
;  (%%bs).  A gap of 16 bytes or more is filled entirely with single-byte
;  nops by the first line; smaller gaps are filled with the multi-byte
;  no-op encodings that follow, largest first.
;
%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b,n)  (($$-(b)) & ((n)-1))

%imacro alignx 1-2.nolist 0xFFFF
%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
               db 0x90                                  ; nop
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00    ; lea ebx,[ebx+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00    ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
               db 0x8D,0xAD,0x00,0x00,0x00,0x00         ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
               db 0x8D,0x6C,0x25,0x00                   ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
               db 0x8D,0x6D,0x00                        ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
               db 0x8B,0xED                             ; mov ebp,ebp
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
               db 0x90                                  ; nop
%endmacro

; Align the next data on {2,4,8,16,..}-byte boundary.
;
;  alignz n
;    n - alignment boundary in bytes; the gap is filled with zero bytes
;
%imacro alignz 1.nolist
        align %1, db 0          ; filling zeros
%endmacro


; --------------------------------------------------------------------------
;  Defines picked up from the C headers
;
%include "simd/jsimdcfg.inc"

; --------------------------------------------------------------------------