;
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
;
; Based on
; x86 SIMD extension for IJG JPEG library - version 1.02
;
; Copyright (C) 1999-2006, MIYASAKA Masaru.
;
; This software is provided 'as-is', without any express or implied
; warranty.  In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
;    claim that you wrote the original software. If you use this software
;    in a product, an acknowledgment in the product documentation would be
;    appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
;    misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; [TAB8]

; ==========================================================================
;  System-dependent configurations
;
;  The first of WIN32 / OBJ32 / ELF / AOUT / MACHO that is defined selects
;  the section attributes (SEG_TEXT, SEG_CONST) and, for the formats that
;  support it, the PIC base symbol (GOT_SYMBOL).  If none of them is
;  defined, plain .text / .data sections are used.

%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
; * Microsoft Visual C++
; * MinGW (Minimalist GNU for Windows)
; * CygWin
; * LCC-Win32

; -- segment definition --
;
%define SEG_TEXT    .text align=16 public use32 class=CODE
%define SEG_CONST   .rdata align=16 public use32 class=CONST

%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
; * Borland C++ (Win32)

; -- segment definition --
;
%define SEG_TEXT    .text align=16 public use32 class=CODE
%define SEG_CONST   .data align=16 public use32 class=DATA

%elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
; * Linux
; * *BSD family Unix using elf format
; * Unix System V, including Solaris x86, UnixWare and SCO Unix

; -- segment definition --
;
%ifdef __x86_64__
%define SEG_TEXT    .text progbits align=16
%define SEG_CONST   .rodata progbits align=16
%else
%define SEG_TEXT    .text progbits alloc exec nowrite align=16
%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
%endif

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_   ; ELF supports PIC
%define EXTN(name)  name                    ; foo() -> foo

%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_  ; BSD-style a.out supports PIC

%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)

; -- segment definition --
;
%define SEG_TEXT    .text  ;align=16    ; nasm doesn't accept align=16. why?
%define SEG_CONST   .rodata align=16

; The generation of position-independent code (PIC) is the default on Darwin.
;
%define PIC
%define GOT_SYMBOL  _MACHO_PIC_     ; Mach-O style code-relative addressing

%else           ; ----(Other case)----------------------

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

%endif          ; ----------------------------------------------

; ==========================================================================

; --------------------------------------------------------------------------
;  Common types
;
;  Each type is described by three symbols: the NASM operand-size keyword
;  (e.g. INT), its size in bytes (SIZEOF_INT) and its size in bits (INT_BIT).
;  The SIZEOF_xWORD / xWORD_BIT constants they refer to are defined at the
;  end of this section; that is fine because single-line macros are expanded
;  when they are used, not when they are defined.
;
%ifdef __x86_64__
%define POINTER         qword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_QWORD    ; sizeof(POINTER)
%define POINTER_BIT     QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%else
%define POINTER         dword           ; general pointer type
%define SIZEOF_POINTER  SIZEOF_DWORD    ; sizeof(POINTER)
%define POINTER_BIT     DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%endif

%define INT             dword           ; signed integer type
%define SIZEOF_INT      SIZEOF_DWORD    ; sizeof(INT)
%define INT_BIT         DWORD_BIT       ; sizeof(INT)*BYTE_BIT

%define FP32            dword           ; IEEE754 single
%define SIZEOF_FP32     SIZEOF_DWORD    ; sizeof(FP32)
%define FP32_BIT        DWORD_BIT       ; sizeof(FP32)*BYTE_BIT

%define MMWORD          qword           ; int64  (MMX register)
%define SIZEOF_MMWORD   SIZEOF_QWORD    ; sizeof(MMWORD)
%define MMWORD_BIT      QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT

; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now we have to define XMMWORD as blank.
%define XMMWORD                         ; int128  (SSE register)
%define SIZEOF_XMMWORD  SIZEOF_OWORD    ; sizeof(XMMWORD)
%define XMMWORD_BIT     OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT

; Similar hacks for when we load a dword or MMWORD into an xmm# register
%define XMM_DWORD
%define XMM_MMWORD

%define SIZEOF_BYTE     1               ; sizeof(BYTE)
%define SIZEOF_WORD     2               ; sizeof(WORD)
%define SIZEOF_DWORD    4               ; sizeof(DWORD)
%define SIZEOF_QWORD    8               ; sizeof(QWORD)
%define SIZEOF_OWORD    16              ; sizeof(OWORD)

%define BYTE_BIT        8               ; CHAR_BIT in C
%define WORD_BIT        16              ; sizeof(WORD)*BYTE_BIT
%define DWORD_BIT       32              ; sizeof(DWORD)*BYTE_BIT
%define QWORD_BIT       64              ; sizeof(QWORD)*BYTE_BIT
%define OWORD_BIT       128             ; sizeof(OWORD)*BYTE_BIT

; --------------------------------------------------------------------------
;  External Symbol Name
;
;  EXTN(name) maps a C-level symbol name to its object-file spelling.
;  A definition made earlier in this file (the ELF configuration defines
;  EXTN(name) as the bare name) is kept; otherwise a leading underscore is
;  prepended.
;
%ifndef EXTN
%define EXTN(name)  _ %+ name       ; foo() -> _foo
%endif

; --------------------------------------------------------------------------
;  Macros for position-independent code (PIC) support
;
;  Defined in every configuration:
;    GOTOFF(got,sym)  - address expression for `sym`, given PIC base `got`
;    get_GOT  reg     - load the PIC base into `reg`
;    pushpic  reg     - push reg       (expands to nothing without PIC)
;    poppic   reg     - pop  reg       (expands to nothing without PIC)
;    movpic   dst,src - mov  dst,src   (expands to nothing without PIC)
;
;  PIC is honoured only if the selected object format defined GOT_SYMBOL.
;
%ifndef GOT_SYMBOL
%undef PIC
%endif

%ifdef PIC      ; -------------------------------------------

%ifidn GOT_SYMBOL,_MACHO_PIC_   ; --------------------

; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.

        SECTION SEG_CONST
const_base:

; Symbols are addressed relative to const_base instead of a real GOT.
%define GOTOFF(got,sym) (got) + (sym) - const_base

; get_GOT reg
;   Leaves the run-time address of const_base in `reg`.
;   Clobbers: ecx, flags.  ebp is saved and restored on the stack.
%imacro get_GOT 1
        ; NOTE: this macro destroys the ecx register.
        call    %%geteip                ; ecx = address of the `add` below
        add     ecx, byte (%%ref - $)   ; ecx = run-time address of %%ref
        jmp     short %%adjust
%%geteip:
        mov     ecx, POINTER [esp]      ; read our own return address
        ret
%%adjust:
        push    ebp
        xor     ebp,ebp                 ; ebp = 0
%ifidni %1,ebx  ; (%1 == ebx)
        ; db 0x8D,0x9C + jmp near const_base =
        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
        db      0x8D,0x9C               ; 8D,9C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:
%else           ; (%1 != ebx)
        ; db 0x8D,0x8C + jmp near const_base =
        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
        db      0x8D,0x8C               ; 8D,8C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:  mov     %1, ecx
%endif          ; (%1 == ebx)
        pop     ebp
%endmacro

%else           ; GOT_SYMBOL != _MACHO_PIC_ ----------------

%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff

; get_GOT reg
;   Reads the return address of an internal call into `reg`, then adds the
;   ..gotpc-relative offset of GOT_SYMBOL to it.
;   Clobbers: flags (via add).
%imacro get_GOT 1
        extern  GOT_SYMBOL
        call    %%geteip                ; %1 = address of the `add` below
        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
        jmp     short %%done
%%geteip:
        mov     %1, POINTER [esp]       ; read our own return address
        ret
%%done:
%endmacro

%endif          ; GOT_SYMBOL == _MACHO_PIC_ ----------------

; With PIC enabled these are plain push / pop / mov.
%imacro pushpic 1.nolist
        push    %1
%endmacro
%imacro poppic  1.nolist
        pop     %1
%endmacro
%imacro movpic  2.nolist
        mov     %1,%2
%endmacro

%else           ; !PIC -----------------------------------------

; Without PIC a symbol is addressed directly and the helpers emit no code.
%define GOTOFF(got,sym) (sym)

%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic  1.nolist
%endmacro
%imacro movpic  2.nolist
%endmacro

%endif          ;  PIC -----------------------------------------

; --------------------------------------------------------------------------
;  Align the next instruction on {2,4,8,16,..}-byte boundary.
;  ".balign n,,m" in GNU as
;
;  alignx n [,m]
;    n - alignment in bytes (a power of two)
;    m - optional maximum number of padding bytes (default 0xFFFF); when more
;        than m bytes would be needed, no padding is emitted at all
;
;  MSKLE(x,y)  - mask with its low bits all set when x <= y, 0 when x > y
;                (both operands are taken modulo 0x10000)
;  FILLB(b,n)  - number of bytes from section offset b up to the next
;                n-byte boundary (0 when b is already aligned)
;
;  A gap of 16 bytes or more is filled with single-byte nops; a shorter gap
;  is filled with the longest of the no-op encodings below that still fit.
;
%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b,n)  (($$-(b)) & ((n)-1))

%imacro alignx 1-2.nolist 0xFFFF
%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
               db 0x90                                  ; nop
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00    ; lea ebx,[ebx+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00    ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
               db 0x8D,0xAD,0x00,0x00,0x00,0x00         ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
               db 0x8D,0x6C,0x25,0x00                   ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
               db 0x8D,0x6D,0x00                        ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
               db 0x8B,0xED                             ; mov ebp,ebp
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
               db 0x90                                  ; nop
%endmacro

; Align the next data on {2,4,8,16,..}-byte boundary.
;
; alignz n
;   n - alignment in bytes; the gap is filled with zero bytes.
;
%imacro alignz 1.nolist
        align   %1, db 0                ; filling zeros
%endmacro

; --------------------------------------------------------------------------
;  x86-64 argument collection
;
;  collect_args
;    Pushes r10-r15 (48 bytes of stack) and then copies the six integer
;    argument registers into them:
;      r10 = rdi, r11 = rsi, r12 = rdx, r13 = rcx, r14 = r8, r15 = r9
;
;  uncollect_args
;    Pops r15..r10 in the reverse order, restoring the values saved by
;    collect_args.  rsp must be back at the value collect_args left it at.
;
%ifdef __x86_64__
%imacro collect_args 0
        push    r10
        push    r11
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rdi
        mov     r11, rsi
        mov     r12, rdx
        mov     r13, rcx
        mov     r14, r8
        mov     r15, r9
%endmacro

%imacro uncollect_args 0
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     r11
        pop     r10
%endmacro

%endif

; --------------------------------------------------------------------------
;  Defines picked up from the C headers
;
%include "jsimdcfg.inc"

; --------------------------------------------------------------------------