MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 1 | ; |
| 2 | ; jsimdext.inc - common declarations |
| 3 | ; |
Pierre Ossman | 2ae181c | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 4 | ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 | ; |
| 6 | ; Based on |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 7 | ; x86 SIMD extension for IJG JPEG library - version 1.02 |
| 8 | ; |
| 9 | ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 | ; |
| 11 | ; This software is provided 'as-is', without any express or implied |
| 12 | ; warranty. In no event will the authors be held liable for any damages |
| 13 | ; arising from the use of this software. |
| 14 | ; |
| 15 | ; Permission is granted to anyone to use this software for any purpose, |
| 16 | ; including commercial applications, and to alter it and redistribute it |
| 17 | ; freely, subject to the following restrictions: |
| 18 | ; |
| 19 | ; 1. The origin of this software must not be misrepresented; you must not |
| 20 | ; claim that you wrote the original software. If you use this software |
| 21 | ; in a product, an acknowledgment in the product documentation would be |
| 22 | ; appreciated but is not required. |
| 23 | ; 2. Altered source versions must be plainly marked as such, and must not be |
| 24 | ; misrepresented as being the original software. |
| 25 | ; 3. This notice may not be removed or altered from any source distribution. |
| 26 | ; |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 27 | ; [TAB8] |
| 28 | |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 29 | ; ========================================================================== |
| 30 | ; System-dependent configurations (selected by a -D flag on the nasm command line) |
| 31 | |
| 32 | %ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- |
| 33 | ; * Microsoft Visual C++ |
| 34 | ; * MinGW (Minimalist GNU for Windows) |
| 35 | ; * Cygwin |
| 36 | ; * LCC-Win32 |
| 37 | |
| 38 | ; -- segment definition -- |
| 39 | ; (SEG_TEXT / SEG_CONST expand to a section name followed by its attributes) |
| 40 | %define SEG_TEXT .text align=16 public use32 class=CODE |
| 41 | %define SEG_CONST .rdata align=16 public use32 class=CONST |
| 42 | |
| 43 | %elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- |
| 44 | ; * Borland C++ (Win32) |
| 45 | |
| 46 | ; -- segment definition -- |
| 47 | ; (constants are placed in .data for this format) |
| 48 | %define SEG_TEXT .text align=16 public use32 class=CODE |
| 49 | %define SEG_CONST .data align=16 public use32 class=DATA |
| 50 | |
| 51 | %elifdef ELF ; ----(nasm -felf -DELF ...)------------ |
| 52 | ; * Linux |
| 53 | ; * *BSD family Unix using elf format |
| 54 | ; * Unix System V, including Solaris x86, UnixWare and SCO Unix |
| 55 | |
| 56 | ; -- segment definition -- |
| 57 | ; |
| 58 | %define SEG_TEXT .text progbits alloc exec nowrite align=16 |
| 59 | %define SEG_CONST .rodata progbits alloc noexec nowrite align=16 |
| 60 | |
| 61 | ; To make the code position-independent, append -DPIC to the command line |
| 62 | ; (PIC is honoured only where GOT_SYMBOL is defined; see the PIC macros) |
| 63 | %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC |
| 64 | %define EXTN(name) name ; no leading underscore: foo() -> foo |
| 65 | |
| 66 | %elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)---- |
| 67 | ; * Older Linux using a.out format (nasm -f aout -DAOUT ...) |
| 68 | ; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) |
| 69 | |
| 70 | ; -- segment definition -- |
| 71 | ; |
| 72 | %define SEG_TEXT .text |
| 73 | %define SEG_CONST .data |
| 74 | |
| 75 | ; To make the code position-independent, append -DPIC to the command line |
| 76 | ; |
| 77 | %define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC |
| 78 | |
| 79 | %elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- |
| 80 | ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) |
| 81 | |
| 82 | ; -- segment definition -- |
| 83 | ; |
| 84 | %define SEG_TEXT .text ;align=16 ; disabled: nasm doesn't accept align=16 here (reason unknown) |
| 85 | %define SEG_CONST .rodata align=16 |
| 86 | |
| 87 | ; The generation of position-independent code (PIC) is the default on Darwin. |
| 88 | ; so PIC is forced on here, with or without -DPIC |
| 89 | %define PIC |
| 90 | %define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing |
| 91 | |
| 92 | %else ; ----(Other case)---------------------- |
| 93 | |
| 94 | ; -- segment definition -- |
| 95 | ; (no GOT_SYMBOL is defined here, so PIC is switched off further down) |
| 96 | %define SEG_TEXT .text |
| 97 | %define SEG_CONST .data |
| 98 | |
| 99 | %endif ; ---------------------------------------------- |
| 100 | |
| 101 | ; ========================================================================== |
| 102 | |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 103 | ; -------------------------------------------------------------------------- |
Pierre Ossman | 2ae181c | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 104 | ; Common types |
| 105 | ; Each type T is paired with SIZEOF_T (bytes) and T_BIT (bits); the base SIZEOF_*/*_BIT constants follow at the end of this section. |
| 106 | %define POINTER dword ; general pointer type (32-bit) |
| 107 | %define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER) |
| 108 | %define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 109 | |
Pierre Ossman | 5eb84ff | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 110 | %define INT dword ; signed integer type |
| 111 | %define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT) |
| 112 | %define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 113 | |
Pierre Ossman | 65d0317 | 2009-03-09 13:28:10 +0000 | [diff] [blame] | 114 | %define FP32 dword ; IEEE 754 single-precision float |
| 115 | %define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32) |
| 116 | %define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 117 | |
Pierre Ossman | 5eb84ff | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 118 | %define MMWORD qword ; 64-bit integer (MMX register) |
| 119 | %define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD) |
| 120 | %define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 121 | |
Pierre Ossman | 018fc42 | 2009-03-09 13:31:56 +0000 | [diff] [blame] | 122 | ; NASM is buggy and doesn't properly handle operand sizes for SSE |
| 123 | ; instructions, so for now XMMWORD has to expand to nothing. |
| 124 | %define XMMWORD ; 128-bit integer (SSE register) |
| 125 | %define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD) |
| 126 | %define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 127 | |
Pierre Ossman | eea7215 | 2009-03-09 13:34:17 +0000 | [diff] [blame] | 128 | ; Same workaround for loading a dword or MMWORD into an xmm# register: both expand to nothing |
| 129 | %define XMM_DWORD |
| 130 | %define XMM_MMWORD |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 131 | |
Pierre Ossman | 5eb84ff | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 132 | %define SIZEOF_BYTE 1 ; sizeof(BYTE) |
| 133 | %define SIZEOF_WORD 2 ; sizeof(WORD) |
| 134 | %define SIZEOF_DWORD 4 ; sizeof(DWORD) |
| 135 | %define SIZEOF_QWORD 8 ; sizeof(QWORD) |
Pierre Ossman | 018fc42 | 2009-03-09 13:31:56 +0000 | [diff] [blame] | 136 | %define SIZEOF_OWORD 16 ; sizeof(OWORD) |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 137 | |
Pierre Ossman | 5eb84ff | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 138 | %define BYTE_BIT 8 ; CHAR_BIT in C |
| 139 | %define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT |
| 140 | %define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT |
| 141 | %define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT |
Pierre Ossman | 018fc42 | 2009-03-09 13:31:56 +0000 | [diff] [blame] | 142 | %define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 143 | |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 144 | ; -------------------------------------------------------------------------- |
| 145 | ; External Symbol Name |
| 146 | ; Default: prepend an underscore, unless EXTN was already defined (the ELF branch of the system configuration does so). |
| 147 | %ifndef EXTN |
| 148 | %define EXTN(name) _ %+ name ; foo() -> _foo |
| 149 | %endif |
| 150 | |
| 151 | ; -------------------------------------------------------------------------- |
| 152 | ; Macros for position-independent code (PIC) support |
| 153 | ; PIC is dropped when no GOT_SYMBOL exists; without PIC these macros expand to nothing and GOTOFF(got,sym) is plain sym. |
| 154 | %ifndef GOT_SYMBOL |
| 155 | %undef PIC |
| 156 | %endif |
| 157 | |
| 158 | %ifdef PIC ; ------------------------------------------- |
| 159 | |
| 160 | %ifidn GOT_SYMBOL,_MACHO_PIC_ ; -------------------- |
| 161 | |
| 162 | ; At present, nasm doesn't seem to support PIC generation for Mach-O. |
| 163 | ; The workaround below addresses data relative to const_base, a label emitted into SEG_CONST right here. |
| 164 | |
| 165 | SECTION SEG_CONST |
| 166 | const_base: |
| 167 | |
| 168 | %define GOTOFF(got,sym) (got) + (sym) - const_base |
| 169 | |
| 170 | %imacro get_GOT 1 |
| 171 | ; NOTE: this macro destroys the ecx register. |
| 172 | call %%geteip |
| 173 | add ecx, byte (%%ref - $) |
| 174 | jmp short %%adjust |
| 175 | %%geteip: |
| 176 | mov ecx, POINTER [esp] |
| 177 | ret |
| 178 | %%adjust: |
| 179 | push ebp |
| 180 | xor ebp,ebp ; ebp = 0, so the ebp*8 index term of the lea below adds nothing |
| 181 | %ifidni %1,ebx ; (%1 == ebx) |
| 182 | ; The two db bytes plus the following "jmp near const_base" together encode: |
| 183 | ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32) |
| 184 | db 0x8D,0x9C ; 8D,9C (lea opcode, ModRM) |
| 185 | jmp near const_base ; E9,(const_base-%%ref) (E9 is read as the SIB byte, rel32 as disp32) |
| 186 | %%ref: |
| 187 | %else ; (%1 != ebx) |
| 188 | ; The two db bytes plus the following "jmp near const_base" together encode: |
| 189 | ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32) |
| 190 | db 0x8D,0x8C ; 8D,8C (lea opcode, ModRM) |
| 191 | jmp near const_base ; E9,(const_base-%%ref) (E9 is read as the SIB byte, rel32 as disp32) |
| 192 | %%ref: mov %1, ecx |
| 193 | %endif ; (%1 == ebx) |
| 194 | pop ebp |
| 195 | %endmacro |
| 196 | |
| 197 | %else ; GOT_SYMBOL != _MACHO_PIC_ (use the GOT through ..gotpc / ..gotoff) ---------------- |
| 198 | |
| 199 | %define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff |
| 200 | |
| 201 | %imacro get_GOT 1 |
| 202 | extern GOT_SYMBOL |
| 203 | call %%geteip |
| 204 | add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc |
| 205 | jmp short %%done |
| 206 | %%geteip: |
| 207 | mov %1, POINTER [esp] |
| 208 | ret |
| 209 | %%done: |
| 210 | %endmacro |
| 211 | |
| 212 | %endif ; GOT_SYMBOL == _MACHO_PIC_ ---------------- |
| 213 | |
| 214 | %imacro pushpic 1.nolist |
| 215 | push %1 |
| 216 | %endmacro |
| 217 | %imacro poppic 1.nolist |
| 218 | pop %1 |
| 219 | %endmacro |
| 220 | %imacro movpic 2.nolist |
| 221 | mov %1,%2 |
| 222 | %endmacro |
| 223 | |
| 224 | %else ; !PIC (GOTOFF is just sym; the PIC macros are empty) ----------------------------------------- |
| 225 | |
| 226 | %define GOTOFF(got,sym) (sym) |
| 227 | |
| 228 | %imacro get_GOT 1.nolist |
| 229 | %endmacro |
| 230 | %imacro pushpic 1.nolist |
| 231 | %endmacro |
| 232 | %imacro poppic 1.nolist |
| 233 | %endmacro |
| 234 | %imacro movpic 2.nolist |
| 235 | %endmacro |
| 236 | |
| 237 | %endif ; PIC ----------------------------------------- |
| 238 | |
| 239 | ; -------------------------------------------------------------------------- |
| 240 | ; alignx n[,m]: align the next instruction on an n-byte boundary (n = 2,4,8,16,..; must be a power of two). |
| 241 | ; Same idea as ".balign n,,m" in GNU as: nothing is emitted if more than m bytes of padding would be needed (m defaults to 0xFFFF). |
| 242 | ; MSKLE(x,y) is a non-zero mask when x <= y and 0 otherwise (x,y in 0..0xFFFF); FILLB(b,n) is the byte count from b to the next n-byte boundary, measured from the section start ($$). |
| 243 | %define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) |
| 244 | %define FILLB(b,n) (($$-(b)) & ((n)-1)) |
| 245 | |
| 246 | %imacro alignx 1-2.nolist 0xFFFF |
| 247 | %%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \ |
| 248 | db 0x90 ; nop (gaps of 16 bytes or more are filled with nops only) |
| 249 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \ |
| 250 | db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000] |
| 251 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \ |
| 252 | db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] |
| 253 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \ |
| 254 | db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] |
| 255 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \ |
| 256 | db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00] |
| 257 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \ |
| 258 | db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00] |
| 259 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \ |
| 260 | db 0x8B,0xED ; mov ebp,ebp |
| 261 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \ |
| 262 | db 0x90 ; nop (whatever gap is left is closed one byte at a time) |
| 263 | %endmacro |
| 264 | |
| 265 | ; Align the next data on {2,4,8,16,..}-byte boundary. |
| 266 | ; Usage: alignz n (the padding bytes are zeros) |
| 267 | %imacro alignz 1.nolist |
| 268 | align %1, db 0 ; pad with zero bytes |
| 269 | %endmacro |
| 270 | |
Pierre Ossman | 2ae181c | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 271 | |
| 272 | ; -------------------------------------------------------------------------- |
| 273 | ; Defines picked up from the C headers |
| 274 | ; |
| 275 | %include "simd/jsimdcfg.inc" |
| 276 | |
MIYASAKA Masaru | a2e6a9d | 2006-02-04 00:00:00 +0000 | [diff] [blame] | 277 | ; -------------------------------------------------------------------------- |