Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* Machine-dependent software floating-point definitions. PPC version. |
| 2 | Copyright (C) 1997 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Library General Public License as |
| 7 | published by the Free Software Foundation; either version 2 of the |
| 8 | License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Library General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Library General Public |
| 16 | License along with the GNU C Library; see the file COPYING.LIB. If |
| 17 | not, write to the Free Software Foundation, Inc., |
| 18 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 19 | |
| 20 | Actually, this is a PPC (32bit) version, written based on the |
| 21 | i386, sparc, and sparc64 versions, by me, |
| 22 | Peter Maydell (pmaydell@chiark.greenend.org.uk). |
| 23 | Comments are by and large also mine, although they may be inaccurate. |
| 24 | |
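In picking out asm fragments I've gone with the lowest common
denominator, which also happens to be the hardware I have :->
That is, a SPARC without hardware multiply and divide.
[NOTE(review): this remark appears to be inherited from the SPARC
version this file was based on; the PPC multiply fragment below
uses the mulhwu instruction.]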
| 28 | */ |
| 29 | |
/*
 * Basic word-size definitions.
 *
 * _FP_W_TYPE_SIZE is the width, in bits, of one fraction word.
 * _FP_W_TYPE / _FP_WS_TYPE are the unsigned / signed types holding such a
 * word, and _FP_I_TYPE is the signed integer type.
 *
 * NOTE(review): these definitions assume `long` is 32 bits wide (this is
 * the 32-bit PPC version) -- confirm before using with a 64-bit `long`.
 */
#define _FP_W_TYPE_SIZE 32
#define _FP_W_TYPE      unsigned long
#define _FP_WS_TYPE     signed long
#define _FP_I_TYPE      long
| 35 | |
/*
 * Half-word helpers (used by the C implementation of udiv_qrnnd).
 *
 *   __ll_B            2^(W_TYPE_SIZE / 2), i.e. the half-word base
 *   __ll_lowpart(t)   low  half of t, as a UWtype
 *   __ll_highpart(t)  high half of t, as a UWtype
 *
 * UWtype and W_TYPE_SIZE are not defined here; they must be provided by
 * the code that includes this header.
 */
#define __ll_B           ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t)  ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
| 39 | |
/* You can optionally code some things like addition in asm. For
 * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't
 * then you get a fragment of C code [if you change an #if 0
 * in op-2.h] or a call to add_ssaaaa (see below).
 * Good places to look for asm fragments to use are gcc and glibc.
 * gcc's longlong.h is useful.
 */
| 47 | |
/* We need to know how to multiply and divide. If the host word size
 * is >= 2*fracbits you can use _FP_MUL_MEAT_n_imm(t,R,X,Y) which
 * codes the multiply with whatever gcc does to 'a * b'.
 * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm
 * function that can multiply two 1W values and get a 2W result.
 * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which
 * does bitshifting to avoid overflow.
 * For division there is _FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size
 * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or
 * _FP_DIV_HELP_ldiv (see op-1.h).
 * _FP_DIV_MEAT_n_udiv() is if you have asm to do 2W/1W => (1W, 1W).
 * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd
 * to do this.]
 * In general, 'n' is the number of words required to hold the type,
 * and 't' is either S, D or Q for single/double/quad.
 * -- PMM
 */
| 65 | /* Example: SPARC64: |
| 66 | * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_imm(S,R,X,Y) |
| 67 | * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm) |
| 68 | * #define _FP_MUL_MEAT_Q(R,X,Y) _FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm) |
| 69 | * |
| 70 | * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) |
| 71 | * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv(D,R,X,Y) |
| 72 | * #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv_64(Q,R,X,Y) |
| 73 | * |
| 74 | * Example: i386: |
| 75 | * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64) |
| 76 | * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64) |
| 77 | * |
| 78 | * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32) |
| 79 | * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv_64(D,R,X,Y) |
| 80 | */ |
| 81 | |
/*
 * Multiply/divide strategy for this machine.
 *
 * Multiplication uses the "wide" variants, driven by umul_ppmm (a 1W x 1W
 * => 2W multiply): one word for single precision, two for double.
 * Division uses the "udiv" variants: one word for single precision and the
 * two-word _udiv_64 variant for double.
 */
#define _FP_MUL_MEAT_S(R, X, Y) _FP_MUL_MEAT_1_wide(S, R, X, Y, umul_ppmm)
#define _FP_MUL_MEAT_D(R, X, Y) _FP_MUL_MEAT_2_wide(D, R, X, Y, umul_ppmm)

#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv(S, R, X, Y)
#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_2_udiv_64(D, R, X, Y)
| 87 | |
/*
 * What a NaN looks like.  Each _FP_NANFRAC_<fmt> expands to a
 * comma-separated list of 32-bit unsigned words encoding the NaN fraction:
 * the quiet-NaN bit for that format first, followed by zero words
 * (one word for S, two for D, four for Q).
 */
#define _FP_NANFRAC_S _FP_QNANBIT_S
#define _FP_NANFRAC_D _FP_QNANBIT_D, 0
#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0

/* NOTE(review): presumably tells the generic code to keep an input NaN's
 * fraction rather than substituting the default above -- confirm against
 * the soft-fp common headers.
 */
#define _FP_KEEPNANFRACP 1
| 96 | |
/*
 * _FP_CHOOSENAN(fs, wc, R, X, Y)
 *
 * This appears to be invoked when both X and Y are NaNs and one of them
 * has to be copied to the result R.  i386 goes for the larger of the two;
 * sparc64 just picks Y.  This version follows sparc64 because it is
 * shorter -- PMM.
 *
 *   fs  format letter (not used by this implementation)
 *   wc  word count of the fraction; selects _FP_FRAC_COPY_<wc>
 *   R   result: receives Y's sign and fraction and is classified as NaN
 *   X   first operand (ignored)
 *   Y   second operand (always the one chosen)
 */
#define _FP_CHOOSENAN(fs, wc, R, X, Y)                  \
  do {                                                  \
    R##_s = Y##_s;                                      \
    _FP_FRAC_COPY_##wc(R, Y);                           \
    R##_c = FP_CLS_NAN;                                 \
  } while (0)
| 108 | |
| 109 | |
/*
 * Out-of-line double-precision unpack/pack helpers, defined elsewhere.
 * Parameter names follow the way __FP_UNPACK_D / __FP_PACK_D / __FP_PACK_DS
 * pass their arguments: sign, high fraction word, low fraction word,
 * exponent, class, plus the pointer to the packed value.
 * The pack helpers return an int; NOTE(review): presumably the exception
 * bits raised, as __FP_PACK_S does -- confirm against their definitions.
 */
extern void fp_unpack_d(long *s, unsigned long *f1, unsigned long *f0,
                        long *e, long *c, void *val);
extern int fp_pack_d(void *val, long s, unsigned long f1, unsigned long f0,
                     long e, long c);
extern int fp_pack_ds(void *val, long s, unsigned long f1, unsigned long f0,
                      long e, long c);
| 114 | |
/*
 * __FP_UNPACK_RAW_1(fs, X, val)
 *
 * Split the packed one-word value at address `val` into its raw fields,
 * without any canonicalisation:
 *   X_f <- fraction, X_e <- exponent, X_s <- sign.
 * `fs` selects the layout union (union _FP_UNION_<fs>).
 * `val` is parenthesised before the cast so that an expression such as
 * `p + off` is converted as a whole.
 */
#define __FP_UNPACK_RAW_1(fs, X, val)                   \
  do {                                                  \
    union _FP_UNION_##fs *_flo =                        \
      (union _FP_UNION_##fs *)(val);                    \
                                                        \
    X##_f = _flo->bits.frac;                            \
    X##_e = _flo->bits.exp;                             \
    X##_s = _flo->bits.sign;                            \
  } while (0)
| 124 | |
/*
 * __FP_UNPACK_RAW_2(fs, X, val)
 *
 * Two-word counterpart of __FP_UNPACK_RAW_1: copies the raw fields of the
 * packed value at `val` into
 *   X_f0 <- frac0, X_f1 <- frac1, X_e <- exponent, X_s <- sign.
 * `val` is parenthesised before the cast so that an expression argument
 * is converted as a whole.
 */
#define __FP_UNPACK_RAW_2(fs, X, val)                   \
  do {                                                  \
    union _FP_UNION_##fs *_flo =                        \
      (union _FP_UNION_##fs *)(val);                    \
                                                        \
    X##_f0 = _flo->bits.frac0;                          \
    X##_f1 = _flo->bits.frac1;                          \
    X##_e  = _flo->bits.exp;                            \
    X##_s  = _flo->bits.sign;                           \
  } while (0)
| 135 | |
/*
 * __FP_UNPACK_S(X, val): unpack the single-precision value at `val` into X.
 * First extracts the raw fields, then runs the generic canonicalisation
 * step for a one-word fraction.
 */
#define __FP_UNPACK_S(X, val)                           \
  do {                                                  \
    __FP_UNPACK_RAW_1(S, X, val);                       \
    _FP_UNPACK_CANONICAL(S, 1, X);                      \
  } while (0)
| 141 | |
/*
 * __FP_UNPACK_D(X, val): unpack the double-precision value at `val` into X
 * by calling the out-of-line helper fp_unpack_d(), which receives pointers
 * to X's sign, high and low fraction words, exponent and class.
 */
#define __FP_UNPACK_D(X, val) \
  fp_unpack_d(&X##_s, &X##_f1, &X##_f0, &X##_e, &X##_c, val)
| 144 | |
/*
 * __FP_PACK_RAW_1(fs, val, X)
 *
 * Store X's raw fields into the packed one-word value at address `val`:
 *   frac <- X_f, exp <- X_e, sign <- X_s.
 * `fs` selects the layout union (union _FP_UNION_<fs>).
 * `val` is parenthesised before the cast so that an expression argument
 * is converted as a whole.
 */
#define __FP_PACK_RAW_1(fs, val, X)                     \
  do {                                                  \
    union _FP_UNION_##fs *_flo =                        \
      (union _FP_UNION_##fs *)(val);                    \
                                                        \
    _flo->bits.frac = X##_f;                            \
    _flo->bits.exp  = X##_e;                            \
    _flo->bits.sign = X##_s;                            \
  } while (0)
| 154 | |
/*
 * __FP_PACK_RAW_2(fs, val, X)
 *
 * Two-word counterpart of __FP_PACK_RAW_1: stores
 *   frac0 <- X_f0, frac1 <- X_f1, exp <- X_e, sign <- X_s
 * into the packed value at address `val`.
 * `val` is parenthesised before the cast so that an expression argument
 * is converted as a whole.
 */
#define __FP_PACK_RAW_2(fs, val, X)                     \
  do {                                                  \
    union _FP_UNION_##fs *_flo =                        \
      (union _FP_UNION_##fs *)(val);                    \
                                                        \
    _flo->bits.frac0 = X##_f0;                          \
    _flo->bits.frac1 = X##_f1;                          \
    _flo->bits.exp   = X##_e;                           \
    _flo->bits.sign  = X##_s;                           \
  } while (0)
| 165 | |
| 166 | #include <linux/kernel.h> |
| 167 | #include <linux/sched.h> |
| 168 | |
/* The FPSCR value kept in the current task's thread state. */
#define __FPU_FPSCR (current->thread.fpscr.val)

/*
 * We only actually write to the destination register
 * if exceptions signalled (if any) will not trap.
 *
 * __FPU_ENABLED_EXC is the 5-bit field found at bits 3..7 (counting from
 * the least-significant bit) of the FPSCR value, shifted down to bits 0..4.
 * NOTE(review): presumably the exception-enable bits -- confirm against
 * the FPSCR layout.
 *
 * __FPU_TRAP_P(bits) is non-zero when any bit of `bits` is also set in
 * that field; `bits` must therefore use the same 5-bit layout.
 */
#define __FPU_ENABLED_EXC ((__FPU_FPSCR >> 3) & 0x1f)

#define __FPU_TRAP_P(bits) \
  ((__FPU_ENABLED_EXC & (bits)) != 0)
| 181 | |
/*
 * __FP_PACK_S(val, X): canonicalise X and pack it as single precision.
 *
 * Evaluates to the exception bits returned by _FP_PACK_CANONICAL().  The
 * packed result is stored at `val` only when no exception was raised, or
 * when none of the raised exceptions would trap (see __FPU_TRAP_P).
 */
#define __FP_PACK_S(val, X)                             \
({                                                      \
    int __exc = _FP_PACK_CANONICAL(S, 1, X);            \
    if (!__exc || !__FPU_TRAP_P(__exc)) {               \
        __FP_PACK_RAW_1(S, val, X);                     \
    }                                                   \
    __exc;                                              \
})
| 188 | |
/*
 * Double-precision packing is done out of line.
 * __FP_PACK_D(val, X) hands X's sign, high/low fraction words, exponent
 * and class to fp_pack_d(); __FP_PACK_DS(val, X) does the same through
 * fp_pack_ds().  Both evaluate to the helper's int return value.
 */
#define __FP_PACK_D(val, X) \
  fp_pack_d(val, X##_s, X##_f1, X##_f0, X##_e, X##_c)

#define __FP_PACK_DS(val, X) \
  fp_pack_ds(val, X##_s, X##_f1, X##_f0, X##_e, X##_c)
| 194 | |
/* Obtain the current rounding mode: the two least-significant bits of
 * the FPSCR value.
 */
#define FP_ROUNDMODE (__FPU_FPSCR & 0x3)
| 200 | |
| 201 | /* the asm fragments go here: all these are taken from glibc-2.0.5's |
| 202 | * stdlib/longlong.h |
| 203 | */ |
| 204 | |
| 205 | #include <linux/types.h> |
| 206 | #include <asm/byteorder.h> |
| 207 | |
/* add_ssaaaa is used in op-2.h and should be equivalent to
 * #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ (( sl = al+bl) < al))
 * add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
 * high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
 * by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
 * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
 * (i.e. carry out) is not stored anywhere, and is lost.
 *
 * Three code sequences are used: addze when bh is the constant 0, addme
 * when bh is the constant ~0, and the general add/adde pair otherwise.
 *
 * Constraint notes:
 *  - sl is early-clobber ("=&r") because it is written by the first
 *    instruction while ah (and bh) are still to be read.
 *  - In the two constant-bh sequences only al and bl are commutative, so
 *    only al carries the '%' modifier.  ah must not: it would tell the
 *    compiler that ah and al may be exchanged.
 *  - Output operands are plain lvalues (no casts), and only the PowerPC
 *    mnemonics are emitted.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    if (__builtin_constant_p (bh) && (bh) == 0)                         \
      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                        \
               : "=r" (sh),                                             \
                 "=&r" (sl)                                             \
               : "r" ((USItype)(ah)),                                   \
                 "%r" ((USItype)(al)),                                  \
                 "rI" ((USItype)(bl)));                                 \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                        \
               : "=r" (sh),                                             \
                 "=&r" (sl)                                             \
               : "r" ((USItype)(ah)),                                   \
                 "%r" ((USItype)(al)),                                  \
                 "rI" ((USItype)(bl)));                                 \
    else                                                                \
      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"                      \
               : "=r" (sh),                                             \
                 "=&r" (sl)                                             \
               : "%r" ((USItype)(ah)),                                  \
                 "r" ((USItype)(bh)),                                   \
                 "%r" ((USItype)(al)),                                  \
                 "rI" ((USItype)(bl)));                                 \
  } while (0)
| 241 | |
/* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
 * #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al))
 * sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
 * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
 * composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
 * LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
 * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
 * and is lost.
 *
 * Special sequences are used when ah or bh is the compile-time constant
 * 0 or ~0 (subfze / subfme / addme / addze for the high word); otherwise
 * the general subfc/subfe pair is emitted.
 *
 * sl is early-clobber ("=&r") because it is written by the first
 * instruction while the high-word inputs are still to be read.  Output
 * operands are plain lvalues (no casts), and only the PowerPC mnemonics
 * are emitted.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
  do {                                                                  \
    if (__builtin_constant_p (ah) && (ah) == 0)                         \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"                      \
               : "=r" (sh),                                             \
                 "=&r" (sl)                                             \
               : "r" ((USItype)(bh)),                                   \
                 "rI" ((USItype)(al)),                                  \
                 "r" ((USItype)(bl)));                                  \
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)         \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"                      \
               : "=r" (sh),                                             \
                 "=&r" (sl)                                             \
               : "r" ((USItype)(bh)),                                   \
                 "rI" ((USItype)(al)),                                  \
                 "r" ((USItype)(bl)));                                  \
    else if (__builtin_constant_p (bh) && (bh) == 0)                    \
      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"                       \
               : "=r" (sh),                                             \
                 "=&r" (sl)                                             \
               : "r" ((USItype)(ah)),                                   \
                 "rI" ((USItype)(al)),                                  \
                 "r" ((USItype)(bl)));                                  \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)         \
      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"                       \
               : "=r" (sh),                                             \
                 "=&r" (sl)                                             \
               : "r" ((USItype)(ah)),                                   \
                 "rI" ((USItype)(al)),                                  \
                 "r" ((USItype)(bl)));                                  \
    else                                                                \
      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"                    \
               : "=r" (sh),                                             \
                 "=&r" (sl)                                             \
               : "r" ((USItype)(ah)),                                   \
                 "r" ((USItype)(bh)),                                   \
                 "rI" ((USItype)(al)),                                  \
                 "r" ((USItype)(bl)));                                  \
  } while (0)
| 290 | |
| 291 | /* asm fragments for mul and div */ |
| 292 | |
/* umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
 * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
 * word product in HIGH_PROD and LOW_PROD.
 *
 * Both factors are evaluated once into locals.  The high word comes from
 * the mulhwu instruction; the low word is the ordinary (truncating) C
 * product.  The output operand is a plain lvalue (no cast).
 */
#define umul_ppmm(ph, pl, m0, m1)                                       \
  do {                                                                  \
    USItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhwu %0,%1,%2"                                          \
             : "=r" (ph)                                                \
             : "%r" (__m0),                                             \
               "r" (__m1));                                             \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
| 306 | |
/* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
 * denominator) divides a UDWtype, composed by the UWtype integers
 * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
 * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
 * than DENOMINATOR for correct operation. If, in addition, the most
 * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
 * UDIV_NEEDS_NORMALIZATION is defined to 1.
 *
 * This is a plain-C implementation working on half words: each half of
 * the quotient is first estimated by dividing by the high half of the
 * denominator, then corrected downwards at most twice.
 * n1, n0 and d are each evaluated exactly once; q and r are assigned
 * only at the very end.
 */
#define udiv_qrnnd(q, r, n1, n0, d)                                     \
  do {                                                                  \
    const UWtype __ud = (d), __un1 = (n1), __un0 = (n0);                \
    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;                     \
    __d1 = __ll_highpart (__ud);                                        \
    __d0 = __ll_lowpart (__ud);                                         \
                                                                        \
    /* High half of the quotient. */                                    \
    __r1 = __un1 % __d1;                                                \
    __q1 = __un1 / __d1;                                                \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (__un0);                       \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += __ud;                                           \
        /* __r1 >= d means the addition did not carry. */               \
        if (__r1 >= __ud && __r1 < __m)                                 \
          __q1--, __r1 += __ud;                                         \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Low half of the quotient. */                                     \
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (__un0);                        \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += __ud;                                           \
        if (__r0 >= __ud && __r0 < __m)                                 \
          __q0--, __r0 += __ud;                                         \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)

#define UDIV_NEEDS_NORMALIZATION 1
| 352 | |
/*
 * Within code that includes this header, abort() does not terminate
 * anything: it expands to `return 0`, so the enclosing function simply
 * returns 0.  It can therefore only be used as a statement inside a
 * function whose return type accepts 0.
 */
#define abort() \
  return 0
| 355 | |
/*
 * Define __BYTE_ORDER for the generic soft-fp code: big-endian when
 * __BIG_ENDIAN is defined, little-endian otherwise.
 */
#ifdef __BIG_ENDIAN
#define __BYTE_ORDER __BIG_ENDIAN
#else
#define __BYTE_ORDER __LITTLE_ENDIAN
#endif
| 361 | |
/*
 * Exception flags.
 *
 * Each flag is written as 1 << (31 - n), i.e. bit n when counting from the
 * most-significant bit of a 32-bit word.
 * NOTE(review): presumably these mirror the FPSCR exception bit positions
 * -- confirm against the FPSCR layout.
 */
#define EFLAG_INVALID   (1 << (31 - 2))
#define EFLAG_OVERFLOW  (1 << (31 - 3))
#define EFLAG_UNDERFLOW (1 << (31 - 4))
#define EFLAG_DIVZERO   (1 << (31 - 5))
#define EFLAG_INEXACT   (1 << (31 - 6))

/* The EFLAG_VX* group; by their names, sub-causes of the invalid
 * exception (NOTE(review): confirm).
 */
#define EFLAG_VXSNAN    (1 << (31 - 7))
#define EFLAG_VXISI     (1 << (31 - 8))
#define EFLAG_VXIDI     (1 << (31 - 9))
#define EFLAG_VXZDZ     (1 << (31 - 10))
#define EFLAG_VXIMZ     (1 << (31 - 11))
#define EFLAG_VXVC      (1 << (31 - 12))
#define EFLAG_VXSOFT    (1 << (31 - 21))
#define EFLAG_VXSQRT    (1 << (31 - 22))
#define EFLAG_VXCVI     (1 << (31 - 23))