Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | #include <linux/module.h> |
| 2 | #include <linux/types.h> |
| 3 | #include <linux/kernel.h> |
| 4 | #include <linux/sched.h> |
| 5 | |
| 6 | #include <asm/uaccess.h> |
| 7 | |
| 8 | #include "sfp-util.h" |
| 9 | #include <math-emu/soft-fp.h> |
| 10 | #include <math-emu/single.h> |
| 11 | #include <math-emu/double.h> |
| 12 | |
/*
 * Major opcodes.  alpha_fp_emul_imprecise() compares these against the
 * top six bits of an instruction word (insn >> 26).
 */
#define OPC_PAL 0x00
#define OPC_INTA 0x10
#define OPC_INTL 0x11
#define OPC_INTS 0x12
#define OPC_INTM 0x13
#define OPC_FLTC 0x14
#define OPC_FLTV 0x15
#define OPC_FLTI 0x16
#define OPC_FLTL 0x17
#define OPC_MISC 0x18
#define OPC_JSR 0x1a

/*
 * Source-format field of an FP operate instruction, extracted in
 * alpha_fp_emul() as (insn >> 9) & 0x3.
 */
#define FOP_SRC_S 0
#define FOP_SRC_T 2
#define FOP_SRC_Q 3

/*
 * Function field of an FP operate instruction, extracted in
 * alpha_fp_emul() as (insn >> 5) & 0xf.  FOP_FNC_CVTQL shares the value 0
 * with FOP_FNC_ADDx; it is only matched when the source format is
 * FOP_SRC_Q.
 */
#define FOP_FNC_ADDx 0
#define FOP_FNC_CVTQL 0
#define FOP_FNC_SUBx 1
#define FOP_FNC_MULx 2
#define FOP_FNC_DIVx 3
#define FOP_FNC_CMPxUN 4
#define FOP_FNC_CMPxEQ 5
#define FOP_FNC_CMPxLT 6
#define FOP_FNC_CMPxLE 7
#define FOP_FNC_SQRTx 11
#define FOP_FNC_CVTxS 12
#define FOP_FNC_CVTxT 14
#define FOP_FNC_CVTxQ 15

/*
 * Low 16 bits (insn & 0xffff) of an OPC_MISC instruction that end the
 * backward scan in alpha_fp_emul_imprecise().
 */
#define MISC_TRAPB 0x0000
#define MISC_EXCB 0x0400
| 45 | |
/*
 * FP register accessors, defined outside this file.  In this file the
 * plain variants are used for T- and Q-format operands and results, and
 * the _s variants for S-format operands and results.
 */
extern unsigned long alpha_read_fp_reg (unsigned long reg);
extern void alpha_write_fp_reg (unsigned long reg, unsigned long val);
extern unsigned long alpha_read_fp_reg_s (unsigned long reg);
extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);
| 50 | |
| 51 | |
| 52 | #ifdef MODULE |
| 53 | |
MODULE_DESCRIPTION("FP Software completion module");

/* Emulation hook pointers, defined outside this file. */
extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
extern long (*alpha_fp_emul) (unsigned long pc);

/* Hook values captured by init_module() and put back by cleanup_module(). */
static long (*save_emul_imprecise)(struct pt_regs *, unsigned long);
static long (*save_emul) (unsigned long pc);

/* This module's implementations, installed into the hooks on load. */
long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
long do_alpha_fp_emul(unsigned long);
| 64 | |
| 65 | int init_module(void) |
| 66 | { |
| 67 | save_emul_imprecise = alpha_fp_emul_imprecise; |
| 68 | save_emul = alpha_fp_emul; |
| 69 | alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise; |
| 70 | alpha_fp_emul = do_alpha_fp_emul; |
| 71 | return 0; |
| 72 | } |
| 73 | |
| 74 | void cleanup_module(void) |
| 75 | { |
| 76 | alpha_fp_emul_imprecise = save_emul_imprecise; |
| 77 | alpha_fp_emul = save_emul; |
| 78 | } |
| 79 | |
/*
 * From here on, in a module build, the names alpha_fp_emul_imprecise and
 * alpha_fp_emul expand to the do_* names, so the function definitions
 * below define do_alpha_fp_emul_imprecise() and do_alpha_fp_emul().
 */
#undef alpha_fp_emul_imprecise
#define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise
#undef alpha_fp_emul
#define alpha_fp_emul do_alpha_fp_emul
| 84 | |
| 85 | #endif /* MODULE */ |
| 86 | |
| 87 | |
| 88 | /* |
| 89 | * Emulate the floating point instruction at address PC. Returns -1 if the |
| 90 | * instruction to be emulated is illegal (such as with the opDEC trap), else |
| 91 | * the SI_CODE for a SIGFPE signal, else 0 if everything's ok. |
| 92 | * |
| 93 | * Notice that the kernel does not and cannot use FP regs. This is good |
| 94 | * because it means that instead of saving/restoring all fp regs, we simply |
| 95 | * stick the result of the operation into the appropriate register. |
| 96 | */ |
| 97 | long |
| 98 | alpha_fp_emul (unsigned long pc) |
| 99 | { |
| 100 | FP_DECL_EX; |
| 101 | FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); |
| 102 | FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); |
| 103 | |
| 104 | unsigned long fa, fb, fc, func, mode, src; |
| 105 | unsigned long res, va, vb, vc, swcr, fpcr; |
| 106 | __u32 insn; |
| 107 | long si_code; |
| 108 | |
| 109 | get_user(insn, (__u32 __user *)pc); |
| 110 | fc = (insn >> 0) & 0x1f; /* destination register */ |
| 111 | fb = (insn >> 16) & 0x1f; |
| 112 | fa = (insn >> 21) & 0x1f; |
| 113 | func = (insn >> 5) & 0xf; |
| 114 | src = (insn >> 9) & 0x3; |
| 115 | mode = (insn >> 11) & 0x3; |
| 116 | |
| 117 | fpcr = rdfpcr(); |
| 118 | swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr); |
| 119 | |
| 120 | if (mode == 3) { |
| 121 | /* Dynamic -- get rounding mode from fpcr. */ |
| 122 | mode = (fpcr >> FPCR_DYN_SHIFT) & 3; |
| 123 | } |
| 124 | |
| 125 | switch (src) { |
| 126 | case FOP_SRC_S: |
| 127 | va = alpha_read_fp_reg_s(fa); |
| 128 | vb = alpha_read_fp_reg_s(fb); |
| 129 | |
| 130 | FP_UNPACK_SP(SA, &va); |
| 131 | FP_UNPACK_SP(SB, &vb); |
| 132 | |
| 133 | switch (func) { |
| 134 | case FOP_FNC_SUBx: |
| 135 | FP_SUB_S(SR, SA, SB); |
| 136 | goto pack_s; |
| 137 | |
| 138 | case FOP_FNC_ADDx: |
| 139 | FP_ADD_S(SR, SA, SB); |
| 140 | goto pack_s; |
| 141 | |
| 142 | case FOP_FNC_MULx: |
| 143 | FP_MUL_S(SR, SA, SB); |
| 144 | goto pack_s; |
| 145 | |
| 146 | case FOP_FNC_DIVx: |
| 147 | FP_DIV_S(SR, SA, SB); |
| 148 | goto pack_s; |
| 149 | |
| 150 | case FOP_FNC_SQRTx: |
| 151 | FP_SQRT_S(SR, SB); |
| 152 | goto pack_s; |
| 153 | } |
| 154 | goto bad_insn; |
| 155 | |
| 156 | case FOP_SRC_T: |
| 157 | va = alpha_read_fp_reg(fa); |
| 158 | vb = alpha_read_fp_reg(fb); |
| 159 | |
| 160 | if ((func & ~3) == FOP_FNC_CMPxUN) { |
| 161 | FP_UNPACK_RAW_DP(DA, &va); |
| 162 | FP_UNPACK_RAW_DP(DB, &vb); |
| 163 | if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) { |
| 164 | FP_SET_EXCEPTION(FP_EX_DENORM); |
| 165 | if (FP_DENORM_ZERO) |
| 166 | _FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1); |
| 167 | } |
| 168 | if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) { |
| 169 | FP_SET_EXCEPTION(FP_EX_DENORM); |
| 170 | if (FP_DENORM_ZERO) |
| 171 | _FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1); |
| 172 | } |
| 173 | FP_CMP_D(res, DA, DB, 3); |
| 174 | vc = 0x4000000000000000UL; |
| 175 | /* CMPTEQ, CMPTUN don't trap on QNaN, |
| 176 | while CMPTLT and CMPTLE do */ |
| 177 | if (res == 3 |
| 178 | && ((func & 3) >= 2 |
| 179 | || FP_ISSIGNAN_D(DA) |
| 180 | || FP_ISSIGNAN_D(DB))) { |
| 181 | FP_SET_EXCEPTION(FP_EX_INVALID); |
| 182 | } |
| 183 | switch (func) { |
| 184 | case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break; |
| 185 | case FOP_FNC_CMPxEQ: if (res) vc = 0; break; |
| 186 | case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break; |
| 187 | case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break; |
| 188 | } |
| 189 | goto done_d; |
| 190 | } |
| 191 | |
| 192 | FP_UNPACK_DP(DA, &va); |
| 193 | FP_UNPACK_DP(DB, &vb); |
| 194 | |
| 195 | switch (func) { |
| 196 | case FOP_FNC_SUBx: |
| 197 | FP_SUB_D(DR, DA, DB); |
| 198 | goto pack_d; |
| 199 | |
| 200 | case FOP_FNC_ADDx: |
| 201 | FP_ADD_D(DR, DA, DB); |
| 202 | goto pack_d; |
| 203 | |
| 204 | case FOP_FNC_MULx: |
| 205 | FP_MUL_D(DR, DA, DB); |
| 206 | goto pack_d; |
| 207 | |
| 208 | case FOP_FNC_DIVx: |
| 209 | FP_DIV_D(DR, DA, DB); |
| 210 | goto pack_d; |
| 211 | |
| 212 | case FOP_FNC_SQRTx: |
| 213 | FP_SQRT_D(DR, DB); |
| 214 | goto pack_d; |
| 215 | |
| 216 | case FOP_FNC_CVTxS: |
| 217 | /* It is irritating that DEC encoded CVTST with |
| 218 | SRC == T_floating. It is also interesting that |
| 219 | the bit used to tell the two apart is /U... */ |
| 220 | if (insn & 0x2000) { |
| 221 | FP_CONV(S,D,1,1,SR,DB); |
| 222 | goto pack_s; |
| 223 | } else { |
| 224 | vb = alpha_read_fp_reg_s(fb); |
| 225 | FP_UNPACK_SP(SB, &vb); |
| 226 | DR_c = DB_c; |
| 227 | DR_s = DB_s; |
Ivan Kokshaysky | 6b2d2ce | 2008-01-17 15:21:13 -0800 | [diff] [blame] | 228 | DR_e = DB_e + (1024 - 128); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 229 | DR_f = SB_f << (52 - 23); |
| 230 | goto pack_d; |
| 231 | } |
| 232 | |
| 233 | case FOP_FNC_CVTxQ: |
| 234 | if (DB_c == FP_CLS_NAN |
| 235 | && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) { |
| 236 | /* AAHB Table B-2 says QNaN should not trigger INV */ |
| 237 | vc = 0; |
| 238 | } else |
| 239 | FP_TO_INT_ROUND_D(vc, DB, 64, 2); |
| 240 | goto done_d; |
| 241 | } |
| 242 | goto bad_insn; |
| 243 | |
| 244 | case FOP_SRC_Q: |
| 245 | vb = alpha_read_fp_reg(fb); |
| 246 | |
| 247 | switch (func) { |
| 248 | case FOP_FNC_CVTQL: |
| 249 | /* Notice: We can get here only due to an integer |
| 250 | overflow. Such overflows are reported as invalid |
| 251 | ops. We return the result the hw would have |
| 252 | computed. */ |
| 253 | vc = ((vb & 0xc0000000) << 32 | /* sign and msb */ |
| 254 | (vb & 0x3fffffff) << 29); /* rest of the int */ |
| 255 | FP_SET_EXCEPTION (FP_EX_INVALID); |
| 256 | goto done_d; |
| 257 | |
| 258 | case FOP_FNC_CVTxS: |
| 259 | FP_FROM_INT_S(SR, ((long)vb), 64, long); |
| 260 | goto pack_s; |
| 261 | |
| 262 | case FOP_FNC_CVTxT: |
| 263 | FP_FROM_INT_D(DR, ((long)vb), 64, long); |
| 264 | goto pack_d; |
| 265 | } |
| 266 | goto bad_insn; |
| 267 | } |
| 268 | goto bad_insn; |
| 269 | |
| 270 | pack_s: |
| 271 | FP_PACK_SP(&vc, SR); |
| 272 | if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) |
| 273 | vc = 0; |
| 274 | alpha_write_fp_reg_s(fc, vc); |
| 275 | goto done; |
| 276 | |
| 277 | pack_d: |
| 278 | FP_PACK_DP(&vc, DR); |
| 279 | if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ)) |
| 280 | vc = 0; |
| 281 | done_d: |
| 282 | alpha_write_fp_reg(fc, vc); |
| 283 | goto done; |
| 284 | |
| 285 | /* |
| 286 | * Take the appropriate action for each possible |
| 287 | * floating-point result: |
| 288 | * |
| 289 | * - Set the appropriate bits in the FPCR |
| 290 | * - If the specified exception is enabled in the FPCR, |
| 291 | * return. The caller (entArith) will dispatch |
| 292 | * the appropriate signal to the translated program. |
| 293 | * |
| 294 | * In addition, properly track the exception state in software |
| 295 | * as described in the Alpha Architecture Handbook section 4.7.7.3. |
| 296 | */ |
| 297 | done: |
| 298 | if (_fex) { |
| 299 | /* Record exceptions in software control word. */ |
| 300 | swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); |
| 301 | current_thread_info()->ieee_state |
| 302 | |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT); |
| 303 | |
| 304 | /* Update hardware control register. */ |
| 305 | fpcr &= (~FPCR_MASK | FPCR_DYN_MASK); |
| 306 | fpcr |= ieee_swcr_to_fpcr(swcr); |
| 307 | wrfpcr(fpcr); |
| 308 | |
| 309 | /* Do we generate a signal? */ |
| 310 | _fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK; |
| 311 | si_code = 0; |
| 312 | if (_fex) { |
| 313 | if (_fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND; |
| 314 | if (_fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES; |
| 315 | if (_fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND; |
| 316 | if (_fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF; |
| 317 | if (_fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV; |
| 318 | if (_fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV; |
| 319 | } |
| 320 | |
| 321 | return si_code; |
| 322 | } |
| 323 | |
| 324 | /* We used to write the destination register here, but DEC FORTRAN |
| 325 | requires that the result *always* be written... so we do the write |
| 326 | immediately after the operations above. */ |
| 327 | |
| 328 | return 0; |
| 329 | |
| 330 | bad_insn: |
| 331 | printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n", |
| 332 | insn, pc); |
| 333 | return -1; |
| 334 | } |
| 335 | |
| 336 | long |
| 337 | alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) |
| 338 | { |
| 339 | unsigned long trigger_pc = regs->pc - 4; |
| 340 | unsigned long insn, opcode, rc, si_code = 0; |
| 341 | |
| 342 | /* |
| 343 | * Turn off the bits corresponding to registers that are the |
| 344 | * target of instructions that set bits in the exception |
| 345 | * summary register. We have some slack doing this because a |
| 346 | * register that is the target of a trapping instruction can |
| 347 | * be written at most once in the trap shadow. |
| 348 | * |
| 349 | * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all |
| 350 | * bound the trap shadow, so we need not look any further than |
| 351 | * up to the first occurrence of such an instruction. |
| 352 | */ |
| 353 | while (write_mask) { |
| 354 | get_user(insn, (__u32 __user *)(trigger_pc)); |
| 355 | opcode = insn >> 26; |
| 356 | rc = insn & 0x1f; |
| 357 | |
| 358 | switch (opcode) { |
| 359 | case OPC_PAL: |
| 360 | case OPC_JSR: |
| 361 | case 0x30 ... 0x3f: /* branches */ |
| 362 | goto egress; |
| 363 | |
| 364 | case OPC_MISC: |
| 365 | switch (insn & 0xffff) { |
| 366 | case MISC_TRAPB: |
| 367 | case MISC_EXCB: |
| 368 | goto egress; |
| 369 | |
| 370 | default: |
| 371 | break; |
| 372 | } |
| 373 | break; |
| 374 | |
| 375 | case OPC_INTA: |
| 376 | case OPC_INTL: |
| 377 | case OPC_INTS: |
| 378 | case OPC_INTM: |
| 379 | write_mask &= ~(1UL << rc); |
| 380 | break; |
| 381 | |
| 382 | case OPC_FLTC: |
| 383 | case OPC_FLTV: |
| 384 | case OPC_FLTI: |
| 385 | case OPC_FLTL: |
| 386 | write_mask &= ~(1UL << (rc + 32)); |
| 387 | break; |
| 388 | } |
| 389 | if (!write_mask) { |
| 390 | /* Re-execute insns in the trap-shadow. */ |
| 391 | regs->pc = trigger_pc + 4; |
| 392 | si_code = alpha_fp_emul(trigger_pc); |
| 393 | goto egress; |
| 394 | } |
| 395 | trigger_pc -= 4; |
| 396 | } |
| 397 | |
| 398 | egress: |
| 399 | return si_code; |
| 400 | } |