Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | | |
| 2 | | x_unfl.sa 3.4 7/1/91 |
| 3 | | |
| 4 | | fpsp_unfl --- FPSP handler for underflow exception |
| 5 | | |
| 6 | | Trap disabled results |
| 7 | | For 881/2 compatibility, sw must denormalize the intermediate |
| 8 | | result, then store the result. Denormalization is accomplished |
| 9 | | by taking the intermediate result (which is always normalized) and |
| 10 | | shifting the mantissa right while incrementing the exponent until |
| 11 | | it is equal to the denormalized exponent for the destination |
| 12 | | format. After denormalization, the result is rounded to the |
| 13 | | destination format. |
| 14 | | |
| 15 | | Trap enabled results |
| 16 | | All trap disabled code applies. In addition the exceptional |
| 17 | | operand needs to be made available to the user with a bias of $6000 |
| 18 | | added to the exponent. |
| 19 | | |
| 20 | |
| 21 | | Copyright (C) Motorola, Inc. 1990 |
| 22 | | All Rights Reserved |
| 23 | | |
Matt Waddel | e00d82d | 2006-02-11 17:55:48 -0800 | [diff] [blame] | 24 | | For details on the license for this file, please see the |
| 25 | | file, README, in this same directory. |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 26 | |
| 27 | X_UNFL: |idnt 2,1 | Motorola 040 Floating Point Software Package |
| 28 | |
| 29 | |section 8 |
| 30 | |
| 31 | #include "fpsp.h" |
| 32 | |
| 33 | |xref denorm |
| 34 | |xref round |
| 35 | |xref store |
| 36 | |xref g_rndpr |
| 37 | |xref g_opcls |
| 38 | |xref g_dfmtou |
| 39 | |xref real_unfl |
| 40 | |xref real_inex |
| 41 | |xref fpsp_done |
| 42 | |xref b1238_fix |
| 43 | | fpsp_unfl: save state, call unf_res, then leave via real_unfl, real_inex or fpsp_done. |
| 44 | .global fpsp_unfl |
| 45 | fpsp_unfl: |
| 46 | link %a6,#-LOCAL_SIZE |a6 = frame pointer for every (%a6) offset below |
| 47 | fsave -(%a7) |push the FPU state frame |
| 48 | moveml %d0-%d1/%a0-%a1,USER_DA(%a6) |save d0-d1/a0-a1 in the frame |
| 49 | fmovemx %fp0-%fp3,USER_FP0(%a6) |save fp0-fp3 in the frame |
| 50 | fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6) |save the FPU control registers |
| 51 | |
| 52 | | Build and store the result first; the exit path is chosen afterwards. |
| 53 | bsrl unf_res |denormalize, round & store interm op |
| 54 | | |
| 55 | | If underflow exceptions are not enabled, check for inexact |
| 56 | | exception |
| 57 | | |
| 58 | btstb #unfl_bit,FPCR_ENABLE(%a6) |underflow enable bit set? |
| 59 | beqs ck_inex |no: see whether inexact must be taken |
| 60 | |
| 61 | btstb #E3,E_BYTE(%a6) |E3 bit set in E_BYTE? |
| 62 | beqs no_e3_1 |no: skip the E3 cleanup |
| 63 | | |
| 64 | | Clear dirty bit on dest register in the frame before branching |
| 65 | | to b1238_fix. |
| 66 | | |
| 67 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no |
| 68 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit |
| 69 | bsrl b1238_fix |test for bug1238 case |
| 70 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW |
| 71 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE |
| 72 | no_e3_1: |
| 73 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 |
| 74 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 |
| 75 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore the FPU control registers |
| 76 | frestore (%a7)+ |pop the FPU state frame back into the FPU |
| 77 | unlk %a6 |
| 78 | bral real_unfl |underflow is enabled: continue in real_unfl |
| 79 | | |
| 80 | | It is possible to have either inex2 or inex1 exceptions with the |
| 81 | | unfl. If the inex enable bit is set in the FPCR, and either |
| 82 | | inex2 or inex1 occurred, we must clean up and branch to the |
| 83 | | real inex handler. |
| 84 | | |
| 85 | ck_inex: |
| 86 | moveb FPCR_ENABLE(%a6),%d0 |d0 = exception enable byte |
| 87 | andb FPSR_EXCEPT(%a6),%d0 |keep exceptions that are both enabled and reported |
| 88 | andib #0x3,%d0 |isolate the low two bits (inex2/inex1 per the note above) |
| 89 | beqs unfl_done |neither: take the normal exit |
| 90 | |
| 91 | | |
| 92 | | Inexact enabled and reported, and we must take an inexact exception |
| 93 | | |
| 94 | take_inex: |
| 95 | btstb #E3,E_BYTE(%a6) |E3 bit set in E_BYTE? |
| 96 | beqs no_e3_2 |no: skip the E3 cleanup |
| 97 | | |
| 98 | | Clear dirty bit on dest register in the frame before branching |
| 99 | | to b1238_fix. |
| 100 | | |
| 101 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no |
| 102 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit |
| 103 | bsrl b1238_fix |test for bug1238 case |
| 104 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW |
| 105 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE |
| 106 | no_e3_2: |
| 107 | moveb #INEX_VEC,EXC_VEC+1(%a6) |store INEX_VEC at EXC_VEC+1 before entering real_inex |
| 108 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 |
| 109 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 |
| 110 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore the FPU control registers |
| 111 | frestore (%a7)+ |pop the FPU state frame back into the FPU |
| 112 | unlk %a6 |
| 113 | bral real_inex |
| 114 | |
| 115 | unfl_done: |
| 116 | bclrb #E3,E_BYTE(%a6) |test E3, then clear it |
| 117 | beqs e1_set |branch if E3 was clear (Z reflects the bit before the clear) |
| 118 | | |
| 119 | | Clear dirty bit on dest register in the frame before branching |
| 120 | | to b1238_fix. |
| 121 | | |
| 122 | bfextu CMDREG3B(%a6){#6:#3},%d0 |get dest reg no |
| 123 | bclrb %d0,FPR_DIRTY_BITS(%a6) |clr dest dirty bit |
| 124 | bsrl b1238_fix |test for bug1238 case |
| 125 | movel USER_FPSR(%a6),FPSR_SHADOW(%a6) |copy the saved FPSR to FPSR_SHADOW |
| 126 | orl #sx_mask,E_BYTE(%a6) |set the sx_mask bits at E_BYTE |
| 127 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 |
| 128 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 |
| 129 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore the FPU control registers |
| 130 | frestore (%a7)+ |pop the FPU state frame back into the FPU |
| 131 | unlk %a6 |
| 132 | bral fpsp_done |
| 133 | e1_set: |
| 134 | moveml USER_DA(%a6),%d0-%d1/%a0-%a1 |restore d0-d1/a0-a1 |
| 135 | fmovemx USER_FP0(%a6),%fp0-%fp3 |restore fp0-fp3 |
| 136 | fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar |restore the FPU control registers |
| 137 | unlk %a6 |no frestore on this path: unlk discards the fsave frame (NOTE(review): presumably intentional - confirm) |
| 138 | bral fpsp_done |
| 139 | | |
| 140 | | unf_res --- underflow result calculation |
| 141 | | Uses a6 as the frame pointer; calls g_rndpr, denorm, round, g_opcls, g_dfmtou and store; updates FPSR_CC and FPSR_AEXCEPT. |
| 142 | unf_res: |
| 143 | bsrl g_rndpr |returns RND_PREC in d0 0=ext, |
| 144 | | ;1=sgl, 2=dbl |
| 145 | | ;we need the RND_PREC in the |
| 146 | | ;upper word for round |
| 147 | movew #0,-(%a7) |low word of the PREC long = 0 |
| 148 | movew %d0,-(%a7) |copy RND_PREC to stack; it is the high word of the long at (a7) |
| 149 | | |
| 150 | | |
| 151 | | If the exception bit set is E3, the exceptional operand from the |
| 152 | | fpu is in WBTEMP; else it is in FPTEMP. |
| 153 | | |
| 154 | btstb #E3,E_BYTE(%a6) |
| 155 | beqs unf_E1 |E3 clear: operand is in FPTEMP |
| 156 | unf_E3: |
| 157 | lea WBTEMP(%a6),%a0 |a0 now points to operand |
| 158 | | |
| 159 | | Test for fsgldiv and fsglmul. If the inst was one of these, then |
| 160 | | force the precision to extended for the denorm routine (d0 = 0) and |
| 161 | | force single precision for the round routine (word at (a7) = 1). |
| 162 | | |
| 163 | movew CMDREG3B(%a6),%d1 |check for fsgldiv or fsglmul |
| 164 | andiw #0x7f,%d1 |keep the low 7 bits of the command word |
| 165 | cmpiw #0x30,%d1 |check for sgldiv |
| 166 | beqs unf_sgl |
| 167 | cmpiw #0x33,%d1 |check for sglmul |
| 168 | bnes unf_cont |if not, use fpcr prec in round |
| 169 | unf_sgl: |
| 170 | clrl %d0 |d0 = 0 (ext) going into denorm |
| 171 | movew #0x1,(%a7) |override g_rndpr precision |
| 172 | | ;force single |
| 173 | bras unf_cont |
| 174 | unf_E1: |
| 175 | lea FPTEMP(%a6),%a0 |a0 now points to operand |
| 176 | unf_cont: |
| 177 | bclrb #sign_bit,LOCAL_EX(%a0) |clear sign bit |
| 178 | sne LOCAL_SGN(%a0) |store sign: $ff if the bit was set, else 0 |
| 179 | |
| 180 | bsrl denorm |returns denorm, a0 points to it |
| 181 | | |
| 182 | | WARNING: |
| 183 | | ;d0 has guard,round sticky bit |
| 184 | | ;make sure that it is not corrupted |
| 185 | | ;before it reaches the round subroutine |
| 186 | | ;also ensure that a0 isn't corrupted |
| 187 | |
| 188 | | |
| 189 | | Set up d1 for round subroutine d1 contains the PREC/MODE |
| 190 | | information respectively on upper/lower register halves. |
| 191 | | |
| 192 | bfextu FPCR_MODE(%a6){#2:#2},%d1 |get mode from FPCR |
| 193 | | ;mode in lower d1 |
| 194 | addl (%a7)+,%d1 |merge PREC/MODE; pops the long pushed above |
| 195 | | |
| 196 | | WARNING: a0 and d0 are assumed to be intact between the denorm and |
| 197 | | round subroutines. All code between these two subroutines |
| 198 | | must not corrupt a0 and d0. |
| 199 | | |
| 200 | | |
| 201 | | Perform Round |
| 202 | | Input: a0 points to input operand |
| 203 | | d0{31:29} has guard, round, sticky |
| 204 | | d1{01:00} has rounding mode |
| 205 | | d1{17:16} has rounding precision |
| 206 | | Output: a0 points to rounded operand |
| 207 | | |
| 208 | |
| 209 | bsrl round |returns rounded denorm at (a0) |
| 210 | | |
| 211 | | Differentiate between store to memory vs. store to register |
| 212 | | |
| 213 | unf_store: |
| 214 | bsrl g_opcls |returns opclass in d0{2:0} |
| 215 | cmpib #0x3,%d0 |opclass 3 is the store-to-memory case handled at opc011 |
| 216 | bnes not_opc011 |
| 217 | | |
| 218 | | At this point, a store to memory is pending |
| 219 | | |
| 220 | opc011: |
| 221 | bsrl g_dfmtou |d0 = destination format; zero is treated as extended below |
| 222 | tstb %d0 |
| 223 | beqs ext_opc011 |If extended, do not subtract |
| 224 | | ;If destination format is sgl/dbl, |
| 225 | tstb LOCAL_HI(%a0) |If rounded result is normal,don't |
| 226 | | ;subtract |
| 227 | bmis ext_opc011 |top mantissa bit set: result is normalized |
| 228 | subqw #1,LOCAL_EX(%a0) |account for denorm bias vs. |
| 229 | | ;normalized bias |
| 230 | | ; normalized denormalized |
| 231 | | ;single $7f $7e |
| 232 | | ;double $3ff $3fe |
| 233 | | |
| 234 | ext_opc011: |
| 235 | bsrl store |stores to memory |
| 236 | bras unf_done |finish up |
| 237 | |
| 238 | | |
| 239 | | At this point, a store to a float register is pending |
| 240 | | |
| 241 | not_opc011: |
| 242 | bsrl store |stores to float register |
| 243 | | ;a0 is not corrupted on a store to a |
| 244 | | ;float register. |
| 245 | | |
| 246 | | Set the condition codes according to result |
| 247 | | |
| 248 | tstl LOCAL_HI(%a0) |check upper mantissa |
| 249 | bnes ck_sgn |
| 250 | tstl LOCAL_LO(%a0) |check lower mantissa |
| 251 | bnes ck_sgn |
| 252 | bsetb #z_bit,FPSR_CC(%a6) |set condition codes if zero |
| 253 | ck_sgn: |
| 254 | btstb #sign_bit,LOCAL_EX(%a0) |check the sign bit |
| 255 | beqs unf_done |
| 256 | bsetb #neg_bit,FPSR_CC(%a6) |sign bit set: flag the result as negative |
| 257 | |
| 258 | | |
| 259 | | Finish. |
| 260 | | |
| 261 | unf_done: |
| 262 | btstb #inex2_bit,FPSR_EXCEPT(%a6) |inex2 reported? |
| 263 | beqs no_aunfl |no: leave aunfl untouched |
| 264 | bsetb #aunfl_bit,FPSR_AEXCEPT(%a6) |yes: set aunfl in FPSR_AEXCEPT |
| 265 | no_aunfl: |
| 266 | rts |
| 267 | |
| 268 | |end |