blob: d1d4e48b4f67d2cd7281cd6891395390e81356f7 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001 .file "reg_round.S"
2/*---------------------------------------------------------------------------+
3 | reg_round.S |
4 | |
5 | Rounding/truncation/etc for FPU basic arithmetic functions. |
6 | |
7 | Copyright (C) 1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | This code has four possible entry points. |
12 | The following must be entered by a jmp instruction: |
13 | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. |
14 | |
15 | The FPU_round entry point is intended to be used by C code. |
16 | From C, call as: |
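17 | int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, |
 | unsigned int control_w, u_char sign) |
 | (the code below reads the control word from PARAM4 and the sign of the |
 | result from PARAM5; the third argument is not read) |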
18 | |
19 | Return value is the tag of the answer, or-ed with FPU_Exception if |
20 | one was raised, or -1 on internal error. |
21 | |
22 | For correct "up" and "down" rounding, the argument must have the correct |
23 | sign. |
24 | |
25 +---------------------------------------------------------------------------*/
26
27/*---------------------------------------------------------------------------+
28 | Four entry points. |
29 | |
30 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: |
31 | %eax:%ebx 64 bit significand |
32 | %edx 32 bit extension of the significand |
33 | %edi pointer to an FPU_REG for the result to be stored |
34 | stack calling function must have set up a C stack frame and |
35 | pushed %esi, %edi, and %ebx |
36 | |
37 | Needed just for the fpu_reg_round_sqrt entry point: |
38 | %cx A control word in the same format as the FPU control word. |
39 | Otherwise, PARAM4 must give such a value. |
40 | |
41 | |
42 | The significand and its extension are assumed to be exact in the |
43 | following sense: |
44 | If the significand by itself is the exact result then the significand |
45 | extension (%edx) must contain 0, otherwise the significand extension |
46 | must be non-zero. |
47 | If the significand extension is non-zero then the significand is |
48 | smaller than the magnitude of the correct exact result by an amount |
49 | greater than zero and less than one ls bit of the significand. |
50 | The significand extension is only required to have three possible |
51 | non-zero values: |
52 | less than 0x80000000 <=> the significand is less than 1/2 an ls |
53 | bit smaller than the magnitude of the |
54 | true exact result. |
55 | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit |
56 | smaller than the magnitude of the true |
57 | exact result. |
58 | greater than 0x80000000 <=> the significand is more than 1/2 an ls |
59 | bit smaller than the magnitude of the |
60 | true exact result. |
61 | |
62 +---------------------------------------------------------------------------*/
63
64/*---------------------------------------------------------------------------+
65 | The code in this module has become quite complex, but it should handle |
66 | all of the FPU flags which are set at this stage of the basic arithmetic |
67 | computations. |
68 | There are a few rare cases where the results are not set identically to |
69 | a real FPU. These require a bit more thought because at this stage the |
70 | results of the code here appear to be more consistent... |
71 | This may be changed in a future version. |
72 +---------------------------------------------------------------------------*/
73
74
75#include "fpu_emu.h"
76#include "exception.h"
77#include "control_w.h"
78
79/* Flags for FPU_bits_lost */
80#define LOST_DOWN $1
81#define LOST_UP $2
82
83/* Flags for FPU_denormal */
84#define DENORMAL $1
85#define UNMASKED_UNDERFLOW $2
86
87
88#ifndef NON_REENTRANT_FPU
89/* Make the code re-entrant by putting
90 local storage on the stack: */
91#define FPU_bits_lost (%esp)
92#define FPU_denormal 1(%esp)
93
94#else
95/* Not re-entrant, so we can gain speed by putting
96 local storage in a static area: */
97.data
98 .align 4,0
99FPU_bits_lost:
100 .byte 0
101FPU_denormal:
102 .byte 0
103#endif /* NON_REENTRANT_FPU */
104
105
106.text
107.globl fpu_reg_round
108.globl fpu_Arith_exit
109
110/* Entry point when called from C */
111ENTRY(FPU_round)
112 pushl %ebp
113 movl %esp,%ebp
114 pushl %esi
115 pushl %edi
116 pushl %ebx
117
118 movl PARAM1,%edi
119 movl SIGH(%edi),%eax
120 movl SIGL(%edi),%ebx
121 movl PARAM2,%edx
122
123fpu_reg_round: /* Normal entry point */
124 movl PARAM4,%ecx
125
126#ifndef NON_REENTRANT_FPU
127 pushl %ebx /* adjust the stack pointer */
128#endif /* NON_REENTRANT_FPU */
129
130#ifdef PARANOID
131/* Cannot use this here yet */
132/* orl %eax,%eax */
133/* jns L_entry_bugged */
134#endif /* PARANOID */
135
136 cmpw EXP_UNDER,EXP(%edi)
137 jle L_Make_denorm /* The number is a de-normal */
138
139 movb $0,FPU_denormal /* 0 -> not a de-normal */
140
141Denorm_done:
142 movb $0,FPU_bits_lost /* No bits yet lost in rounding */
143
144 movl %ecx,%esi
145 andl CW_PC,%ecx
146 cmpl PR_64_BITS,%ecx
147 je LRound_To_64
148
149 cmpl PR_53_BITS,%ecx
150 je LRound_To_53
151
152 cmpl PR_24_BITS,%ecx
153 je LRound_To_24
154
155#ifdef PECULIAR_486
156/* With the precision control bits set to 01 "(reserved)", a real 80486
157 behaves as if the precision control bits were set to 11 "64 bits" */
158 cmpl PR_RESERVED_BITS,%ecx
159 je LRound_To_64
160#ifdef PARANOID
161 jmp L_bugged_denorm_486
162#endif /* PARANOID */
163#else
164#ifdef PARANOID
165 jmp L_bugged_denorm /* There is no bug, just a bad control word */
166#endif /* PARANOID */
167#endif /* PECULIAR_486 */
168
169
170/* Round etc to 24 bit precision */
171LRound_To_24:
172 movl %esi,%ecx
173 andl CW_RC,%ecx
174 cmpl RC_RND,%ecx
175 je LRound_nearest_24
176
177 cmpl RC_CHOP,%ecx
178 je LCheck_truncate_24
179
180 cmpl RC_UP,%ecx /* Towards +infinity */
181 je LUp_24
182
183 cmpl RC_DOWN,%ecx /* Towards -infinity */
184 je LDown_24
185
186#ifdef PARANOID
187 jmp L_bugged_round24
188#endif /* PARANOID */
189
190LUp_24:
191 cmpb SIGN_POS,PARAM5
192 jne LCheck_truncate_24 /* If negative then up==truncate */
193
194 jmp LCheck_24_round_up
195
196LDown_24:
197 cmpb SIGN_POS,PARAM5
198 je LCheck_truncate_24 /* If positive then down==truncate */
199
200LCheck_24_round_up:
201 movl %eax,%ecx
202 andl $0x000000ff,%ecx
203 orl %ebx,%ecx
204 orl %edx,%ecx
205 jnz LDo_24_round_up
206 jmp L_Re_normalise
207
208LRound_nearest_24:
209 /* Do rounding of the 24th bit if needed (nearest or even) */
210 movl %eax,%ecx
211 andl $0x000000ff,%ecx
212 cmpl $0x00000080,%ecx
213 jc LCheck_truncate_24 /* less than half, no increment needed */
214
215 jne LGreater_Half_24 /* greater than half, increment needed */
216
217 /* Possibly half, we need to check the ls bits */
218 orl %ebx,%ebx
219 jnz LGreater_Half_24 /* greater than half, increment needed */
220
221 orl %edx,%edx
222 jnz LGreater_Half_24 /* greater than half, increment needed */
223
224 /* Exactly half, increment only if 24th bit is 1 (round to even) */
225 testl $0x00000100,%eax
226 jz LDo_truncate_24
227
228LGreater_Half_24: /* Rounding: increment at the 24th bit */
229LDo_24_round_up:
230 andl $0xffffff00,%eax /* Truncate to 24 bits */
231 xorl %ebx,%ebx
232 movb LOST_UP,FPU_bits_lost
233 addl $0x00000100,%eax
234 jmp LCheck_Round_Overflow
235
236LCheck_truncate_24:
237 movl %eax,%ecx
238 andl $0x000000ff,%ecx
239 orl %ebx,%ecx
240 orl %edx,%ecx
241 jz L_Re_normalise /* No truncation needed */
242
243LDo_truncate_24:
244 andl $0xffffff00,%eax /* Truncate to 24 bits */
245 xorl %ebx,%ebx
246 movb LOST_DOWN,FPU_bits_lost
247 jmp L_Re_normalise
248
249
250/* Round etc to 53 bit precision */
251LRound_To_53:
252 movl %esi,%ecx
253 andl CW_RC,%ecx
254 cmpl RC_RND,%ecx
255 je LRound_nearest_53
256
257 cmpl RC_CHOP,%ecx
258 je LCheck_truncate_53
259
260 cmpl RC_UP,%ecx /* Towards +infinity */
261 je LUp_53
262
263 cmpl RC_DOWN,%ecx /* Towards -infinity */
264 je LDown_53
265
266#ifdef PARANOID
267 jmp L_bugged_round53
268#endif /* PARANOID */
269
270LUp_53:
271 cmpb SIGN_POS,PARAM5
272 jne LCheck_truncate_53 /* If negative then up==truncate */
273
274 jmp LCheck_53_round_up
275
276LDown_53:
277 cmpb SIGN_POS,PARAM5
278 je LCheck_truncate_53 /* If positive then down==truncate */
279
280LCheck_53_round_up:
281 movl %ebx,%ecx
282 andl $0x000007ff,%ecx
283 orl %edx,%ecx
284 jnz LDo_53_round_up
285 jmp L_Re_normalise
286
287LRound_nearest_53:
288 /* Do rounding of the 53rd bit if needed (nearest or even) */
289 movl %ebx,%ecx
290 andl $0x000007ff,%ecx
291 cmpl $0x00000400,%ecx
292 jc LCheck_truncate_53 /* less than half, no increment needed */
293
294 jnz LGreater_Half_53 /* greater than half, increment needed */
295
296 /* Possibly half, we need to check the ls bits */
297 orl %edx,%edx
298 jnz LGreater_Half_53 /* greater than half, increment needed */
299
300 /* Exactly half, increment only if 53rd bit is 1 (round to even) */
301 testl $0x00000800,%ebx
302 jz LTruncate_53
303
304LGreater_Half_53: /* Rounding: increment at the 53rd bit */
305LDo_53_round_up:
306 movb LOST_UP,FPU_bits_lost
307 andl $0xfffff800,%ebx /* Truncate to 53 bits */
308 addl $0x00000800,%ebx
309 adcl $0,%eax
310 jmp LCheck_Round_Overflow
311
312LCheck_truncate_53:
313 movl %ebx,%ecx
314 andl $0x000007ff,%ecx
315 orl %edx,%ecx
316 jz L_Re_normalise
317
318LTruncate_53:
319 movb LOST_DOWN,FPU_bits_lost
320 andl $0xfffff800,%ebx /* Truncate to 53 bits */
321 jmp L_Re_normalise
322
323
324/* Round etc to 64 bit precision */
325LRound_To_64:
326 movl %esi,%ecx
327 andl CW_RC,%ecx
328 cmpl RC_RND,%ecx
329 je LRound_nearest_64
330
331 cmpl RC_CHOP,%ecx
332 je LCheck_truncate_64
333
334 cmpl RC_UP,%ecx /* Towards +infinity */
335 je LUp_64
336
337 cmpl RC_DOWN,%ecx /* Towards -infinity */
338 je LDown_64
339
340#ifdef PARANOID
341 jmp L_bugged_round64
342#endif /* PARANOID */
343
344LUp_64:
345 cmpb SIGN_POS,PARAM5
346 jne LCheck_truncate_64 /* If negative then up==truncate */
347
348 orl %edx,%edx
349 jnz LDo_64_round_up
350 jmp L_Re_normalise
351
352LDown_64:
353 cmpb SIGN_POS,PARAM5
354 je LCheck_truncate_64 /* If positive then down==truncate */
355
356 orl %edx,%edx
357 jnz LDo_64_round_up
358 jmp L_Re_normalise
359
360LRound_nearest_64:
361 cmpl $0x80000000,%edx
362 jc LCheck_truncate_64
363
364 jne LDo_64_round_up
365
366 /* Now test for round-to-even */
367 testb $1,%bl
368 jz LCheck_truncate_64
369
370LDo_64_round_up:
371 movb LOST_UP,FPU_bits_lost
372 addl $1,%ebx
373 adcl $0,%eax
374
375LCheck_Round_Overflow:
376 jnc L_Re_normalise
377
378 /* Overflow, adjust the result (significand to 1.0) */
379 rcrl $1,%eax
380 rcrl $1,%ebx
381 incw EXP(%edi)
382 jmp L_Re_normalise
383
384LCheck_truncate_64:
385 orl %edx,%edx
386 jz L_Re_normalise
387
388LTruncate_64:
389 movb LOST_DOWN,FPU_bits_lost
390
391L_Re_normalise:
392 testb $0xff,FPU_denormal
393 jnz Normalise_result
394
395L_Normalised:
396 movl TAG_Valid,%edx
397
398L_deNormalised:
399 cmpb LOST_UP,FPU_bits_lost
400 je L_precision_lost_up
401
402 cmpb LOST_DOWN,FPU_bits_lost
403 je L_precision_lost_down
404
405L_no_precision_loss:
406 /* store the result */
407
408L_Store_significand:
409 movl %eax,SIGH(%edi)
410 movl %ebx,SIGL(%edi)
411
412 cmpw EXP_OVER,EXP(%edi)
413 jge L_overflow
414
415 movl %edx,%eax
416
417 /* Convert the exponent to 80x87 form. */
418 addw EXTENDED_Ebias,EXP(%edi)
419 andw $0x7fff,EXP(%edi)
420
421fpu_reg_round_signed_special_exit:
422
423 cmpb SIGN_POS,PARAM5
424 je fpu_reg_round_special_exit
425
426 orw $0x8000,EXP(%edi) /* Negative sign for the result. */
427
428fpu_reg_round_special_exit:
429
430#ifndef NON_REENTRANT_FPU
431 popl %ebx /* adjust the stack pointer */
432#endif /* NON_REENTRANT_FPU */
433
434fpu_Arith_exit:
435 popl %ebx
436 popl %edi
437 popl %esi
438 leave
439 ret
440
441
442/*
443 * Set the FPU status flags to represent precision loss due to
444 * round-up.
445 */
446L_precision_lost_up:
447 push %edx
448 push %eax
449 call set_precision_flag_up
450 popl %eax
451 popl %edx
452 jmp L_no_precision_loss
453
454/*
455 * Set the FPU status flags to represent precision loss due to
456 * truncation.
457 */
458L_precision_lost_down:
459 push %edx
460 push %eax
461 call set_precision_flag_down
462 popl %eax
463 popl %edx
464 jmp L_no_precision_loss
465
466
467/*
468 * The number is a denormal (which might get rounded up to a normal)
469 * Shift the number right the required number of bits, which will
470 * have to be undone later...
471 */
472L_Make_denorm:
473 /* The action to be taken depends upon whether the underflow
474 exception is masked */
475 testb CW_Underflow,%cl /* Underflow mask. */
476 jz Unmasked_underflow /* Do not make a denormal. */
477
478 movb DENORMAL,FPU_denormal
479
480 pushl %ecx /* Save */
481 movw EXP_UNDER+1,%cx
482 subw EXP(%edi),%cx
483
484 cmpw $64,%cx /* shrd only works for 0..31 bits */
485 jnc Denorm_shift_more_than_63
486
487 cmpw $32,%cx /* shrd only works for 0..31 bits */
488 jnc Denorm_shift_more_than_32
489
490/*
491 * We got here without jumps by assuming that the most common requirement
492 * is for a small de-normalising shift.
493 * Shift by [1..31] bits
494 */
495 addw %cx,EXP(%edi)
496 orl %edx,%edx /* extension */
497 setne %ch /* Save whether %edx is non-zero */
498 xorl %edx,%edx
499 shrd %cl,%ebx,%edx
500 shrd %cl,%eax,%ebx
501 shr %cl,%eax
502 orb %ch,%dl
503 popl %ecx
504 jmp Denorm_done
505
506/* Shift by [32..63] bits */
507Denorm_shift_more_than_32:
508 addw %cx,EXP(%edi)
509 subb $32,%cl
510 orl %edx,%edx
511 setne %ch
512 orb %ch,%bl
513 xorl %edx,%edx
514 shrd %cl,%ebx,%edx
515 shrd %cl,%eax,%ebx
516 shr %cl,%eax
517 orl %edx,%edx /* test these 32 bits */
518 setne %cl
519 orb %ch,%bl
520 orb %cl,%bl
521 movl %ebx,%edx
522 movl %eax,%ebx
523 xorl %eax,%eax
524 popl %ecx
525 jmp Denorm_done
526
527/* Shift by [64..) bits */
528Denorm_shift_more_than_63:
529 cmpw $64,%cx
530 jne Denorm_shift_more_than_64
531
532/* Exactly 64 bit shift */
533 addw %cx,EXP(%edi)
534 xorl %ecx,%ecx
535 orl %edx,%edx
536 setne %cl
537 orl %ebx,%ebx
538 setne %ch
539 orb %ch,%cl
540 orb %cl,%al
541 movl %eax,%edx
542 xorl %eax,%eax
543 xorl %ebx,%ebx
544 popl %ecx
545 jmp Denorm_done
546
547Denorm_shift_more_than_64:
548 movw EXP_UNDER+1,EXP(%edi)
549/* This is easy, %eax must be non-zero, so.. */
550 movl $1,%edx
551 xorl %eax,%eax
552 xorl %ebx,%ebx
553 popl %ecx
554 jmp Denorm_done
555
556
557Unmasked_underflow:
558 movb UNMASKED_UNDERFLOW,FPU_denormal
559 jmp Denorm_done
560
561
562/* Undo the de-normalisation. */
563Normalise_result:
564 cmpb UNMASKED_UNDERFLOW,FPU_denormal
565 je Signal_underflow
566
567/* The number must be a denormal if we got here. */
568#ifdef PARANOID
569 /* But check it... just in case. */
570 cmpw EXP_UNDER+1,EXP(%edi)
571 jne L_norm_bugged
572#endif /* PARANOID */
573
574#ifdef PECULIAR_486
575 /*
576 * This implements a special feature of 80486 behaviour.
577 * Underflow will be signalled even if the number is
578 * not a denormal after rounding.
579 * This difference occurs only for masked underflow, and not
580 * in the unmasked case.
581 * Actual 80486 behaviour differs from this in some circumstances.
582 */
583 orl %eax,%eax /* ms bits */
584 js LPseudoDenormal /* Will be masked underflow */
585#else
586 orl %eax,%eax /* ms bits */
587 js L_Normalised /* No longer a denormal */
588#endif /* PECULIAR_486 */
589
590 jnz LDenormal_adj_exponent
591
592 orl %ebx,%ebx
593 jz L_underflow_to_zero /* The contents are zero */
594
595LDenormal_adj_exponent:
596 decw EXP(%edi)
597
598LPseudoDenormal:
599 testb $0xff,FPU_bits_lost /* bits lost == underflow */
600 movl TAG_Special,%edx
601 jz L_deNormalised
602
603 /* There must be a masked underflow */
604 push %eax
605 pushl EX_Underflow
606 call EXCEPTION
607 popl %eax
608 popl %eax
609 movl TAG_Special,%edx
610 jmp L_deNormalised
611
612
613/*
614 * The operations resulted in a number too small to represent.
615 * Masked response.
616 */
617L_underflow_to_zero:
618 push %eax
619 call set_precision_flag_down
620 popl %eax
621
622 push %eax
623 pushl EX_Underflow
624 call EXCEPTION
625 popl %eax
626 popl %eax
627
628/* Reduce the exponent to EXP_UNDER */
629 movw EXP_UNDER,EXP(%edi)
630 movl TAG_Zero,%edx
631 jmp L_Store_significand
632
633
634/* The operations resulted in a number too large to represent. */
635L_overflow:
636 addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */
637 push %edi
638 call arith_overflow
639 pop %edi
640 jmp fpu_reg_round_signed_special_exit
641
642
643Signal_underflow:
644 /* The number may have been changed to a non-denormal */
645 /* by the rounding operations. */
646 cmpw EXP_UNDER,EXP(%edi)
647 jle Do_unmasked_underflow
648
649 jmp L_Normalised
650
651Do_unmasked_underflow:
652 /* Increase the exponent by the magic number */
653 addw $(3*(1<<13)),EXP(%edi)
654 push %eax
655 pushl EX_Underflow
656 call EXCEPTION
657 popl %eax
658 popl %eax
659 jmp L_Normalised
660
661
662#ifdef PARANOID
663#ifdef PECULIAR_486
664L_bugged_denorm_486:
665 pushl EX_INTERNAL|0x236
666 call EXCEPTION
667 popl %ebx
668 jmp L_exception_exit
669#else
670L_bugged_denorm:
671 pushl EX_INTERNAL|0x230
672 call EXCEPTION
673 popl %ebx
674 jmp L_exception_exit
675#endif /* PECULIAR_486 */
676
677L_bugged_round24:
678 pushl EX_INTERNAL|0x231
679 call EXCEPTION
680 popl %ebx
681 jmp L_exception_exit
682
683L_bugged_round53:
684 pushl EX_INTERNAL|0x232
685 call EXCEPTION
686 popl %ebx
687 jmp L_exception_exit
688
689L_bugged_round64:
690 pushl EX_INTERNAL|0x233
691 call EXCEPTION
692 popl %ebx
693 jmp L_exception_exit
694
695L_norm_bugged:
696 pushl EX_INTERNAL|0x234
697 call EXCEPTION
698 popl %ebx
699 jmp L_exception_exit
700
701L_entry_bugged:
702 pushl EX_INTERNAL|0x235
703 call EXCEPTION
704 popl %ebx
705L_exception_exit:
706 mov $-1,%eax
707 jmp fpu_reg_round_special_exit
708#endif /* PARANOID */