Paul Mundt | 180ae20 | 2008-12-12 16:53:14 +0900 | [diff] [blame] | 1 | .global __sdivsi3 |
| 2 | .section .text..SHmedia32,"ax" |
| 3 | .align 2 |
| 4 | /* __sdivsi3: divide r4 by r5 by looking up an approximate reciprocal of the normalized divisor in __div_table, refining it in fixed point, and multiplying by the dividend. (The name suggests signed 32-bit division -- confirm operand width against callers.) */ |
| 5 | /* inputs: r4 = dividend, r5 = divisor */ |
| 6 | /* clobbered: r1,r18,r19,r20,r21,r25,tr0 */ |
| 7 | /* result in r0 (quotient) */ |
| 8 | __sdivsi3: |
| 9 | ptb __div_table,tr0 /* tr0 = address of __div_table; read back into r20 by gettr below */ |
| 10 | |
| 11 | nsb r5, r1 /* r1 = left-shift count used to normalize the divisor */ |
| 12 | shlld r5, r1, r25 /* normalize; [-2..-1), [1..2) in s2.62 */ |
| 13 | shari r25, 58, r21 /* extract 5(6) bit index (s2.4 with hole -1..1) */ |
| 14 | /* bubble */ |
| 15 | gettr tr0,r20 /* r20 = base address of __div_table */ |
| 16 | ldx.ub r20, r21, r19 /* u0.8 factor byte at table base + index */ |
| 17 | shari r25, 32, r25 /* normalize to s2.30 */ |
| 18 | shlli r21, 1, r21 /* double the index: byte offset of the matching 16-bit constant */ |
| 19 | muls.l r25, r19, r19 /* s2.38 */ |
| 20 | ldx.w r20, r21, r21 /* s2.14 constant word at table base + 2*index */ |
| 21 | ptabs r18, tr0 /* tr0 = return target taken from r18; r18 is reused as scratch below */ |
| 22 | shari r19, 24, r19 /* truncate to s2.14 */ |
| 23 | sub r21, r19, r19 /* some 11 bit inverse in s1.14 */ |
| 24 | muls.l r19, r19, r21 /* u0.28 */ |
| 25 | sub r63, r1, r1 /* r1 = -r1 (relies on r63 reading as zero) */ |
| 26 | addi r1, 92, r1 /* r1 = 92 - normalization shift; used as the final shard count */ |
| 27 | muls.l r25, r21, r18 /* s2.58 */ |
| 28 | shlli r19, 45, r19 /* multiply by two and convert to s2.58 */ |
| 29 | /* bubble */ |
| 30 | sub r19, r18, r18 |
| 31 | shari r18, 28, r18 /* some 22 bit inverse in s1.30 */ |
| 32 | muls.l r18, r25, r0 /* s2.60 */ |
| 33 | muls.l r18, r4, r25 /* s32.30 */ |
| 34 | /* bubble */ |
| 35 | shari r0, 16, r19 /* s-16.44 */ |
| 36 | muls.l r19, r18, r19 /* s-16.74 */ |
| 37 | shari r25, 63, r0 /* r0 = sign mask of r25: 0 or -1 */ |
| 38 | shari r4, 14, r18 /* s19.-14 */ |
| 39 | shari r19, 30, r19 /* s-16.44 */ |
| 40 | muls.l r19, r18, r19 /* s15.30 */ |
| 41 | xor r21, r0, r21 /* complement r21 when the sign mask in r0 is -1. You could also use the constant 1 << 27. */ |
| 42 | add r21, r25, r21 /* add the dividend * inverse product held in r25 */ |
| 43 | sub r21, r19, r21 /* subtract the correction term computed in r19 */ |
| 44 | shard r21, r1, r21 /* arithmetic right shift by r1 (92 - normalization shift) */ |
| 45 | sub r21, r0, r0 /* r0 = r21 - sign mask, i.e. r21 + 1 when the mask is -1 */ |
| 46 | blink tr0, r63 /* return through tr0 (loaded from r18 above) */ |
| 47 | |
| 48 | /* This table has been generated by divtab.c . |
| 49 | Defects for bias -330: |
| 50 | Max defect: 6.081536e-07 at -1.000000e+00 |
| 51 | Min defect: 2.849516e-08 at 1.030651e+00 |
| 52 | Max 2nd step defect: 9.606539e-12 at -1.000000e+00 |
| 53 | Min 2nd step defect: 0.000000e+00 at 0.000000e+00 |
| 54 | Defect at 1: 1.238659e-07 |
| 55 | Defect at -2: 1.061708e-07 */ |
| 56 | /* Lookup table addressed by a signed byte offset from the __div_table label: byte factors live at offsets -32..-17 and 16..31, 16-bit constants at twice those offsets. Entries for negative offsets are emitted before the label. */ |
| 57 | .balign 2 |
| 58 | .type __div_table,@object |
| 59 | .size __div_table,128 /* 32 + 16 + 16 + 16 + 16 + 32 bytes, counting the data placed before the label */ |
| 60 | /* negative division constants: 16 words at __div_table-64 .. __div_table-34 */ |
| 61 | .word -16638 |
| 62 | .word -17135 |
| 63 | .word -17737 |
| 64 | .word -18433 |
| 65 | .word -19103 |
| 66 | .word -19751 |
| 67 | .word -20583 |
| 68 | .word -21383 |
| 69 | .word -22343 |
| 70 | .word -23353 |
| 71 | .word -24407 |
| 72 | .word -25582 |
| 73 | .word -26863 |
| 74 | .word -28382 |
| 75 | .word -29965 |
| 76 | .word -31800 |
| 77 | /* negative division factors: 16 bytes at __div_table-32 .. __div_table-17 */ |
| 78 | .byte 66 |
| 79 | .byte 70 |
| 80 | .byte 75 |
| 81 | .byte 81 |
| 82 | .byte 87 |
| 83 | .byte 93 |
| 84 | .byte 101 |
| 85 | .byte 109 |
| 86 | .byte 119 |
| 87 | .byte 130 |
| 88 | .byte 142 |
| 89 | .byte 156 |
| 90 | .byte 172 |
| 91 | .byte 192 |
| 92 | .byte 214 |
| 93 | .byte 241 |
| 94 | .skip 16 /* unused hole: offsets -16..-1 are never produced by the normalized index */ |
| 95 | .global __div_table |
| 96 | __div_table: |
| 97 | .skip 16 /* unused hole: offsets 0..15 are never produced by the normalized index */ |
| 98 | /* positive division factors: 16 bytes at __div_table+16 .. __div_table+31 */ |
| 99 | .byte 241 |
| 100 | .byte 214 |
| 101 | .byte 192 |
| 102 | .byte 172 |
| 103 | .byte 156 |
| 104 | .byte 142 |
| 105 | .byte 130 |
| 106 | .byte 119 |
| 107 | .byte 109 |
| 108 | .byte 101 |
| 109 | .byte 93 |
| 110 | .byte 87 |
| 111 | .byte 81 |
| 112 | .byte 75 |
| 113 | .byte 70 |
| 114 | .byte 66 |
| 115 | /* positive division constants: 16 words at __div_table+32 .. __div_table+62 */ |
| 116 | .word 31801 |
| 117 | .word 29966 |
| 118 | .word 28383 |
| 119 | .word 26864 |
| 120 | .word 25583 |
| 121 | .word 24408 |
| 122 | .word 23354 |
| 123 | .word 22344 |
| 124 | .word 21384 |
| 125 | .word 20584 |
| 126 | .word 19752 |
| 127 | .word 19104 |
| 128 | .word 18434 |
| 129 | .word 17738 |
| 130 | .word 17136 |
| 131 | .word 16639 |