Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | | |
| 2 | | slogn.sa 3.1 12/10/90 |
| 3 | | |
| 4 | | slogn computes the natural logarithm of an |
| 5 | | input value. slognd does the same except the input value is a |
| 6 | | denormalized number. slognp1 computes log(1+X), and slognp1d |
| 7 | | computes log(1+X) for denormalized X. |
| 8 | | |
| 9 | | Input: Double-extended value in memory location pointed to by address |
| 10 | | register a0. |
| 11 | | |
| 12 | | Output: log(X) or log(1+X) returned in floating-point register Fp0. |
| 13 | | |
| 14 | | Accuracy and Monotonicity: The returned result is within 2 ulps in |
| 15 | | 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the |
| 16 | | result is subsequently rounded to double precision. The |
| 17 | | result is provably monotonic in double precision. |
| 18 | | |
| 19 | | Speed: The program slogn takes approximately 190 cycles for input |
| 20 | | argument X such that |X-1| >= 1/16, which is the usual |
| 21 | | situation. For those arguments, slognp1 takes approximately |
| 22 | | 210 cycles. For the less common arguments, the program will |
| 23 | | run no worse than 10% slower. |
| 24 | | |
| 25 | | Algorithm: |
| 26 | | LOGN: |
| 27 | | Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in |
| 28 | | u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2. |
| 29 | | |
| 30 | | Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven |
| 31 | | significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base |
| 32 | | 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7). |
| 33 | | |
| 34 | | Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u, |
| 35 | | log(1+u) = poly. |
| 36 | | |
| 37 | | Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) |
| 38 | | by k*log(2) + (log(F) + poly). The values of log(F) are calculated |
| 39 | | beforehand and stored in the program. |
| 40 | | |
| 41 | | lognp1: |
| 42 | | Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in |
| 43 | | u where u = 2X/(2+X). Otherwise, move on to Step 2. |
| 44 | | |
| 45 | | Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2 |
| 46 | | of the algorithm for LOGN and compute log(1+X) as |
| 47 | | k*log(2) + log(F) + poly where poly approximates log(1+u), |
| 48 | | u = (Y-F)/F. |
| 49 | | |
| 50 | | Implementation Notes: |
| 51 | | Note 1. There are 64 different possible values for F, thus 64 log(F)'s |
| 52 | | need to be tabulated. Moreover, the values of 1/F are also |
| 53 | | tabulated so that the division in (Y-F)/F can be performed by a |
| 54 | | multiplication. |
| 55 | | |
| 56 | | Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value |
| 57 | | Y-F has to be calculated carefully when 1/2 <= X < 3/2. |
| 58 | | |
| 59 | | Note 3. To fully exploit the pipeline, polynomials are usually separated |
| 60 | | into two parts evaluated independently before being added up. |
| 61 | | |
| 62 | |
| 63 | | Copyright (C) Motorola, Inc. 1990 |
| 64 | | All Rights Reserved |
| 65 | | |
Matt Waddel | e00d82d | 2006-02-11 17:55:48 -0800 | [diff] [blame] | 66 | | For details on the license for this file, please see the |
| 67 | | file, README, in this same directory. |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 68 | |
| 69 | |slogn idnt 2,1 | Motorola 040 Floating Point Software Package |
| 70 | |
| 71 | |section 8 |
| 72 | |
| 73 | #include "fpsp.h" |
| 74 | |
| 75 | BOUNDS1: .long 0x3FFEF07D,0x3FFF8841 |
| 76 | BOUNDS2: .long 0x3FFE8000,0x3FFFC000 |
| 77 | |
| 78 | LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 |
| 79 | |
| 80 | one: .long 0x3F800000 |
| 81 | zero: .long 0x00000000 |
| 82 | infty: .long 0x7F800000 |
| 83 | negone: .long 0xBF800000 |
| 84 | |
| 85 | LOGA6: .long 0x3FC2499A,0xB5E4040B |
| 86 | LOGA5: .long 0xBFC555B5,0x848CB7DB |
| 87 | |
| 88 | LOGA4: .long 0x3FC99999,0x987D8730 |
| 89 | LOGA3: .long 0xBFCFFFFF,0xFF6F7E97 |
| 90 | |
| 91 | LOGA2: .long 0x3FD55555,0x555555a4 |
| 92 | LOGA1: .long 0xBFE00000,0x00000008 |
| 93 | |
| 94 | LOGB5: .long 0x3F175496,0xADD7DAD6 |
| 95 | LOGB4: .long 0x3F3C71C2,0xFE80C7E0 |
| 96 | |
| 97 | LOGB3: .long 0x3F624924,0x928BCCFF |
| 98 | LOGB2: .long 0x3F899999,0x999995EC |
| 99 | |
| 100 | LOGB1: .long 0x3FB55555,0x55555555 |
| 101 | TWO: .long 0x40000000,0x00000000 |
| 102 | |
| 103 | LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000 |
| 104 | |
| 105 | LOGTBL: |
| 106 | .long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 |
| 107 | .long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 |
| 108 | .long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 |
| 109 | .long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 |
| 110 | .long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 |
| 111 | .long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 |
| 112 | .long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 |
| 113 | .long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 |
| 114 | .long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 |
| 115 | .long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 |
| 116 | .long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 |
| 117 | .long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 |
| 118 | .long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 |
| 119 | .long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 |
| 120 | .long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 |
| 121 | .long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 |
| 122 | .long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 |
| 123 | .long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 |
| 124 | .long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 |
| 125 | .long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 |
| 126 | .long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 |
| 127 | .long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 |
| 128 | .long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 |
| 129 | .long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 |
| 130 | .long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 |
| 131 | .long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 |
| 132 | .long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 |
| 133 | .long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 |
| 134 | .long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 |
| 135 | .long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 |
| 136 | .long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 |
| 137 | .long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 |
| 138 | .long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 |
| 139 | .long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 |
| 140 | .long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 |
| 141 | .long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 |
| 142 | .long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 |
| 143 | .long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 |
| 144 | .long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 |
| 145 | .long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 |
| 146 | .long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 |
| 147 | .long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 |
| 148 | .long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 |
| 149 | .long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 |
| 150 | .long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 |
| 151 | .long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 |
| 152 | .long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 |
| 153 | .long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 |
| 154 | .long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 |
| 155 | .long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 |
| 156 | .long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 |
| 157 | .long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 |
| 158 | .long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 |
| 159 | .long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 |
| 160 | .long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 |
| 161 | .long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 |
| 162 | .long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 |
| 163 | .long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 |
| 164 | .long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 |
| 165 | .long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 |
| 166 | .long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 |
| 167 | .long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 |
| 168 | .long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 |
| 169 | .long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 |
| 170 | .long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 |
| 171 | .long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 |
| 172 | .long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 |
| 173 | .long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 |
| 174 | .long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 |
| 175 | .long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 |
| 176 | .long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 |
| 177 | .long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 |
| 178 | .long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 |
| 179 | .long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 |
| 180 | .long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 |
| 181 | .long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 |
| 182 | .long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 |
| 183 | .long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 |
| 184 | .long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 |
| 185 | .long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 |
| 186 | .long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 |
| 187 | .long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 |
| 188 | .long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 |
| 189 | .long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 |
| 190 | .long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 |
| 191 | .long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 |
| 192 | .long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 |
| 193 | .long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 |
| 194 | .long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 |
| 195 | .long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 |
| 196 | .long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 |
| 197 | .long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 |
| 198 | .long 0x3FFE0000,0x94458094,0x45809446,0x00000000 |
| 199 | .long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 |
| 200 | .long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 |
| 201 | .long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 |
| 202 | .long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 |
| 203 | .long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 |
| 204 | .long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 |
| 205 | .long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 |
| 206 | .long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 |
| 207 | .long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 |
| 208 | .long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 |
| 209 | .long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 |
| 210 | .long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 |
| 211 | .long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 |
| 212 | .long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 |
| 213 | .long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 |
| 214 | .long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 |
| 215 | .long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 |
| 216 | .long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 |
| 217 | .long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 |
| 218 | .long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 |
| 219 | .long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 |
| 220 | .long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 |
| 221 | .long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 |
| 222 | .long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 |
| 223 | .long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 |
| 224 | .long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 |
| 225 | .long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 |
| 226 | .long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 |
| 227 | .long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 |
| 228 | .long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 |
| 229 | .long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 |
| 230 | .long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 |
| 231 | .long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 |
| 232 | .long 0x3FFE0000,0x80808080,0x80808081,0x00000000 |
| 233 | .long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 |
| 234 | |
| 235 | .set ADJK,L_SCR1 |
| 236 | |
| 237 | .set X,FP_SCR1 |
| 238 | .set XDCARE,X+2 |
| 239 | .set XFRAC,X+4 |
| 240 | |
| 241 | .set F,FP_SCR2 |
| 242 | .set FFRAC,F+4 |
| 243 | |
| 244 | .set KLOG2,FP_SCR3 |
| 245 | |
| 246 | .set SAVEU,FP_SCR4 |
| 247 | |
| 248 | | xref t_frcinx |
| 249 | |xref t_extdnrm |
| 250 | |xref t_operr |
| 251 | |xref t_dz |
| 252 | |
| 253 | .global slognd |
| 254 | slognd: |
| 255 | |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT |
| 256 | |
| 257 | movel #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0 |
| 258 | |
| 259 | |----normalize the input value by left shifting k bits (k to be determined |
| 260 | |----below), adjusting exponent and storing -k to ADJK |
| 261 | |----the value TWOTO100 is no longer needed. |
| 262 | |----Note that this code assumes the denormalized input is NON-ZERO. |
| 263 | |
| 264 | moveml %d2-%d7,-(%a7) | ...save some registers |
| 265 | movel #0x00000000,%d3 | ...D3 is exponent of smallest norm. # |
| 266 | movel 4(%a0),%d4 |
| 267 | movel 8(%a0),%d5 | ...(D4,D5) is (Hi_X,Lo_X) |
| 268 | clrl %d2 | ...D2 used for holding K |
| 269 | |
| 270 | tstl %d4 |
| 271 | bnes HiX_not0 |
| 272 | |
| 273 | HiX_0: |
| 274 | movel %d5,%d4 |
| 275 | clrl %d5 |
| 276 | movel #32,%d2 |
| 277 | clrl %d6 |
| 278 | bfffo %d4{#0:#32},%d6 |
| 279 | lsll %d6,%d4 |
| 280 | addl %d6,%d2 | ...(D3,D4,D5) is normalized |
| 281 | |
| 282 | movel %d3,X(%a6) |
| 283 | movel %d4,XFRAC(%a6) |
| 284 | movel %d5,XFRAC+4(%a6) |
| 285 | negl %d2 |
| 286 | movel %d2,ADJK(%a6) |
| 287 | fmovex X(%a6),%fp0 |
| 288 | moveml (%a7)+,%d2-%d7 | ...restore registers |
| 289 | lea X(%a6),%a0 |
| 290 | bras LOGBGN | ...begin regular log(X) |
| 291 | |
| 292 | |
| 293 | HiX_not0: |
| 294 | clrl %d6 |
| 295 | bfffo %d4{#0:#32},%d6 | ...find first 1 |
| 296 | movel %d6,%d2 | ...get k |
| 297 | lsll %d6,%d4 |
| 298 | movel %d5,%d7 | ...a copy of D5 |
| 299 | lsll %d6,%d5 |
| 300 | negl %d6 |
| 301 | addil #32,%d6 |
| 302 | lsrl %d6,%d7 |
| 303 | orl %d7,%d4 | ...(D3,D4,D5) normalized |
| 304 | |
| 305 | movel %d3,X(%a6) |
| 306 | movel %d4,XFRAC(%a6) |
| 307 | movel %d5,XFRAC+4(%a6) |
| 308 | negl %d2 |
| 309 | movel %d2,ADJK(%a6) |
| 310 | fmovex X(%a6),%fp0 |
| 311 | moveml (%a7)+,%d2-%d7 | ...restore registers |
| 312 | lea X(%a6),%a0 |
| 313 | bras LOGBGN | ...begin regular log(X) |
| 314 | |
| 315 | |
| 316 | .global slogn |
| 317 | slogn: |
| 318 | |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S |
| 319 | |
| 320 | fmovex (%a0),%fp0 | ...LOAD INPUT |
| 321 | movel #0x00000000,ADJK(%a6) |
| 322 | |
| 323 | LOGBGN: |
| 324 | |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS |
| 325 | |--A FINITE, NON-ZERO, NORMALIZED NUMBER. |
| 326 | |
| 327 | movel (%a0),%d0 |
| 328 | movew 4(%a0),%d0 |
| 329 | |
| 330 | movel (%a0),X(%a6) |
| 331 | movel 4(%a0),X+4(%a6) |
| 332 | movel 8(%a0),X+8(%a6) |
| 333 | |
| 334 | cmpil #0,%d0 | ...CHECK IF X IS NEGATIVE |
| 335 | blt LOGNEG | ...LOG OF NEGATIVE ARGUMENT IS INVALID |
| 336 | cmp2l BOUNDS1,%d0 | ...X IS POSITIVE, CHECK IF X IS NEAR 1 |
| 337 | bcc LOGNEAR1 | ...BOUNDS IS ROUGHLY [15/16, 17/16] |
| 338 | |
| 339 | LOGMAIN: |
| 340 | |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 |
| 341 | |
| 342 | |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. |
| 343 | |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. |
| 344 | |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) |
| 345 | |-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). |
| 346 | |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING |
| 347 | |--LOG(1+U) CAN BE VERY EFFICIENT. |
| 348 | |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO |
| 349 | |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. |
| 350 | |
| 351 | |--GET K, Y, F, AND ADDRESS OF 1/F. |
| 352 | asrl #8,%d0 |
| 353 | asrl #8,%d0 | ...SHIFTED 16 BITS, BIASED EXPO. OF X |
| 354 | subil #0x3FFF,%d0 | ...THIS IS K |
| 355 | addl ADJK(%a6),%d0 | ...ADJUST K, ORIGINAL INPUT MAY BE DENORM. |
| 356 | lea LOGTBL,%a0 | ...BASE ADDRESS OF 1/F AND LOG(F) |
| 357 | fmovel %d0,%fp1 | ...CONVERT K TO FLOATING-POINT FORMAT |
| 358 | |
| 359 | |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F |
| 360 | movel #0x3FFF0000,X(%a6) | ...X IS NOW Y, I.E. 2^(-K)*X |
| 361 | movel XFRAC(%a6),FFRAC(%a6) |
| 362 | andil #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y |
| 363 | oril #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT |
| 364 | movel FFRAC(%a6),%d0 | ...READY TO GET ADDRESS OF 1/F |
| 365 | andil #0x7E000000,%d0 |
| 366 | asrl #8,%d0 |
| 367 | asrl #8,%d0 |
| 368 | asrl #4,%d0 | ...SHIFTED 20, D0 IS THE DISPLACEMENT |
| 369 | addal %d0,%a0 | ...A0 IS THE ADDRESS FOR 1/F |
| 370 | |
| 371 | fmovex X(%a6),%fp0 |
| 372 | movel #0x3fff0000,F(%a6) |
| 373 | clrl F+8(%a6) |
| 374 | fsubx F(%a6),%fp0 | ...Y-F |
| 375 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 WHILE FP0 IS NOT READY |
| 376 | |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K |
| 377 | |--REGISTERS SAVED: FPCR, FP1, FP2 |
| 378 | |
| 379 | LP1CONT1: |
| 380 | |--AN RE-ENTRY POINT FOR LOGNP1 |
| 381 | fmulx (%a0),%fp0 | ...FP0 IS U = (Y-F)/F |
| 382 | fmulx LOGOF2,%fp1 | ...GET K*LOG2 WHILE FP0 IS NOT READY |
| 383 | fmovex %fp0,%fp2 |
| 384 | fmulx %fp2,%fp2 | ...FP2 IS V=U*U |
| 385 | fmovex %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1 |
| 386 | |
| 387 | |--LOG(1+U) IS APPROXIMATED BY |
| 388 | |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS |
| 389 | |--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] |
| 390 | |
| 391 | fmovex %fp2,%fp3 |
| 392 | fmovex %fp2,%fp1 |
| 393 | |
| 394 | fmuld LOGA6,%fp1 | ...V*A6 |
| 395 | fmuld LOGA5,%fp2 | ...V*A5 |
| 396 | |
| 397 | faddd LOGA4,%fp1 | ...A4+V*A6 |
| 398 | faddd LOGA3,%fp2 | ...A3+V*A5 |
| 399 | |
| 400 | fmulx %fp3,%fp1 | ...V*(A4+V*A6) |
| 401 | fmulx %fp3,%fp2 | ...V*(A3+V*A5) |
| 402 | |
| 403 | faddd LOGA2,%fp1 | ...A2+V*(A4+V*A6) |
| 404 | faddd LOGA1,%fp2 | ...A1+V*(A3+V*A5) |
| 405 | |
| 406 | fmulx %fp3,%fp1 | ...V*(A2+V*(A4+V*A6)) |
| 407 | addal #16,%a0 | ...ADDRESS OF LOG(F) |
| 408 | fmulx %fp3,%fp2 | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED |
| 409 | |
| 410 | fmulx %fp0,%fp1 | ...U*V*(A2+V*(A4+V*A6)) |
| 411 | faddx %fp2,%fp0 | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED |
| 412 | |
| 413 | faddx (%a0),%fp1 | ...LOG(F)+U*V*(A2+V*(A4+V*A6)) |
| 414 | fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...RESTORE FP2 |
| 415 | faddx %fp1,%fp0 | ...FP0 IS LOG(F) + LOG(1+U) |
| 416 | |
| 417 | fmovel %d1,%fpcr |
| 418 | faddx KLOG2(%a6),%fp0 | ...FINAL ADD |
| 419 | bra t_frcinx |
| 420 | |
| 421 | |
| 422 | LOGNEAR1: |
| 423 | |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. |
| 424 | fmovex %fp0,%fp1 |
| 425 | fsubs one,%fp1 | ...FP1 IS X-1 |
| 426 | fadds one,%fp0 | ...FP0 IS X+1 |
| 427 | faddx %fp1,%fp1 | ...FP1 IS 2(X-1) |
| 428 | |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL |
| 429 | |--IN U, U = 2(X-1)/(X+1) = FP1/FP0 |
| 430 | |
| 431 | LP1CONT2: |
| 432 | |--THIS IS AN RE-ENTRY POINT FOR LOGNP1 |
| 433 | fdivx %fp0,%fp1 | ...FP1 IS U |
| 434 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 |
| 435 | |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 |
| 436 | |--LET V=U*U, W=V*V, CALCULATE |
| 437 | |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY |
| 438 | |--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) |
| 439 | fmovex %fp1,%fp0 |
| 440 | fmulx %fp0,%fp0 | ...FP0 IS V |
| 441 | fmovex %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1 |
| 442 | fmovex %fp0,%fp1 |
| 443 | fmulx %fp1,%fp1 | ...FP1 IS W |
| 444 | |
| 445 | fmoved LOGB5,%fp3 |
| 446 | fmoved LOGB4,%fp2 |
| 447 | |
| 448 | fmulx %fp1,%fp3 | ...W*B5 |
| 449 | fmulx %fp1,%fp2 | ...W*B4 |
| 450 | |
| 451 | faddd LOGB3,%fp3 | ...B3+W*B5 |
| 452 | faddd LOGB2,%fp2 | ...B2+W*B4 |
| 453 | |
| 454 | fmulx %fp3,%fp1 | ...W*(B3+W*B5), FP3 RELEASED |
| 455 | |
| 456 | fmulx %fp0,%fp2 | ...V*(B2+W*B4) |
| 457 | |
| 458 | faddd LOGB1,%fp1 | ...B1+W*(B3+W*B5) |
| 459 | fmulx SAVEU(%a6),%fp0 | ...FP0 IS U*V |
| 460 | |
| 461 | faddx %fp2,%fp1 | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED |
| 462 | fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED |
| 463 | |
| 464 | fmulx %fp1,%fp0 | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) |
| 465 | |
| 466 | fmovel %d1,%fpcr |
| 467 | faddx SAVEU(%a6),%fp0 |
| 468 | bra t_frcinx |
| 469 | rts |
| 470 | |
| 471 | LOGNEG: |
| 472 | |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID |
| 473 | bra t_operr |
| 474 | |
| 475 | .global slognp1d |
| 476 | slognp1d: |
| 477 | |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT |
| 478 | | Simply return the denorm |
| 479 | |
| 480 | bra t_extdnrm |
| 481 | |
| 482 | .global slognp1 |
| 483 | slognp1: |
| 484 | |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S |
| 485 | |
| 486 | fmovex (%a0),%fp0 | ...LOAD INPUT |
| 487 | fabsx %fp0 |test magnitude |
| 488 | fcmpx LTHOLD,%fp0 |compare with min threshold |
| 489 | fbgt LP1REAL |if greater, continue |
| 490 | fmovel #0,%fpsr |clr N flag from compare |
| 491 | fmovel %d1,%fpcr |
| 492 | fmovex (%a0),%fp0 |return signed argument |
| 493 | bra t_frcinx |
| 494 | |
| 495 | LP1REAL: |
| 496 | fmovex (%a0),%fp0 | ...LOAD INPUT |
| 497 | movel #0x00000000,ADJK(%a6) |
| 498 | fmovex %fp0,%fp1 | ...FP1 IS INPUT Z |
| 499 | fadds one,%fp0 | ...X := ROUND(1+Z) |
| 500 | fmovex %fp0,X(%a6) |
| 501 | movew XFRAC(%a6),XDCARE(%a6) |
| 502 | movel X(%a6),%d0 |
| 503 | cmpil #0,%d0 |
| 504 | ble LP1NEG0 | ...LOG OF ZERO OR -VE |
| 505 | cmp2l BOUNDS2,%d0 |
| 506 | bcs LOGMAIN | ...BOUNDS2 IS [1/2,3/2] |
| 507 | |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, |
| 508 | |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, |
| 509 | |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). |
| 510 | |
| 511 | LP1NEAR1: |
| 512 | |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) |
| 513 | cmp2l BOUNDS1,%d0 |
| 514 | bcss LP1CARE |
| 515 | |
| 516 | LP1ONE16: |
| 517 | |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) |
| 518 | |--WHERE U = 2Z/(2+Z) = 2Z/(1+X). |
| 519 | faddx %fp1,%fp1 | ...FP1 IS 2Z |
| 520 | fadds one,%fp0 | ...FP0 IS 1+X |
| 521 | |--U = FP1/FP0 |
| 522 | bra LP1CONT2 |
| 523 | |
| 524 | LP1CARE: |
| 525 | |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE |
| 526 | |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST |
| 527 | |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], |
| 528 | |--THERE ARE ONLY TWO CASES. |
| 529 | |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z |
| 530 | |--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z |
| 531 | |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF |
| 532 | |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. |
| 533 | |
| 534 | movel XFRAC(%a6),FFRAC(%a6) |
| 535 | andil #0xFE000000,FFRAC(%a6) |
| 536 | oril #0x01000000,FFRAC(%a6) | ...F OBTAINED |
| 537 | cmpil #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1 |
| 538 | bges KISZERO |
| 539 | |
| 540 | KISNEG1: |
| 541 | fmoves TWO,%fp0 |
| 542 | movel #0x3fff0000,F(%a6) |
| 543 | clrl F+8(%a6) |
| 544 | fsubx F(%a6),%fp0 | ...2-F |
| 545 | movel FFRAC(%a6),%d0 |
| 546 | andil #0x7E000000,%d0 |
| 547 | asrl #8,%d0 |
| 548 | asrl #8,%d0 |
| 549 | asrl #4,%d0 | ...D0 CONTAINS DISPLACEMENT FOR 1/F |
| 550 | faddx %fp1,%fp1 | ...GET 2Z |
| 551 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...SAVE FP2 |
| 552 | faddx %fp1,%fp0 | ...FP0 IS Y-F = (2-F)+2Z |
| 553 | lea LOGTBL,%a0 | ...A0 IS ADDRESS OF 1/F |
| 554 | addal %d0,%a0 |
| 555 | fmoves negone,%fp1 | ...FP1 IS K = -1 |
| 556 | bra LP1CONT1 |
| 557 | |
| 558 | KISZERO: |
| 559 | fmoves one,%fp0 |
| 560 | movel #0x3fff0000,F(%a6) |
| 561 | clrl F+8(%a6) |
| 562 | fsubx F(%a6),%fp0 | ...1-F |
| 563 | movel FFRAC(%a6),%d0 |
| 564 | andil #0x7E000000,%d0 |
| 565 | asrl #8,%d0 |
| 566 | asrl #8,%d0 |
| 567 | asrl #4,%d0 |
| 568 | faddx %fp1,%fp0 | ...FP0 IS Y-F |
| 569 | fmovemx %fp2-%fp2/%fp3,-(%sp) | ...FP2 SAVED |
| 570 | lea LOGTBL,%a0 |
| 571 | addal %d0,%a0 | ...A0 IS ADDRESS OF 1/F |
| 572 | fmoves zero,%fp1 | ...FP1 IS K = 0 |
| 573 | bra LP1CONT1 |
| 574 | |
| 575 | LP1NEG0: |
| 576 | |--FPCR SAVED. D0 IS X IN COMPACT FORM. |
| 577 | cmpil #0,%d0 |
| 578 | blts LP1NEG |
| 579 | LP1ZERO: |
| 580 | fmoves negone,%fp0 |
| 581 | |
| 582 | fmovel %d1,%fpcr |
| 583 | bra t_dz |
| 584 | |
| 585 | LP1NEG: |
| 586 | fmoves zero,%fp0 |
| 587 | |
| 588 | fmovel %d1,%fpcr |
| 589 | bra t_operr |
| 590 | |
| 591 | |end |