blob: 0d5e6a1436a638c59f0fc123974f43c418169d69 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001|
2| stwotox.sa 3.1 12/10/90
3|
4| stwotox --- 2**X
5| stwotoxd --- 2**X for denormalized X
6| stentox --- 10**X
7| stentoxd --- 10**X for denormalized X
8|
9| Input: Double-extended number X in location pointed to
10| by address register a0.
11|
12| Output: The function values are returned in Fp0.
13|
14| Accuracy and Monotonicity: The returned result is within 2 ulps in
15| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16| result is subsequently rounded to double precision. The
17| result is provably monotonic in double precision.
18|
19| Speed: The program stwotox takes approximately 190 cycles and the
20| program stentox takes approximately 200 cycles.
21|
22| Algorithm:
23|
24| twotox
25| 1. If |X| > 16480, go to ExpBig.
26|
27| 2. If |X| < 2**(-70), go to ExpSm.
28|
29| 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30| decompose N as
31| N = 64(M + M') + j, j = 0,1,2,...,63.
32|
33| 4. Overwrite r := r * log2. Then
34| 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35| Go to expr to compute that expression.
36|
37| tentox
38| 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
39|
40| 2. If |X| < 2**(-70), go to ExpSm.
41|
42| 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43| N := round-to-int(y). Decompose N as
44| N = 64(M + M') + j, j = 0,1,2,...,63.
45|
46| 4. Define r as
47| r := ((X - N*L1)-N*L2) * L10
48| where L1, L2 are the leading and trailing parts of log_10(2)/64
49| and L10 is the natural log of 10. Then
50| 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51| Go to expr to compute that expression.
52|
53| expr
54| 1. Fetch 2**(j/64) from table as Fact1 and Fact2.
55|
56| 2. Overwrite Fact1 and Fact2 by
57| Fact1 := 2**(M) * Fact1
58| Fact2 := 2**(M) * Fact2
59| Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
60|
61| 3. Calculate P where 1 + P approximates exp(r):
62| P = r + r*r*(A1+r*(A2+...+r*A5)).
63|
64| 4. Let AdjFact := 2**(M'). Return
65| AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66| Exit.
67|
68| ExpBig
69| 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70| underflow by Tiny * Tiny.
71|
72| ExpSm
73| 1. Return 1 + X.
74|
75
76| Copyright (C) Motorola, Inc. 1990
77| All Rights Reserved
78|
Matt Waddele00d82d2006-02-11 17:55:48 -080079| For details on the license for this file, please see the
80| file, README, in this same directory.
Linus Torvalds1da177e2005-04-16 15:20:36 -070081
82|STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package
83
84 |section 8
85
86#include "fpsp.h"
87
88BOUNDS1: .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
89BOUNDS2: .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
90
91L2TEN64: .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
92L10TWO1: .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
93
94L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
95
96LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
97
98LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
99
100EXPA5: .long 0x3F56C16D,0x6F7BD0B2
101EXPA4: .long 0x3F811112,0x302C712C
102EXPA3: .long 0x3FA55555,0x55554CC1
103EXPA2: .long 0x3FC55555,0x55554A54
104EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000
105
106HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
107TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
108
109EXPTBL:
110 .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
111 .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
112 .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
113 .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
114 .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
115 .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
116 .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
117 .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
118 .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
119 .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
120 .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
121 .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
122 .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
123 .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
124 .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
125 .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
126 .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
127 .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
128 .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
129 .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
130 .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
131 .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
132 .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
133 .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
134 .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
135 .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
136 .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
137 .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
138 .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
139 .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
140 .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
141 .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
142 .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
143 .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
144 .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
145 .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
146 .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
147 .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
148 .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
149 .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
150 .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
151 .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
152 .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
153 .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
154 .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
155 .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
156 .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
157 .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
158 .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
159 .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
160 .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
161 .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
162 .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
163 .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
164 .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
165 .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
166 .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
167 .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
168 .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
169 .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
170 .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
171 .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
172 .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
173 .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
174
175 .set N,L_SCR1
176
177 .set X,FP_SCR1
178 .set XDCARE,X+2
179 .set XFRAC,X+4
180
181 .set ADJFACT,FP_SCR2
182
183 .set FACT1,FP_SCR3
184 .set FACT1HI,FACT1+4
185 .set FACT1LOW,FACT1+8
186
187 .set FACT2,FP_SCR4
188 .set FACT2HI,FACT2+4
189 .set FACT2LOW,FACT2+8
190
191 | xref t_unfl
192 |xref t_ovfl
193 |xref t_frcinx
194
195 .global stwotoxd
196stwotoxd:
197|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
198
199 fmovel %d1,%fpcr | ...set user's rounding mode/precision
200 fmoves #0x3F800000,%fp0 | ...RETURN 1 + X
201 movel (%a0),%d0
202 orl #0x00800001,%d0
203 fadds %d0,%fp0
204 bra t_frcinx
205
206 .global stwotox
207stwotox:
208|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
209 fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
210
211 movel (%a0),%d0
212 movew 4(%a0),%d0
213 fmovex %fp0,X(%a6)
214 andil #0x7FFFFFFF,%d0
215
216 cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)?
217 bges TWOOK1
218 bra EXPBORS
219
220TWOOK1:
221 cmpil #0x400D80C0,%d0 | ...|X| > 16480?
222 bles TWOMAIN
223 bra EXPBORS
224
225
226TWOMAIN:
227|--USUAL CASE, 2^(-70) <= |X| <= 16480
228
229 fmovex %fp0,%fp1
230 fmuls #0x42800000,%fp1 | ...64 * X
231
232 fmovel %fp1,N(%a6) | ...N = ROUND-TO-INT(64 X)
233 movel %d2,-(%sp)
234 lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
235 fmovel N(%a6),%fp1 | ...N --> FLOATING FMT
236 movel N(%a6),%d0
237 movel %d0,%d2
238 andil #0x3F,%d0 | ...D0 IS J
239 asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64)
240 addal %d0,%a1 | ...ADDRESS FOR 2^(J/64)
241 asrl #6,%d2 | ...d2 IS L, N = 64L + J
242 movel %d2,%d0
243 asrl #1,%d0 | ...D0 IS M
244 subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J
245 addil #0x3FFF,%d2
246 movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M')
247 movel (%sp)+,%d2
248|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
249|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
250|--ADJFACT = 2^(M').
251|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
252
253 fmuls #0x3C800000,%fp1 | ...(1/64)*N
254 movel (%a1)+,FACT1(%a6)
255 movel (%a1)+,FACT1HI(%a6)
256 movel (%a1)+,FACT1LOW(%a6)
257 movew (%a1)+,FACT2(%a6)
258 clrw FACT2+2(%a6)
259
260 fsubx %fp1,%fp0 | ...X - (1/64)*INT(64 X)
261
262 movew (%a1)+,FACT2HI(%a6)
263 clrw FACT2HI+2(%a6)
264 clrl FACT2LOW(%a6)
265 addw %d0,FACT1(%a6)
266
267 fmulx LOG2,%fp0 | ...FP0 IS R
268 addw %d0,FACT2(%a6)
269
270 bra expr
271
272EXPBORS:
273|--FPCR, D0 SAVED
274 cmpil #0x3FFF8000,%d0
275 bgts EXPBIG
276
277EXPSM:
278|--|X| IS SMALL, RETURN 1 + X
279
280 fmovel %d1,%FPCR |restore users exceptions
281 fadds #0x3F800000,%fp0 | ...RETURN 1 + X
282
283 bra t_frcinx
284
285EXPBIG:
286|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
287|--REGISTERS SAVE SO FAR ARE FPCR AND D0
288 movel X(%a6),%d0
289 cmpil #0,%d0
290 blts EXPNEG
291
292 bclrb #7,(%a0) |t_ovfl expects positive value
293 bra t_ovfl
294
295EXPNEG:
296 bclrb #7,(%a0) |t_unfl expects positive value
297 bra t_unfl
298
299 .global stentoxd
300stentoxd:
301|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
302
303 fmovel %d1,%fpcr | ...set user's rounding mode/precision
304 fmoves #0x3F800000,%fp0 | ...RETURN 1 + X
305 movel (%a0),%d0
306 orl #0x00800001,%d0
307 fadds %d0,%fp0
308 bra t_frcinx
309
310 .global stentox
311stentox:
312|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
313 fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
314
315 movel (%a0),%d0
316 movew 4(%a0),%d0
317 fmovex %fp0,X(%a6)
318 andil #0x7FFFFFFF,%d0
319
320 cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)?
321 bges TENOK1
322 bra EXPBORS
323
324TENOK1:
325 cmpil #0x400B9B07,%d0 | ...|X| <= 16480*log2/log10 ?
326 bles TENMAIN
327 bra EXPBORS
328
329TENMAIN:
330|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
331
332 fmovex %fp0,%fp1
333 fmuld L2TEN64,%fp1 | ...X*64*LOG10/LOG2
334
335 fmovel %fp1,N(%a6) | ...N=INT(X*64*LOG10/LOG2)
336 movel %d2,-(%sp)
337 lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
338 fmovel N(%a6),%fp1 | ...N --> FLOATING FMT
339 movel N(%a6),%d0
340 movel %d0,%d2
341 andil #0x3F,%d0 | ...D0 IS J
342 asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64)
343 addal %d0,%a1 | ...ADDRESS FOR 2^(J/64)
344 asrl #6,%d2 | ...d2 IS L, N = 64L + J
345 movel %d2,%d0
346 asrl #1,%d0 | ...D0 IS M
347 subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J
348 addil #0x3FFF,%d2
349 movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M')
350 movel (%sp)+,%d2
351
352|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
353|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
354|--ADJFACT = 2^(M').
355|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
356
357 fmovex %fp1,%fp2
358
359 fmuld L10TWO1,%fp1 | ...N*(LOG2/64LOG10)_LEAD
360 movel (%a1)+,FACT1(%a6)
361
362 fmulx L10TWO2,%fp2 | ...N*(LOG2/64LOG10)_TRAIL
363
364 movel (%a1)+,FACT1HI(%a6)
365 movel (%a1)+,FACT1LOW(%a6)
366 fsubx %fp1,%fp0 | ...X - N L_LEAD
367 movew (%a1)+,FACT2(%a6)
368
369 fsubx %fp2,%fp0 | ...X - N L_TRAIL
370
371 clrw FACT2+2(%a6)
372 movew (%a1)+,FACT2HI(%a6)
373 clrw FACT2HI+2(%a6)
374 clrl FACT2LOW(%a6)
375
376 fmulx LOG10,%fp0 | ...FP0 IS R
377
378 addw %d0,FACT1(%a6)
379 addw %d0,FACT2(%a6)
380
381expr:
382|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
383|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
384|--FP0 IS R. THE FOLLOWING CODE COMPUTES
385|-- 2**(M'+M) * 2**(J/64) * EXP(R)
386
387 fmovex %fp0,%fp1
388 fmulx %fp1,%fp1 | ...FP1 IS S = R*R
389
390 fmoved EXPA5,%fp2 | ...FP2 IS A5
391 fmoved EXPA4,%fp3 | ...FP3 IS A4
392
393 fmulx %fp1,%fp2 | ...FP2 IS S*A5
394 fmulx %fp1,%fp3 | ...FP3 IS S*A4
395
396 faddd EXPA3,%fp2 | ...FP2 IS A3+S*A5
397 faddd EXPA2,%fp3 | ...FP3 IS A2+S*A4
398
399 fmulx %fp1,%fp2 | ...FP2 IS S*(A3+S*A5)
400 fmulx %fp1,%fp3 | ...FP3 IS S*(A2+S*A4)
401
402 faddd EXPA1,%fp2 | ...FP2 IS A1+S*(A3+S*A5)
403 fmulx %fp0,%fp3 | ...FP3 IS R*S*(A2+S*A4)
404
405 fmulx %fp1,%fp2 | ...FP2 IS S*(A1+S*(A3+S*A5))
406 faddx %fp3,%fp0 | ...FP0 IS R+R*S*(A2+S*A4)
407
408 faddx %fp2,%fp0 | ...FP0 IS EXP(R) - 1
409
410
411|--FINAL RECONSTRUCTION PROCESS
412|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
413
414 fmulx FACT1(%a6),%fp0
415 faddx FACT2(%a6),%fp0
416 faddx FACT1(%a6),%fp0
417
418 fmovel %d1,%FPCR |restore users exceptions
419 clrw ADJFACT+2(%a6)
420 movel #0x80000000,ADJFACT+4(%a6)
421 clrl ADJFACT+8(%a6)
422 fmulx ADJFACT(%a6),%fp0 | ...FINAL ADJUSTMENT
423
424 bra t_frcinx
425
426 |end