blob: 4e3c1407d3df0a10503e592692cb1422e44621ae [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001|
2| stwotox.sa 3.1 12/10/90
3|
4| stwotox --- 2**X
5| stwotoxd --- 2**X for denormalized X
6| stentox --- 10**X
7| stentoxd --- 10**X for denormalized X
8|
9| Input: Double-extended number X in location pointed to
10| by address register a0.
11|
12| Output: The function values are returned in Fp0.
13|
14| Accuracy and Monotonicity: The returned result is within 2 ulps in
15| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16| result is subsequently rounded to double precision. The
17| result is provably monotonic in double precision.
18|
19| Speed: The program stwotox takes approximately 190 cycles and the
20| program stentox takes approximately 200 cycles.
21|
22| Algorithm:
23|
24| twotox
25| 1. If |X| > 16480, go to ExpBig.
26|
27| 2. If |X| < 2**(-70), go to ExpSm.
28|
29| 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30| decompose N as
31| N = 64(M + M') + j, j = 0,1,2,...,63.
32|
33| 4. Overwrite r := r * log2. Then
34| 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35| Go to expr to compute that expression.
36|
37| tentox
38| 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
39|
40| 2. If |X| < 2**(-70), go to ExpSm.
41|
42| 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43| N := round-to-int(y). Decompose N as
44| N = 64(M + M') + j, j = 0,1,2,...,63.
45|
46| 4. Define r as
47| r := ((X - N*L1)-N*L2) * L10
48| where L1, L2 are the leading and trailing parts of log_10(2)/64
49| and L10 is the natural log of 10. Then
50| 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51| Go to expr to compute that expression.
52|
53| expr
54| 1. Fetch 2**(j/64) from table as Fact1 and Fact2.
55|
56| 2. Overwrite Fact1 and Fact2 by
57| Fact1 := 2**(M) * Fact1
58| Fact2 := 2**(M) * Fact2
59| Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
60|
61| 3. Calculate P where 1 + P approximates exp(r):
62| P = r + r*r*(A1+r*(A2+...+r*A5)).
63|
64| 4. Let AdjFact := 2**(M'). Return
65| AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66| Exit.
67|
68| ExpBig
69| 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70| underflow by Tiny * Tiny.
71|
72| ExpSm
73| 1. Return 1 + X.
74|
75
76| Copyright (C) Motorola, Inc. 1990
77| All Rights Reserved
78|
79| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
80| The copyright notice above does not evidence any
81| actual or intended publication of such source code.
82
83|STWOTOX idnt 2,1 | Motorola 040 Floating Point Software Package
84
85 |section 8
86
87#include "fpsp.h"
88
89BOUNDS1: .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
90BOUNDS2: .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
91
92L2TEN64: .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
93L10TWO1: .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
94
95L10TWO2: .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
96
97LOG10: .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
98
99LOG2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
100
101EXPA5: .long 0x3F56C16D,0x6F7BD0B2
102EXPA4: .long 0x3F811112,0x302C712C
103EXPA3: .long 0x3FA55555,0x55554CC1
104EXPA2: .long 0x3FC55555,0x55554A54
105EXPA1: .long 0x3FE00000,0x00000000,0x00000000,0x00000000
106
107HUGE: .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
108TINY: .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
109
110EXPTBL:
111 .long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
112 .long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
113 .long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
114 .long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
115 .long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
116 .long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
117 .long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
118 .long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
119 .long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
120 .long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
121 .long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
122 .long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
123 .long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
124 .long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
125 .long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
126 .long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
127 .long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
128 .long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
129 .long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
130 .long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
131 .long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
132 .long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
133 .long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
134 .long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
135 .long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
136 .long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
137 .long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
138 .long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
139 .long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
140 .long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
141 .long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
142 .long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
143 .long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
144 .long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
145 .long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
146 .long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
147 .long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
148 .long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
149 .long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
150 .long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
151 .long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
152 .long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
153 .long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
154 .long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
155 .long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
156 .long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
157 .long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
158 .long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
159 .long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
160 .long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
161 .long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
162 .long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
163 .long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
164 .long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
165 .long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
166 .long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
167 .long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
168 .long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
169 .long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
170 .long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
171 .long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
172 .long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
173 .long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
174 .long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
175
176 .set N,L_SCR1
177
178 .set X,FP_SCR1
179 .set XDCARE,X+2
180 .set XFRAC,X+4
181
182 .set ADJFACT,FP_SCR2
183
184 .set FACT1,FP_SCR3
185 .set FACT1HI,FACT1+4
186 .set FACT1LOW,FACT1+8
187
188 .set FACT2,FP_SCR4
189 .set FACT2HI,FACT2+4
190 .set FACT2LOW,FACT2+8
191
192 | xref t_unfl
193 |xref t_ovfl
194 |xref t_frcinx
195
196 .global stwotoxd
197stwotoxd:
198|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
199
200 fmovel %d1,%fpcr | ...set user's rounding mode/precision
201 fmoves #0x3F800000,%fp0 | ...RETURN 1 + X
202 movel (%a0),%d0
203 orl #0x00800001,%d0
204 fadds %d0,%fp0
205 bra t_frcinx
206
207 .global stwotox
208stwotox:
209|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
210 fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
211
212 movel (%a0),%d0
213 movew 4(%a0),%d0
214 fmovex %fp0,X(%a6)
215 andil #0x7FFFFFFF,%d0
216
217 cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)?
218 bges TWOOK1
219 bra EXPBORS
220
221TWOOK1:
222 cmpil #0x400D80C0,%d0 | ...|X| > 16480?
223 bles TWOMAIN
224 bra EXPBORS
225
226
227TWOMAIN:
228|--USUAL CASE, 2^(-70) <= |X| <= 16480
229
230 fmovex %fp0,%fp1
231 fmuls #0x42800000,%fp1 | ...64 * X
232
233 fmovel %fp1,N(%a6) | ...N = ROUND-TO-INT(64 X)
234 movel %d2,-(%sp)
235 lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
236 fmovel N(%a6),%fp1 | ...N --> FLOATING FMT
237 movel N(%a6),%d0
238 movel %d0,%d2
239 andil #0x3F,%d0 | ...D0 IS J
240 asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64)
241 addal %d0,%a1 | ...ADDRESS FOR 2^(J/64)
242 asrl #6,%d2 | ...d2 IS L, N = 64L + J
243 movel %d2,%d0
244 asrl #1,%d0 | ...D0 IS M
245 subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J
246 addil #0x3FFF,%d2
247 movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M')
248 movel (%sp)+,%d2
249|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
250|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
251|--ADJFACT = 2^(M').
252|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
253
254 fmuls #0x3C800000,%fp1 | ...(1/64)*N
255 movel (%a1)+,FACT1(%a6)
256 movel (%a1)+,FACT1HI(%a6)
257 movel (%a1)+,FACT1LOW(%a6)
258 movew (%a1)+,FACT2(%a6)
259 clrw FACT2+2(%a6)
260
261 fsubx %fp1,%fp0 | ...X - (1/64)*INT(64 X)
262
263 movew (%a1)+,FACT2HI(%a6)
264 clrw FACT2HI+2(%a6)
265 clrl FACT2LOW(%a6)
266 addw %d0,FACT1(%a6)
267
268 fmulx LOG2,%fp0 | ...FP0 IS R
269 addw %d0,FACT2(%a6)
270
271 bra expr
272
273EXPBORS:
274|--FPCR, D0 SAVED
275 cmpil #0x3FFF8000,%d0
276 bgts EXPBIG
277
278EXPSM:
279|--|X| IS SMALL, RETURN 1 + X
280
281 fmovel %d1,%FPCR |restore users exceptions
282 fadds #0x3F800000,%fp0 | ...RETURN 1 + X
283
284 bra t_frcinx
285
286EXPBIG:
287|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
288|--REGISTERS SAVE SO FAR ARE FPCR AND D0
289 movel X(%a6),%d0
290 cmpil #0,%d0
291 blts EXPNEG
292
293 bclrb #7,(%a0) |t_ovfl expects positive value
294 bra t_ovfl
295
296EXPNEG:
297 bclrb #7,(%a0) |t_unfl expects positive value
298 bra t_unfl
299
300 .global stentoxd
301stentoxd:
302|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
303
304 fmovel %d1,%fpcr | ...set user's rounding mode/precision
305 fmoves #0x3F800000,%fp0 | ...RETURN 1 + X
306 movel (%a0),%d0
307 orl #0x00800001,%d0
308 fadds %d0,%fp0
309 bra t_frcinx
310
311 .global stentox
312stentox:
313|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
314 fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
315
316 movel (%a0),%d0
317 movew 4(%a0),%d0
318 fmovex %fp0,X(%a6)
319 andil #0x7FFFFFFF,%d0
320
321 cmpil #0x3FB98000,%d0 | ...|X| >= 2**(-70)?
322 bges TENOK1
323 bra EXPBORS
324
325TENOK1:
326 cmpil #0x400B9B07,%d0 | ...|X| <= 16480*log2/log10 ?
327 bles TENMAIN
328 bra EXPBORS
329
330TENMAIN:
331|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
332
333 fmovex %fp0,%fp1
334 fmuld L2TEN64,%fp1 | ...X*64*LOG10/LOG2
335
336 fmovel %fp1,N(%a6) | ...N=INT(X*64*LOG10/LOG2)
337 movel %d2,-(%sp)
338 lea EXPTBL,%a1 | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
339 fmovel N(%a6),%fp1 | ...N --> FLOATING FMT
340 movel N(%a6),%d0
341 movel %d0,%d2
342 andil #0x3F,%d0 | ...D0 IS J
343 asll #4,%d0 | ...DISPLACEMENT FOR 2^(J/64)
344 addal %d0,%a1 | ...ADDRESS FOR 2^(J/64)
345 asrl #6,%d2 | ...d2 IS L, N = 64L + J
346 movel %d2,%d0
347 asrl #1,%d0 | ...D0 IS M
348 subl %d0,%d2 | ...d2 IS M', N = 64(M+M') + J
349 addil #0x3FFF,%d2
350 movew %d2,ADJFACT(%a6) | ...ADJFACT IS 2^(M')
351 movel (%sp)+,%d2
352
353|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
354|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
355|--ADJFACT = 2^(M').
356|--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
357
358 fmovex %fp1,%fp2
359
360 fmuld L10TWO1,%fp1 | ...N*(LOG2/64LOG10)_LEAD
361 movel (%a1)+,FACT1(%a6)
362
363 fmulx L10TWO2,%fp2 | ...N*(LOG2/64LOG10)_TRAIL
364
365 movel (%a1)+,FACT1HI(%a6)
366 movel (%a1)+,FACT1LOW(%a6)
367 fsubx %fp1,%fp0 | ...X - N L_LEAD
368 movew (%a1)+,FACT2(%a6)
369
370 fsubx %fp2,%fp0 | ...X - N L_TRAIL
371
372 clrw FACT2+2(%a6)
373 movew (%a1)+,FACT2HI(%a6)
374 clrw FACT2HI+2(%a6)
375 clrl FACT2LOW(%a6)
376
377 fmulx LOG10,%fp0 | ...FP0 IS R
378
379 addw %d0,FACT1(%a6)
380 addw %d0,FACT2(%a6)
381
382expr:
383|--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
384|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
385|--FP0 IS R. THE FOLLOWING CODE COMPUTES
386|-- 2**(M'+M) * 2**(J/64) * EXP(R)
387
388 fmovex %fp0,%fp1
389 fmulx %fp1,%fp1 | ...FP1 IS S = R*R
390
391 fmoved EXPA5,%fp2 | ...FP2 IS A5
392 fmoved EXPA4,%fp3 | ...FP3 IS A4
393
394 fmulx %fp1,%fp2 | ...FP2 IS S*A5
395 fmulx %fp1,%fp3 | ...FP3 IS S*A4
396
397 faddd EXPA3,%fp2 | ...FP2 IS A3+S*A5
398 faddd EXPA2,%fp3 | ...FP3 IS A2+S*A4
399
400 fmulx %fp1,%fp2 | ...FP2 IS S*(A3+S*A5)
401 fmulx %fp1,%fp3 | ...FP3 IS S*(A2+S*A4)
402
403 faddd EXPA1,%fp2 | ...FP2 IS A1+S*(A3+S*A5)
404 fmulx %fp0,%fp3 | ...FP3 IS R*S*(A2+S*A4)
405
406 fmulx %fp1,%fp2 | ...FP2 IS S*(A1+S*(A3+S*A5))
407 faddx %fp3,%fp0 | ...FP0 IS R+R*S*(A2+S*A4)
408
409 faddx %fp2,%fp0 | ...FP0 IS EXP(R) - 1
410
411
412|--FINAL RECONSTRUCTION PROCESS
413|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
414
415 fmulx FACT1(%a6),%fp0
416 faddx FACT2(%a6),%fp0
417 faddx FACT1(%a6),%fp0
418
419 fmovel %d1,%FPCR |restore users exceptions
420 clrw ADJFACT+2(%a6)
421 movel #0x80000000,ADJFACT+4(%a6)
422 clrl ADJFACT+8(%a6)
423 fmulx ADJFACT(%a6),%fp0 | ...FINAL ADJUSTMENT
424
425 bra t_frcinx
426
427 |end