blob: d258f59564e11b71a37148873bbf61b41d3c4b82 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001 .file "wm_sqrt.S"
2/*---------------------------------------------------------------------------+
3 | wm_sqrt.S |
4 | |
5 | Fixed point arithmetic square root evaluation. |
6 | |
7 | Copyright (C) 1992,1993,1995,1997 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@suburbia.net |
10 | |
11 | Call from C as: |
12 | int wm_sqrt(FPU_REG *n, unsigned int control_word) |
13 | |
14 +---------------------------------------------------------------------------*/
15
16/*---------------------------------------------------------------------------+
17 | wm_sqrt(FPU_REG *n, unsigned int control_word) |
18 | returns the square root of n in n. |
19 | |
20 | Use Newton's method to compute the square root of a number, which must |
21 | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
22 | Does not check the sign or tag of the argument. |
23 | Sets the exponent, but not the sign or tag of the result. |
24 | |
25 | The guess is kept in %esi:%edi |
26 +---------------------------------------------------------------------------*/
27
28#include "exception.h"
29#include "fpu_emu.h"
30
31
32#ifndef NON_REENTRANT_FPU
33/* Local storage on the stack: */
34#define FPU_accum_3 -4(%ebp) /* ms word */
35#define FPU_accum_2 -8(%ebp)
36#define FPU_accum_1 -12(%ebp)
37#define FPU_accum_0 -16(%ebp)
38
39/*
40 * The de-normalised argument:
41 * sq_2 sq_1 sq_0
42 * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
43 * ^ binary point here
44 */
45#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
46#define FPU_fsqrt_arg_1 -24(%ebp)
47#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
48
49#else
50/* Local storage in a static area: */
51.data
52 .align 4,0
53FPU_accum_3:
54 .long 0 /* ms word */
55FPU_accum_2:
56 .long 0
57FPU_accum_1:
58 .long 0
59FPU_accum_0:
60 .long 0
61
62/* The de-normalised argument:
63 sq_2 sq_1 sq_0
64 b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
65 ^ binary point here
66 */
67FPU_fsqrt_arg_2:
68 .long 0 /* ms word */
69FPU_fsqrt_arg_1:
70 .long 0
71FPU_fsqrt_arg_0:
72 .long 0 /* ls word, at most the ms bit is set */
73#endif /* NON_REENTRANT_FPU */
74
75
76.text
77ENTRY(wm_sqrt)
78 pushl %ebp
79 movl %esp,%ebp
80#ifndef NON_REENTRANT_FPU
81 subl $28,%esp
82#endif /* NON_REENTRANT_FPU */
83 pushl %esi
84 pushl %edi
85 pushl %ebx
86
87 movl PARAM1,%esi
88
89 movl SIGH(%esi),%eax
90 movl SIGL(%esi),%ecx
91 xorl %edx,%edx
92
93/* We use a rough linear estimate for the first guess.. */
94
95 cmpw EXP_BIAS,EXP(%esi)
96 jnz sqrt_arg_ge_2
97
98 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
99 rcrl $1,%ecx
100 rcrl $1,%edx
101
102sqrt_arg_ge_2:
103/* From here on, n is never accessed directly again until it is
104 replaced by the answer. */
105
106 movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
107 movl %ecx,FPU_fsqrt_arg_1
108 movl %edx,FPU_fsqrt_arg_0
109
110/* Make a linear first estimate */
111 shrl $1,%eax
112 addl $0x40000000,%eax
113 movl $0xaaaaaaaa,%ecx
114 mull %ecx
115 shll %edx /* max result was 7fff... */
116 testl $0x80000000,%edx /* but min was 3fff... */
117 jnz sqrt_prelim_no_adjust
118
119 movl $0x80000000,%edx /* round up */
120
121sqrt_prelim_no_adjust:
122 movl %edx,%esi /* Our first guess */
123
124/* We have now computed (approx) (2 + x) / 3, which forms the basis
125 for a few iterations of Newton's method */
126
127 movl FPU_fsqrt_arg_2,%ecx /* ms word */
128
129/*
130 * From our initial estimate, three iterations are enough to get us
131 * to 30 bits or so. This will then allow two iterations at better
132 * precision to complete the process.
133 */
134
135/* Compute (g + n/g)/2 at each iteration (g is the guess). */
136 shrl %ecx /* Doing this first will prevent a divide */
137 /* overflow later. */
138
139 movl %ecx,%edx /* msw of the arg / 2 */
140 divl %esi /* current estimate */
141 shrl %esi /* divide by 2 */
142 addl %eax,%esi /* the new estimate */
143
144 movl %ecx,%edx
145 divl %esi
146 shrl %esi
147 addl %eax,%esi
148
149 movl %ecx,%edx
150 divl %esi
151 shrl %esi
152 addl %eax,%esi
153
154/*
155 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
156 * we improve it to 60 bits or so.
157 *
158 * The strategy from now on is to compute new estimates from
159 * guess := guess + (n - guess^2) / (2 * guess)
160 */
161
162/* First, find the square of the guess */
163 movl %esi,%eax
164 mull %esi
165/* guess^2 now in %edx:%eax */
166
167 movl FPU_fsqrt_arg_1,%ecx
168 subl %ecx,%eax
169 movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
170 sbbl %ecx,%edx
171 jnc sqrt_stage_2_positive
172
173/* Subtraction gives a negative result,
174 negate the result before division. */
175 notl %edx
176 notl %eax
177 addl $1,%eax
178 adcl $0,%edx
179
180 divl %esi
181 movl %eax,%ecx
182
183 movl %edx,%eax
184 divl %esi
185 jmp sqrt_stage_2_finish
186
187sqrt_stage_2_positive:
188 divl %esi
189 movl %eax,%ecx
190
191 movl %edx,%eax
192 divl %esi
193
194 notl %ecx
195 notl %eax
196 addl $1,%eax
197 adcl $0,%ecx
198
199sqrt_stage_2_finish:
200 sarl $1,%ecx /* divide by 2 */
201 rcrl $1,%eax
202
203 /* Form the new estimate in %esi:%edi */
204 movl %eax,%edi
205 addl %ecx,%esi
206
207 jnz sqrt_stage_2_done /* result should be [1..2) */
208
209#ifdef PARANOID
210/* It should be possible to get here only if the arg is ffff....ffff */
211 cmp $0xffffffff,FPU_fsqrt_arg_1
212 jnz sqrt_stage_2_error
213#endif /* PARANOID */
214
215/* The best rounded result. */
216 xorl %eax,%eax
217 decl %eax
218 movl %eax,%edi
219 movl %eax,%esi
220 movl $0x7fffffff,%eax
221 jmp sqrt_round_result
222
223#ifdef PARANOID
224sqrt_stage_2_error:
225 pushl EX_INTERNAL|0x213
226 call EXCEPTION
227#endif /* PARANOID */
228
229sqrt_stage_2_done:
230
231/* Now the square root has been computed to better than 60 bits. */
232
233/* Find the square of the guess. */
234 movl %edi,%eax /* ls word of guess */
235 mull %edi
236 movl %edx,FPU_accum_1
237
238 movl %esi,%eax
239 mull %esi
240 movl %edx,FPU_accum_3
241 movl %eax,FPU_accum_2
242
243 movl %edi,%eax
244 mull %esi
245 addl %eax,FPU_accum_1
246 adcl %edx,FPU_accum_2
247 adcl $0,FPU_accum_3
248
249/* movl %esi,%eax */
250/* mull %edi */
251 addl %eax,FPU_accum_1
252 adcl %edx,FPU_accum_2
253 adcl $0,FPU_accum_3
254
255/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
256
257 movl FPU_fsqrt_arg_0,%eax /* get normalized n */
258 subl %eax,FPU_accum_1
259 movl FPU_fsqrt_arg_1,%eax
260 sbbl %eax,FPU_accum_2
261 movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
262 sbbl %eax,FPU_accum_3
263 jnc sqrt_stage_3_positive
264
265/* Subtraction gives a negative result,
266 negate the result before division */
267 notl FPU_accum_1
268 notl FPU_accum_2
269 notl FPU_accum_3
270 addl $1,FPU_accum_1
271 adcl $0,FPU_accum_2
272
273#ifdef PARANOID
274 adcl $0,FPU_accum_3 /* This must be zero */
275 jz sqrt_stage_3_no_error
276
277sqrt_stage_3_error:
278 pushl EX_INTERNAL|0x207
279 call EXCEPTION
280
281sqrt_stage_3_no_error:
282#endif /* PARANOID */
283
284 movl FPU_accum_2,%edx
285 movl FPU_accum_1,%eax
286 divl %esi
287 movl %eax,%ecx
288
289 movl %edx,%eax
290 divl %esi
291
292 sarl $1,%ecx /* divide by 2 */
293 rcrl $1,%eax
294
295 /* prepare to round the result */
296
297 addl %ecx,%edi
298 adcl $0,%esi
299
300 jmp sqrt_stage_3_finished
301
302sqrt_stage_3_positive:
303 movl FPU_accum_2,%edx
304 movl FPU_accum_1,%eax
305 divl %esi
306 movl %eax,%ecx
307
308 movl %edx,%eax
309 divl %esi
310
311 sarl $1,%ecx /* divide by 2 */
312 rcrl $1,%eax
313
314 /* prepare to round the result */
315
316 notl %eax /* Negate the correction term */
317 notl %ecx
318 addl $1,%eax
319 adcl $0,%ecx /* carry here ==> correction == 0 */
320 adcl $0xffffffff,%esi
321
322 addl %ecx,%edi
323 adcl $0,%esi
324
325sqrt_stage_3_finished:
326
327/*
328 * The result in %esi:%edi:%esi should be good to about 90 bits here,
329 * and the rounding information here does not have sufficient accuracy
330 * in a few rare cases.
331 */
332 cmpl $0xffffffe0,%eax
333 ja sqrt_near_exact_x
334
335 cmpl $0x00000020,%eax
336 jb sqrt_near_exact
337
338 cmpl $0x7fffffe0,%eax
339 jb sqrt_round_result
340
341 cmpl $0x80000020,%eax
342 jb sqrt_get_more_precision
343
344sqrt_round_result:
345/* Set up for rounding operations */
346 movl %eax,%edx
347 movl %esi,%eax
348 movl %edi,%ebx
349 movl PARAM1,%edi
350 movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
351 jmp fpu_reg_round
352
353
354sqrt_near_exact_x:
355/* First, the estimate must be rounded up. */
356 addl $1,%edi
357 adcl $0,%esi
358
359sqrt_near_exact:
360/*
361 * This is an easy case because x^1/2 is monotonic.
362 * We need just find the square of our estimate, compare it
363 * with the argument, and deduce whether our estimate is
364 * above, below, or exact. We use the fact that the estimate
365 * is known to be accurate to about 90 bits.
366 */
367 movl %edi,%eax /* ls word of guess */
368 mull %edi
369 movl %edx,%ebx /* 2nd ls word of square */
370 movl %eax,%ecx /* ls word of square */
371
372 movl %edi,%eax
373 mull %esi
374 addl %eax,%ebx
375 addl %eax,%ebx
376
377#ifdef PARANOID
378 cmp $0xffffffb0,%ebx
379 jb sqrt_near_exact_ok
380
381 cmp $0x00000050,%ebx
382 ja sqrt_near_exact_ok
383
384 pushl EX_INTERNAL|0x214
385 call EXCEPTION
386
387sqrt_near_exact_ok:
388#endif /* PARANOID */
389
390 or %ebx,%ebx
391 js sqrt_near_exact_small
392
393 jnz sqrt_near_exact_large
394
395 or %ebx,%edx
396 jnz sqrt_near_exact_large
397
398/* Our estimate is exactly the right answer */
399 xorl %eax,%eax
400 jmp sqrt_round_result
401
402sqrt_near_exact_small:
403/* Our estimate is too small */
404 movl $0x000000ff,%eax
405 jmp sqrt_round_result
406
407sqrt_near_exact_large:
408/* Our estimate is too large, we need to decrement it */
409 subl $1,%edi
410 sbbl $0,%esi
411 movl $0xffffff00,%eax
412 jmp sqrt_round_result
413
414
415sqrt_get_more_precision:
416/* This case is almost the same as the above, except we start
417 with an extra bit of precision in the estimate. */
418 stc /* The extra bit. */
419 rcll $1,%edi /* Shift the estimate left one bit */
420 rcll $1,%esi
421
422 movl %edi,%eax /* ls word of guess */
423 mull %edi
424 movl %edx,%ebx /* 2nd ls word of square */
425 movl %eax,%ecx /* ls word of square */
426
427 movl %edi,%eax
428 mull %esi
429 addl %eax,%ebx
430 addl %eax,%ebx
431
432/* Put our estimate back to its original value */
433 stc /* The ms bit. */
434 rcrl $1,%esi /* Shift the estimate left one bit */
435 rcrl $1,%edi
436
437#ifdef PARANOID
438 cmp $0xffffff60,%ebx
439 jb sqrt_more_prec_ok
440
441 cmp $0x000000a0,%ebx
442 ja sqrt_more_prec_ok
443
444 pushl EX_INTERNAL|0x215
445 call EXCEPTION
446
447sqrt_more_prec_ok:
448#endif /* PARANOID */
449
450 or %ebx,%ebx
451 js sqrt_more_prec_small
452
453 jnz sqrt_more_prec_large
454
455 or %ebx,%ecx
456 jnz sqrt_more_prec_large
457
458/* Our estimate is exactly the right answer */
459 movl $0x80000000,%eax
460 jmp sqrt_round_result
461
462sqrt_more_prec_small:
463/* Our estimate is too small */
464 movl $0x800000ff,%eax
465 jmp sqrt_round_result
466
467sqrt_more_prec_large:
468/* Our estimate is too large */
469 movl $0x7fffff00,%eax
470 jmp sqrt_round_result