blob: 17315c89ff3d07ae1c263a206dfafbe0bd531338 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*---------------------------------------------------------------------------+
2 | polynomial_Xsig.S |
3 | |
4 | Fixed point arithmetic polynomial evaluation. |
5 | |
6 | Copyright (C) 1992,1993,1994,1995 |
7 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
9 | |
10 | Call from C as: |
11 | void polynomial_Xsig(Xsig *accum, unsigned long long x, |
12 | unsigned long long terms[], int n) |
13 | |
14 | Computes: |
15 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x |
16 | and adds the result to the 12 byte Xsig. |
17 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
18 | precision. |
19 | |
20 | This function must be used carefully: most overflow of intermediate |
21 | results is controlled, but overflow of the result is not. |
22 | |
23 +---------------------------------------------------------------------------*/
24 .file "polynomial_Xsig.S"
25
26#include "fpu_emu.h"
27
28
29#define TERM_SIZE $8
30#define SUM_MS -20(%ebp) /* sum ms long */
31#define SUM_MIDDLE -24(%ebp) /* sum middle long */
32#define SUM_LS -28(%ebp) /* sum ls long */
33#define ACCUM_MS -4(%ebp) /* accum ms long */
34#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
35#define ACCUM_LS -12(%ebp) /* accum ls long */
36#define OVERFLOWED -16(%ebp) /* addition overflow flag */
37
38.text
39ENTRY(polynomial_Xsig)
40 pushl %ebp
41 movl %esp,%ebp
42 subl $32,%esp
43 pushl %esi
44 pushl %edi
45 pushl %ebx
46
47 movl PARAM2,%esi /* x */
48 movl PARAM3,%edi /* terms */
49
50 movl TERM_SIZE,%eax
51 mull PARAM4 /* n */
52 addl %eax,%edi
53
54 movl 4(%edi),%edx /* terms[n] */
55 movl %edx,SUM_MS
56 movl (%edi),%edx /* terms[n] */
57 movl %edx,SUM_MIDDLE
58 xor %eax,%eax
59 movl %eax,SUM_LS
60 movb %al,OVERFLOWED
61
62 subl TERM_SIZE,%edi
63 decl PARAM4
64 js L_accum_done
65
66L_accum_loop:
67 xor %eax,%eax
68 movl %eax,ACCUM_MS
69 movl %eax,ACCUM_MIDDLE
70
71 movl SUM_MIDDLE,%eax
72 mull (%esi) /* x ls long */
73 movl %edx,ACCUM_LS
74
75 movl SUM_MIDDLE,%eax
76 mull 4(%esi) /* x ms long */
77 addl %eax,ACCUM_LS
78 adcl %edx,ACCUM_MIDDLE
79 adcl $0,ACCUM_MS
80
81 movl SUM_MS,%eax
82 mull (%esi) /* x ls long */
83 addl %eax,ACCUM_LS
84 adcl %edx,ACCUM_MIDDLE
85 adcl $0,ACCUM_MS
86
87 movl SUM_MS,%eax
88 mull 4(%esi) /* x ms long */
89 addl %eax,ACCUM_MIDDLE
90 adcl %edx,ACCUM_MS
91
92 testb $0xff,OVERFLOWED
93 jz L_no_overflow
94
95 movl (%esi),%eax
96 addl %eax,ACCUM_MIDDLE
97 movl 4(%esi),%eax
98 adcl %eax,ACCUM_MS /* This could overflow too */
99
100L_no_overflow:
101
102/*
103 * Now put the sum of next term and the accumulator
104 * into the sum register
105 */
106 movl ACCUM_LS,%eax
107 addl (%edi),%eax /* term ls long */
108 movl %eax,SUM_LS
109 movl ACCUM_MIDDLE,%eax
110 adcl (%edi),%eax /* term ls long */
111 movl %eax,SUM_MIDDLE
112 movl ACCUM_MS,%eax
113 adcl 4(%edi),%eax /* term ms long */
114 movl %eax,SUM_MS
115 sbbb %al,%al
116 movb %al,OVERFLOWED /* Used in the next iteration */
117
118 subl TERM_SIZE,%edi
119 decl PARAM4
120 jns L_accum_loop
121
122L_accum_done:
123 movl PARAM1,%edi /* accum */
124 movl SUM_LS,%eax
125 addl %eax,(%edi)
126 movl SUM_MIDDLE,%eax
127 adcl %eax,4(%edi)
128 movl SUM_MS,%eax
129 adcl %eax,8(%edi)
130
131 popl %ebx
132 popl %edi
133 popl %esi
134 leave
135 ret