/* SPDX-License-Identifier: GPL-2.0 */
/*---------------------------------------------------------------------------+
 |  polynomial_Xsig.S                                                        |
 |                                                                           |
 | Fixed point arithmetic polynomial evaluation.                             |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1995                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 | Call from C as:                                                           |
 |   void polynomial_Xsig(Xsig *accum, const unsigned long long *x,          |
 |                        const unsigned long long terms[], int n)           |
 |                                                                           |
 | Computes:                                                                 |
 |   terms[0] + x*(terms[1] + x*(terms[2] + x*( ... + x*terms[n])))          |
 | and adds the result to the 12 byte Xsig.                                  |
 | n is the index of the highest-order term, so n+1 entries of terms[] are   |
 | read.  x is passed by address: the code dereferences its second argument. |
 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 | precision.                                                                |
 |                                                                           |
 | This function must be used carefully: most overflow of intermediate       |
 | results is controlled, but overflow of the result is not.                 |
 |                                                                           |
 +---------------------------------------------------------------------------*/
.file	"polynomial_Xsig.S"

#include "fpu_emu.h"


/* Size in bytes of one terms[] entry, written as an immediate operand. */
#define	TERM_SIZE	$8

/*
 * Local variables of polynomial_Xsig, addressed relative to %ebp.
 * The routine reserves 32 bytes below the frame pointer for them.
 *
 *   SUM_MS:SUM_MIDDLE:SUM_LS	     running 96-bit sum
 *   ACCUM_MS:ACCUM_MIDDLE:ACCUM_LS  96-bit scratch that receives sum * x
 *   OVERFLOWED			     byte flag; non-zero when the last
 *				     addition into the sum carried out of
 *				     the ms long
 */
#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define	SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define	SUM_LS		-28(%ebp)	/* sum ls long */
#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
#define	OVERFLOWED	-16(%ebp)	/* addition overflow flag (byte) */

/*
 * polynomial_Xsig - evaluate a fixed point polynomial in x, highest-order
 * term first, and add the 96-bit result into the 12-byte value at accum.
 *
 * Arguments, read through the PARAMn macros (defined outside this file):
 *   PARAM1  accum - address of the 12-byte value the result is added to
 *   PARAM2  x     - address of the 64-bit multiplier, ls long first
 *   PARAM3  terms - address of the array of 8-byte coefficients
 *   PARAM4  n     - index of the highest-order coefficient; terms[0..n]
 *		     are read.  This argument slot doubles as the loop
 *		     counter and is decremented in place.
 *
 * Outline:
 *   sum = terms[n] in the upper two longs, ls long = 0
 *   for (i = n - 1; i >= 0; i--) {
 *	accum = upper 96 bits of the 128-bit product SUM_MS:SUM_MIDDLE * x
 *	if (previous addition carried out) accum += x at MIDDLE:MS
 *	sum = accum + terms[i]
 *   }
 *   value at PARAM1 += sum
 *
 * SUM_LS is written on every pass but is not an input to the multiply; only
 * its final value is used, when the sum is added to the caller's value.
 *
 * Registers: %esi = x, %edi = current term (accum at the end),
 * %eax/%edx = scratch.  %esi, %edi and %ebx are saved and restored;
 * %ebx is not otherwise used by this routine.
 *
 * No check is made on n; a carry out of the final addition is not handled.
 */
.text
ENTRY(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp		/* room for SUM_*, ACCUM_*, OVERFLOWED */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM2,%esi		/* x */
	movl	PARAM3,%edi		/* terms */

	/* Point %edi at terms[n]: terms + n * TERM_SIZE */
	movl	TERM_SIZE,%eax
	mull	PARAM4			/* n */
	addl	%eax,%edi

	/* Start the sum with terms[n] in the upper two longs */
	movl	4(%edi),%edx		/* terms[n] ms long */
	movl	%edx,SUM_MS
	movl	(%edi),%edx		/* terms[n] ls long */
	movl	%edx,SUM_MIDDLE
	xor	%eax,%eax
	movl	%eax,SUM_LS
	movb	%al,OVERFLOWED

	subl	TERM_SIZE,%edi		/* step down to the next lower term */
	decl	PARAM4
	js	.L_accum_done		/* counter went negative: no multiply */

.L_accum_loop:
	/*
	 * accum = upper 96 bits of SUM_MS:SUM_MIDDLE * x, assembled from
	 * four 32x32 -> 64 bit partial products.
	 */
	xor	%eax,%eax
	movl	%eax,ACCUM_MS
	movl	%eax,ACCUM_MIDDLE

	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* x ls long */
	movl	%edx,ACCUM_LS		/* low half of this product is dropped */

	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	(%esi)			/* x ls long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_MIDDLE
	adcl	%edx,ACCUM_MS

	/*
	 * If the addition at the end of the previous pass carried out of
	 * SUM_MS, add x once more, at the MIDDLE:MS position.
	 */
	testb	$0xff,OVERFLOWED
	jz	.L_no_overflow

	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax
	adcl	%eax,ACCUM_MS		/* This could overflow too */

.L_no_overflow:

/*
 * Now put the sum of next term and the accumulator
 * into the sum register
 *
 * NOTE(review): the term's ls long is added into both the ls and the
 * middle long of the sum.  The terms are 8 bytes and line up with the
 * upper two longs, so this looks unusual; it is kept exactly as found.
 * Confirm whether it is intentional before changing it.
 */
	movl	ACCUM_LS,%eax
	addl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_LS
	movl	ACCUM_MIDDLE,%eax
	adcl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_MIDDLE
	movl	ACCUM_MS,%eax
	adcl	4(%edi),%eax		/* term ms long */
	movl	%eax,SUM_MS		/* movl leaves the carry flag intact */
	sbbb	%al,%al			/* %al = 0xff if the add carried, else 0 */
	movb	%al,OVERFLOWED		/* Used in the next iteration */

	subl	TERM_SIZE,%edi		/* step down to the next lower term */
	decl	PARAM4
	jns	.L_accum_loop

.L_accum_done:
	/* Add the 96-bit sum into the caller's 12-byte value */
	movl	PARAM1,%edi		/* accum */
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)		/* a carry out of here is not handled */

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret
ENDPROC(polynomial_Xsig)