blob: 51842831798599018d62aa13e8553ac559a73827 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001 .file "wm_shrx.S"
2/*---------------------------------------------------------------------------+
3 | wm_shrx.S |
4 | |
5 | 64 bit right shift functions |
6 | |
7 | Copyright (C) 1992,1995 |
8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9 | Australia. E-mail billm@jacobi.maths.monash.edu.au |
10 | |
11 | Call from C as: |
12 | unsigned FPU_shrx(void *arg1, unsigned arg2) |
13 | and |
14 | unsigned FPU_shrxs(void *arg1, unsigned arg2) |
15 | |
16 +---------------------------------------------------------------------------*/
17
18#include "fpu_emu.h"
19
20.text
/*---------------------------------------------------------------------------+
 | unsigned FPU_shrx(void *arg1, unsigned arg2)                              |
 |                                                                           |
 | Extended right shift of a 64 bit quantity; quickest for small shifts.     |
 | The two 32 bit words at arg1 (lsl at offset 0, msl at offset 4) are       |
 | extended on the right by a zero 32 bit word and the resulting 96 bit      |
 | value is shifted right by arg2 bits:                                      |
 |        [ msl ][ lsl ][ extension ]  ---->                                 |
 | The upper 64 bits are stored back through arg1; the extension word is     |
 | returned in eax.  Bits shifted out below the extension are dropped.       |
 | A count of 96 or more clears the arg and returns zero.                    |
 +---------------------------------------------------------------------------*/

ENTRY(FPU_shrx)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM2,%ecx		/* ecx = shift count (arg2) */
	movl	PARAM1,%esi		/* esi = arg1, address of the 64 bit arg */
	cmpl	$32,%ecx		/* shrd can only shift by 0..31 bits */
	jae	L_shrx_from_32

/* Count is 0..31: all three words take part in the shift */
	pushl	%ebx			/* ebx is only needed on this path */
	xorl	%eax,%eax		/* eax = extension, initially zero */
	movl	(%esi),%ebx		/* ebx = lsl */
	movl	4(%esi),%edx		/* edx = msl */
	shrdl	%cl,%ebx,%eax		/* low bits of lsl drop into the extension */
	shrdl	%cl,%edx,%ebx		/* low bits of msl drop into lsl */
	shrl	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

L_shrx_from_32:
	cmpl	$64,%ecx
	jae	L_shrx_from_64

/* Count is 32..63: the words move down one place, then shift by count-32 */
	subb	$32,%cl			/* cl = remaining shift, 0..31 */
	movl	(%esi),%eax		/* old lsl becomes the extension */
	movl	4(%esi),%edx		/* old msl becomes the new lsl */
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	movl	%edx,(%esi)
	movl	$0,4(%esi)		/* new msl is always zero here */
	popl	%esi
	leave
	ret

L_shrx_from_64:
	cmpl	$96,%ecx
	jae	L_shrx_from_96

/* Count is 64..95: only part of the old msl survives, in the extension */
	subb	$64,%cl			/* cl = remaining shift, 0..31 */
	movl	4(%esi),%eax		/* old msl becomes the extension */
	shrl	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* both words of the arg become zero */
	movl	%edx,4(%esi)
	popl	%esi
	leave
	ret

L_shrx_from_96:
/* Count is 96 or more: nothing survives, not even in the extension */
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	ret
95
96
/*---------------------------------------------------------------------------+
 | unsigned FPU_shrxs(void *arg1, unsigned arg2)                             |
 |                                                                           |
 | Extended right shift with a sticky flag; the fall-through path handles    |
 | counts of 32..63 (small floating point integers).                         |
 | The 64 bit quantity at arg1 is shifted right by arg2 bits as the top      |
 | of a 96 bit value whose low 32 bits (eax) start out as zero:              |
 |        [ msl ][ lsl ][ extension ]  ---->                                 |
 | The shifted 64 bit result is written back through arg1 and the            |
 | extension is returned in eax.                                             |
 | The low byte of the returned eax does not hold shifted data: it is 1      |
 | if any bit shifted out of the arg, other than the one that lands in       |
 | bit 31 of eax, is set, and 0 otherwise.                                   |
 +---------------------------------------------------------------------------*/
ENTRY(FPU_shrxs)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM2,%ecx		/* ecx = shift count (arg2) */
	movl	PARAM1,%esi		/* esi = arg1, address of the 64 bit arg */
	cmpl	$64,%ecx
	jae	Ls_shift_from_64

	cmpl	$32,%ecx		/* shrd can only shift by 0..31 bits */
	jb	Ls_shift_under_32

/* Count is 32..63.  This case is reached by falling through both branches
   because small integers are assumed to be the most common requirement. */
	subb	$32,%cl			/* cl = remaining shift, 0..31 */
	xorl	%ebx,%ebx		/* ebx collects bits falling below eax */
	movl	(%esi),%eax		/* old lsl becomes the extension */
	movl	4(%esi),%edx		/* old msl becomes the new lsl */
	shrdl	%cl,%eax,%ebx
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	testl	%ebx,%ebx		/* anything in the lowest 32 bits ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* anything in the next 31 bits ? */
	setne	%bh
	testw	%bx,%bx			/* any of those 63 bits set ? */
	setne	%al			/* flag replaces the low byte of eax */
	movl	%edx,(%esi)
	movl	$0,4(%esi)		/* new msl is always zero here */
	popl	%ebx
	popl	%esi
	leave
	ret

/* Count is 0..31 */
Ls_shift_under_32:
	xorl	%eax,%eax		/* eax = extension, initially zero */
	movl	(%esi),%ebx		/* ebx = lsl */
	movl	4(%esi),%edx		/* edx = msl */
	shrdl	%cl,%ebx,%eax
	shrdl	%cl,%edx,%ebx
	shrl	%cl,%edx
	testl	$0x7fffffff,%eax	/* every shifted-out bit is in eax */
	setne	%al			/* flag replaces the low byte of eax */
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

/* Count is 64..95 */
Ls_shift_from_64:
	cmpl	$96,%ecx
	jae	Ls_shift_from_96

	subb	$64,%cl			/* cl = remaining shift, 0..31 */
	xorl	%edx,%edx		/* edx collects bits falling below ebx */
	movl	(%esi),%ebx		/* ebx = lsl */
	movl	4(%esi),%eax		/* old msl becomes the extension */
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax
	orl	%ebx,%edx		/* anything at all below the extension ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* low 31 bits of the extension itself */
	setne	%bh
	testw	%bx,%bx
	setne	%al			/* flag replaces the low byte of eax */
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* both words of the arg become zero */
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

Ls_shift_from_96:
/* Count is 96 or more: eax ends up as just the flag, 0 or 1 */
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx		/* is any bit of the arg set ? */
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)		/* both words of the arg become zero */
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret