blob: 1ce22da678fed71f14767b96b22243801907354f [file] [log] [blame]
Daniel Dunbarfd089992009-06-26 16:47:03 +00001// This file is distributed under the University of Illinois Open Source
2// License. See LICENSE.TXT for details.
3
Daniel Dunbar7d504782009-10-27 17:49:50 +00004#include "../assembly.h"
5
Daniel Dunbarfd089992009-06-26 16:47:03 +00006// di_int __divdi3(di_int a, di_int b);
7
8// result = a / b.
9// both inputs and the output are 64-bit signed integers.
10// This will do whatever the underlying hardware is set to do on division by zero.
11// No other exceptions are generated, as the divide cannot overflow.
12//
13// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
14// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
15// currently possible via simulation of integer divides on the x87 unit.
16//
17// Stephen Canon, December 2008
18
19#ifdef __i386__
20
21.text
22.align 4
23.globl ___divdi3
24___divdi3:
25
26/* This is currently implemented by wrapping the unsigned divide up in an absolute
27 value, then restoring the correct sign at the end of the computation. This could
28 certainly be improved upon. */
29
30 pushl %esi
31 movl 20(%esp), %edx // high word of b
32 movl 16(%esp), %eax // low word of b
33 movl %edx, %ecx
34 sarl $31, %ecx // (b < 0) ? -1 : 0
35 xorl %ecx, %eax
36 xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
37 subl %ecx, %eax
38 sbbl %ecx, %edx // EDX:EAX = abs(b)
39 movl %edx, 20(%esp)
40 movl %eax, 16(%esp) // store abs(b) back to stack
41 movl %ecx, %esi // set aside sign of b
42
43 movl 12(%esp), %edx // high word of b
44 movl 8(%esp), %eax // low word of b
45 movl %edx, %ecx
46 sarl $31, %ecx // (a < 0) ? -1 : 0
47 xorl %ecx, %eax
48 xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
49 subl %ecx, %eax
50 sbbl %ecx, %edx // EDX:EAX = abs(a)
51 movl %edx, 12(%esp)
52 movl %eax, 8(%esp) // store abs(a) back to stack
53 xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b)
54
55 pushl %ebx
56 movl 24(%esp), %ebx // Find the index i of the leading bit in b.
57 bsrl %ebx, %ecx // If the high word of b is zero, jump to
58 jz 9f // the code to handle that special case [9].
59
60 /* High word of b is known to be non-zero on this branch */
61
62 movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
63
64 shrl %cl, %eax // Practically, this means that bhi is given by:
65 shrl %eax //
66 notl %ecx // bhi = (high word of b) << (31 - i) |
67 shll %cl, %ebx // (low word of b) >> (1 + i)
68 orl %eax, %ebx //
69 movl 16(%esp), %edx // Load the high and low words of a, and jump
70 movl 12(%esp), %eax // to [1] if the high word is larger than bhi
71 cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
72 jae 1f
73
74 /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
75
76 divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
77
78 pushl %edi
79 notl %ecx
80 shrl %eax
81 shrl %cl, %eax // q = qs >> (1 + i)
82 movl %eax, %edi
83 mull 24(%esp) // q*blo
84 movl 16(%esp), %ebx
85 movl 20(%esp), %ecx // ECX:EBX = a
86 subl %eax, %ebx
87 sbbl %edx, %ecx // ECX:EBX = a - q*blo
88 movl 28(%esp), %eax
89 imull %edi, %eax // q*bhi
90 subl %eax, %ecx // ECX:EBX = a - q*b
91 sbbl $0, %edi // decrement q if remainder is negative
92 xorl %edx, %edx
93 movl %edi, %eax
94
95 addl %esi, %eax // Restore correct sign to result
96 adcl %esi, %edx
97 xorl %esi, %eax
98 xorl %esi, %edx
99 popl %edi // Restore callee-save registers
100 popl %ebx
101 popl %esi
102 retl // Return
103
104
1051: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
106
107 subl %ebx, %edx // subtract bhi from ahi so that divide will not
108 divl %ebx // overflow, and find q and r such that
109 //
110 // ahi:alo = (1:q)*bhi + r
111 //
112 // Note that q is a number in (31-i).(1+i)
113 // fix point.
114
115 pushl %edi
116 notl %ecx
117 shrl %eax
118 orl $0x80000000, %eax
119 shrl %cl, %eax // q = (1:qs) >> (1 + i)
120 movl %eax, %edi
121 mull 24(%esp) // q*blo
122 movl 16(%esp), %ebx
123 movl 20(%esp), %ecx // ECX:EBX = a
124 subl %eax, %ebx
125 sbbl %edx, %ecx // ECX:EBX = a - q*blo
126 movl 28(%esp), %eax
127 imull %edi, %eax // q*bhi
128 subl %eax, %ecx // ECX:EBX = a - q*b
129 sbbl $0, %edi // decrement q if remainder is negative
130 xorl %edx, %edx
131 movl %edi, %eax
132
133 addl %esi, %eax // Restore correct sign to result
134 adcl %esi, %edx
135 xorl %esi, %eax
136 xorl %esi, %edx
137 popl %edi // Restore callee-save registers
138 popl %ebx
139 popl %esi
140 retl // Return
141
142
1439: /* High word of b is zero on this branch */
144
145 movl 16(%esp), %eax // Find qhi and rhi such that
146 movl 20(%esp), %ecx //
147 xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
148 divl %ecx //
149 movl %eax, %ebx //
150 movl 12(%esp), %eax // Find qlo such that
151 divl %ecx //
152 movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
153
154 addl %esi, %eax // Restore correct sign to result
155 adcl %esi, %edx
156 xorl %esi, %eax
157 xorl %esi, %edx
158 popl %ebx // Restore callee-save registers
159 popl %esi
160 retl // Return
161
162#endif // __i386__