blob: 3b50266d72f32f458d6c00f7bc18f8d50fb89b6f [file] [log] [blame]
Daniel Dunbarfd089992009-06-26 16:47:03 +00001// This file is distributed under the University of Illinois Open Source
2// License. See LICENSE.TXT for details.
3
4// di_int __divdi3(di_int a, di_int b);
5
6// result = a / b.
7// both inputs and the output are 64-bit signed integers.
8// This will do whatever the underlying hardware is set to do on division by zero.
9// No other exceptions are generated, as the divide cannot overflow.
10//
11// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
12// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
13// currently possible via simulation of integer divides on the x87 unit.
14//
15// Stephen Canon, December 2008
16
17#ifdef __i386__
18
19.text
20.align 4
21.globl ___divdi3
22___divdi3:
23
24/* This is currently implemented by wrapping the unsigned divide up in an absolute
25 value, then restoring the correct sign at the end of the computation. This could
26 certainly be improved upon. */
27
28 pushl %esi
29 movl 20(%esp), %edx // high word of b
30 movl 16(%esp), %eax // low word of b
31 movl %edx, %ecx
32 sarl $31, %ecx // (b < 0) ? -1 : 0
33 xorl %ecx, %eax
34 xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
35 subl %ecx, %eax
36 sbbl %ecx, %edx // EDX:EAX = abs(b)
37 movl %edx, 20(%esp)
38 movl %eax, 16(%esp) // store abs(b) back to stack
39 movl %ecx, %esi // set aside sign of b
40
41 movl 12(%esp), %edx // high word of b
42 movl 8(%esp), %eax // low word of b
43 movl %edx, %ecx
44 sarl $31, %ecx // (a < 0) ? -1 : 0
45 xorl %ecx, %eax
46 xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
47 subl %ecx, %eax
48 sbbl %ecx, %edx // EDX:EAX = abs(a)
49 movl %edx, 12(%esp)
50 movl %eax, 8(%esp) // store abs(a) back to stack
51 xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b)
52
53 pushl %ebx
54 movl 24(%esp), %ebx // Find the index i of the leading bit in b.
55 bsrl %ebx, %ecx // If the high word of b is zero, jump to
56 jz 9f // the code to handle that special case [9].
57
58 /* High word of b is known to be non-zero on this branch */
59
60 movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
61
62 shrl %cl, %eax // Practically, this means that bhi is given by:
63 shrl %eax //
64 notl %ecx // bhi = (high word of b) << (31 - i) |
65 shll %cl, %ebx // (low word of b) >> (1 + i)
66 orl %eax, %ebx //
67 movl 16(%esp), %edx // Load the high and low words of a, and jump
68 movl 12(%esp), %eax // to [1] if the high word is larger than bhi
69 cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
70 jae 1f
71
72 /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
73
74 divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
75
76 pushl %edi
77 notl %ecx
78 shrl %eax
79 shrl %cl, %eax // q = qs >> (1 + i)
80 movl %eax, %edi
81 mull 24(%esp) // q*blo
82 movl 16(%esp), %ebx
83 movl 20(%esp), %ecx // ECX:EBX = a
84 subl %eax, %ebx
85 sbbl %edx, %ecx // ECX:EBX = a - q*blo
86 movl 28(%esp), %eax
87 imull %edi, %eax // q*bhi
88 subl %eax, %ecx // ECX:EBX = a - q*b
89 sbbl $0, %edi // decrement q if remainder is negative
90 xorl %edx, %edx
91 movl %edi, %eax
92
93 addl %esi, %eax // Restore correct sign to result
94 adcl %esi, %edx
95 xorl %esi, %eax
96 xorl %esi, %edx
97 popl %edi // Restore callee-save registers
98 popl %ebx
99 popl %esi
100 retl // Return
101
102
1031: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
104
105 subl %ebx, %edx // subtract bhi from ahi so that divide will not
106 divl %ebx // overflow, and find q and r such that
107 //
108 // ahi:alo = (1:q)*bhi + r
109 //
110 // Note that q is a number in (31-i).(1+i)
111 // fix point.
112
113 pushl %edi
114 notl %ecx
115 shrl %eax
116 orl $0x80000000, %eax
117 shrl %cl, %eax // q = (1:qs) >> (1 + i)
118 movl %eax, %edi
119 mull 24(%esp) // q*blo
120 movl 16(%esp), %ebx
121 movl 20(%esp), %ecx // ECX:EBX = a
122 subl %eax, %ebx
123 sbbl %edx, %ecx // ECX:EBX = a - q*blo
124 movl 28(%esp), %eax
125 imull %edi, %eax // q*bhi
126 subl %eax, %ecx // ECX:EBX = a - q*b
127 sbbl $0, %edi // decrement q if remainder is negative
128 xorl %edx, %edx
129 movl %edi, %eax
130
131 addl %esi, %eax // Restore correct sign to result
132 adcl %esi, %edx
133 xorl %esi, %eax
134 xorl %esi, %edx
135 popl %edi // Restore callee-save registers
136 popl %ebx
137 popl %esi
138 retl // Return
139
140
1419: /* High word of b is zero on this branch */
142
143 movl 16(%esp), %eax // Find qhi and rhi such that
144 movl 20(%esp), %ecx //
145 xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
146 divl %ecx //
147 movl %eax, %ebx //
148 movl 12(%esp), %eax // Find qlo such that
149 divl %ecx //
150 movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
151
152 addl %esi, %eax // Restore correct sign to result
153 adcl %esi, %edx
154 xorl %esi, %eax
155 xorl %esi, %edx
156 popl %ebx // Restore callee-save registers
157 popl %esi
158 retl // Return
159
160#endif // __i386__