blob: 69593e32e9e39b919d93f447020ae653da74cf89 [file] [log] [blame]
Howard Hinnant9ad441f2010-11-16 22:13:33 +00001// This file is dual licensed under the MIT and the University of Illinois Open
2// Source Licenses. See LICENSE.TXT for details.
Daniel Dunbarb3a69012009-06-26 16:47:03 +00003
Daniel Dunbar19336a22009-10-27 17:49:50 +00004#include "../assembly.h"
5
Daniel Dunbarb3a69012009-06-26 16:47:03 +00006// di_int __divdi3(di_int a, di_int b);
7
8// result = a / b.
9// both inputs and the output are 64-bit signed integers.
10// This will do whatever the underlying hardware is set to do on division by zero.
11// No other exceptions are generated, as the divide cannot overflow.
12//
13// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
14// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
15// currently possible via simulation of integer divides on the x87 unit.
16//
17// Stephen Canon, December 2008
18
19#ifdef __i386__
20
21.text
22.align 4
Daniel Dunbarb4b1e8c2009-10-27 17:50:21 +000023DEFINE_COMPILERRT_FUNCTION(__divdi3)
Daniel Dunbarb3a69012009-06-26 16:47:03 +000024
25/* This is currently implemented by wrapping the unsigned divide up in an absolute
26 value, then restoring the correct sign at the end of the computation. This could
27 certainly be improved upon. */
28
29 pushl %esi
30 movl 20(%esp), %edx // high word of b
31 movl 16(%esp), %eax // low word of b
32 movl %edx, %ecx
33 sarl $31, %ecx // (b < 0) ? -1 : 0
34 xorl %ecx, %eax
35 xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
36 subl %ecx, %eax
37 sbbl %ecx, %edx // EDX:EAX = abs(b)
38 movl %edx, 20(%esp)
39 movl %eax, 16(%esp) // store abs(b) back to stack
40 movl %ecx, %esi // set aside sign of b
41
42 movl 12(%esp), %edx // high word of b
43 movl 8(%esp), %eax // low word of b
44 movl %edx, %ecx
45 sarl $31, %ecx // (a < 0) ? -1 : 0
46 xorl %ecx, %eax
47 xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
48 subl %ecx, %eax
49 sbbl %ecx, %edx // EDX:EAX = abs(a)
50 movl %edx, 12(%esp)
51 movl %eax, 8(%esp) // store abs(a) back to stack
52 xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b)
53
54 pushl %ebx
55 movl 24(%esp), %ebx // Find the index i of the leading bit in b.
56 bsrl %ebx, %ecx // If the high word of b is zero, jump to
57 jz 9f // the code to handle that special case [9].
58
59 /* High word of b is known to be non-zero on this branch */
60
61 movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
62
63 shrl %cl, %eax // Practically, this means that bhi is given by:
64 shrl %eax //
65 notl %ecx // bhi = (high word of b) << (31 - i) |
66 shll %cl, %ebx // (low word of b) >> (1 + i)
67 orl %eax, %ebx //
68 movl 16(%esp), %edx // Load the high and low words of a, and jump
69 movl 12(%esp), %eax // to [1] if the high word is larger than bhi
70 cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
71 jae 1f
72
73 /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
74
75 divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
76
77 pushl %edi
78 notl %ecx
79 shrl %eax
80 shrl %cl, %eax // q = qs >> (1 + i)
81 movl %eax, %edi
82 mull 24(%esp) // q*blo
83 movl 16(%esp), %ebx
84 movl 20(%esp), %ecx // ECX:EBX = a
85 subl %eax, %ebx
86 sbbl %edx, %ecx // ECX:EBX = a - q*blo
87 movl 28(%esp), %eax
88 imull %edi, %eax // q*bhi
89 subl %eax, %ecx // ECX:EBX = a - q*b
90 sbbl $0, %edi // decrement q if remainder is negative
91 xorl %edx, %edx
92 movl %edi, %eax
93
94 addl %esi, %eax // Restore correct sign to result
95 adcl %esi, %edx
96 xorl %esi, %eax
97 xorl %esi, %edx
98 popl %edi // Restore callee-save registers
99 popl %ebx
100 popl %esi
101 retl // Return
102
103
1041: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
105
106 subl %ebx, %edx // subtract bhi from ahi so that divide will not
107 divl %ebx // overflow, and find q and r such that
108 //
109 // ahi:alo = (1:q)*bhi + r
110 //
111 // Note that q is a number in (31-i).(1+i)
112 // fix point.
113
114 pushl %edi
115 notl %ecx
116 shrl %eax
117 orl $0x80000000, %eax
118 shrl %cl, %eax // q = (1:qs) >> (1 + i)
119 movl %eax, %edi
120 mull 24(%esp) // q*blo
121 movl 16(%esp), %ebx
122 movl 20(%esp), %ecx // ECX:EBX = a
123 subl %eax, %ebx
124 sbbl %edx, %ecx // ECX:EBX = a - q*blo
125 movl 28(%esp), %eax
126 imull %edi, %eax // q*bhi
127 subl %eax, %ecx // ECX:EBX = a - q*b
128 sbbl $0, %edi // decrement q if remainder is negative
129 xorl %edx, %edx
130 movl %edi, %eax
131
132 addl %esi, %eax // Restore correct sign to result
133 adcl %esi, %edx
134 xorl %esi, %eax
135 xorl %esi, %edx
136 popl %edi // Restore callee-save registers
137 popl %ebx
138 popl %esi
139 retl // Return
140
141
1429: /* High word of b is zero on this branch */
143
144 movl 16(%esp), %eax // Find qhi and rhi such that
145 movl 20(%esp), %ecx //
146 xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
147 divl %ecx //
148 movl %eax, %ebx //
149 movl 12(%esp), %eax // Find qlo such that
150 divl %ecx //
151 movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
152
153 addl %esi, %eax // Restore correct sign to result
154 adcl %esi, %edx
155 xorl %esi, %eax
156 xorl %esi, %edx
157 popl %ebx // Restore callee-save registers
158 popl %esi
159 retl // Return
160
161#endif // __i386__