Blame - lib/i386/divdi3.S - platform/external/compiler-rt

blob: 69593e32e9e39b919d93f447020ae653da74cf89 [file] [log] [blame]

Howard Hinnant	9ad441f	2010-11-16 22:13:33 +0000	[diff] [blame]	1	// This file is dual licensed under the MIT and the University of Illinois Open
				2	// Source Licenses. See LICENSE.TXT for details.
Daniel Dunbar	b3a6901	2009-06-26 16:47:03 +0000	[diff] [blame]	3
Daniel Dunbar	19336a2	2009-10-27 17:49:50 +0000	[diff] [blame]	4	#include "../assembly.h"
				5
Daniel Dunbar	b3a6901	2009-06-26 16:47:03 +0000	[diff] [blame]	6	// di_int __divdi3(di_int a, di_int b);
				7
				8	// result = a / b.
				9	// both inputs and the output are 64-bit signed integers.
				10	// This will do whatever the underlying hardware is set to do on division by zero.
				11	// No other exceptions are generated, as the divide cannot overflow.
				12	//
				13	// This is targeted at 32-bit x86 only, as this can be done directly in hardware
				14	// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
				15	// currently possible via simulation of integer divides on the x87 unit.
				16	//
				17	// Stephen Canon, December 2008
				18
				19	#ifdef __i386__
				20
				21	.text
				22	.align 4
Daniel Dunbar	b4b1e8c	2009-10-27 17:50:21 +0000	[diff] [blame]	23	DEFINE_COMPILERRT_FUNCTION(__divdi3)
Daniel Dunbar	b3a6901	2009-06-26 16:47:03 +0000	[diff] [blame]	24
				25	/* This is currently implemented by wrapping the unsigned divide up in an absolute
				26	value, then restoring the correct sign at the end of the computation. This could
				27	certainly be improved upon. */
				28
				29	pushl %esi
				30	movl 20(%esp), %edx // high word of b
				31	movl 16(%esp), %eax // low word of b
				32	movl %edx, %ecx
				33	sarl $31, %ecx // (b < 0) ? -1 : 0
				34	xorl %ecx, %eax
				35	xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
				36	subl %ecx, %eax
				37	sbbl %ecx, %edx // EDX:EAX = abs(b)
				38	movl %edx, 20(%esp)
				39	movl %eax, 16(%esp) // store abs(b) back to stack
				40	movl %ecx, %esi // set aside sign of b
				41
				42	movl 12(%esp), %edx // high word of b
				43	movl 8(%esp), %eax // low word of b
				44	movl %edx, %ecx
				45	sarl $31, %ecx // (a < 0) ? -1 : 0
				46	xorl %ecx, %eax
				47	xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
				48	subl %ecx, %eax
				49	sbbl %ecx, %edx // EDX:EAX = abs(a)
				50	movl %edx, 12(%esp)
				51	movl %eax, 8(%esp) // store abs(a) back to stack
				52	xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b)
				53
				54	pushl %ebx
				55	movl 24(%esp), %ebx // Find the index i of the leading bit in b.
				56	bsrl %ebx, %ecx // If the high word of b is zero, jump to
				57	jz 9f // the code to handle that special case [9].
				58
				59	/* High word of b is known to be non-zero on this branch */
				60
				61	movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
				62
				63	shrl %cl, %eax // Practically, this means that bhi is given by:
				64	shrl %eax //
				65	notl %ecx // bhi = (high word of b) << (31 - i) \|
				66	shll %cl, %ebx // (low word of b) >> (1 + i)
				67	orl %eax, %ebx //
				68	movl 16(%esp), %edx // Load the high and low words of a, and jump
				69	movl 12(%esp), %eax // to [1] if the high word is larger than bhi
				70	cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
				71	jae 1f
				72
				73	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
				74
				75	divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
				76
				77	pushl %edi
				78	notl %ecx
				79	shrl %eax
				80	shrl %cl, %eax // q = qs >> (1 + i)
				81	movl %eax, %edi
				82	mull 24(%esp) // q*blo
				83	movl 16(%esp), %ebx
				84	movl 20(%esp), %ecx // ECX:EBX = a
				85	subl %eax, %ebx
				86	sbbl %edx, %ecx // ECX:EBX = a - q*blo
				87	movl 28(%esp), %eax
				88	imull %edi, %eax // q*bhi
				89	subl %eax, %ecx // ECX:EBX = a - q*b
				90	sbbl $0, %edi // decrement q if remainder is negative
				91	xorl %edx, %edx
				92	movl %edi, %eax
				93
				94	addl %esi, %eax // Restore correct sign to result
				95	adcl %esi, %edx
				96	xorl %esi, %eax
				97	xorl %esi, %edx
				98	popl %edi // Restore callee-save registers
				99	popl %ebx
				100	popl %esi
				101	retl // Return
				102
				103
				104	1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
				105
				106	subl %ebx, %edx // subtract bhi from ahi so that divide will not
				107	divl %ebx // overflow, and find q and r such that
				108	//
				109	// ahi:alo = (1:q)*bhi + r
				110	//
				111	// Note that q is a number in (31-i).(1+i)
				112	// fix point.
				113
				114	pushl %edi
				115	notl %ecx
				116	shrl %eax
				117	orl $0x80000000, %eax
				118	shrl %cl, %eax // q = (1:qs) >> (1 + i)
				119	movl %eax, %edi
				120	mull 24(%esp) // q*blo
				121	movl 16(%esp), %ebx
				122	movl 20(%esp), %ecx // ECX:EBX = a
				123	subl %eax, %ebx
				124	sbbl %edx, %ecx // ECX:EBX = a - q*blo
				125	movl 28(%esp), %eax
				126	imull %edi, %eax // q*bhi
				127	subl %eax, %ecx // ECX:EBX = a - q*b
				128	sbbl $0, %edi // decrement q if remainder is negative
				129	xorl %edx, %edx
				130	movl %edi, %eax
				131
				132	addl %esi, %eax // Restore correct sign to result
				133	adcl %esi, %edx
				134	xorl %esi, %eax
				135	xorl %esi, %edx
				136	popl %edi // Restore callee-save registers
				137	popl %ebx
				138	popl %esi
				139	retl // Return
				140
				141
				142	9: /* High word of b is zero on this branch */
				143
				144	movl 16(%esp), %eax // Find qhi and rhi such that
				145	movl 20(%esp), %ecx //
				146	xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
				147	divl %ecx //
				148	movl %eax, %ebx //
				149	movl 12(%esp), %eax // Find qlo such that
				150	divl %ecx //
				151	movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
				152
				153	addl %esi, %eax // Restore correct sign to result
				154	adcl %esi, %edx
				155	xorl %esi, %eax
				156	xorl %esi, %edx
				157	popl %ebx // Restore callee-save registers
				158	popl %esi
				159	retl // Return
				160
				161	#endif // __i386__