Blame - compiler-rt/lib/i386/moddi3.S - toolchain/llvm-project

blob: af1f38a13ec151db22fd52c5152267560506797f [file] [log] [blame]

Daniel Dunbar	fd08999	2009-06-26 16:47:03 +0000	[diff] [blame]	1	// This file is distributed under the University of Illinois Open Source
				2	// License. See LICENSE.TXT for details.
				3
				4	// di_int __moddi3(di_int a, di_int b);
				5
				6	// result = remainder of a / b.
				7	// both inputs and the output are 64-bit signed integers.
				8	// This will do whatever the underlying hardware is set to do on division by zero.
				9	// No other exceptions are generated, as the divide cannot overflow.
				10	//
				11	// This is targeted at 32-bit x86 only, as this can be done directly in hardware
				12	// on x86_64. The performance goal is ~40 cycles per divide, which is faster than
				13	// currently possible via simulation of integer divides on the x87 unit.
				14	//
				15
				16	// Stephen Canon, December 2008
				17
				18	#ifdef __i386__
				19
				20	.text
				21	.align 4
				22	.globl ___moddi3
				23	___moddi3:
				24
					// ___moddi3: signed 64-bit remainder of a / b, returned in EDX:EAX.
					//
					// In:  a and b are read from the stack; a sits at the lower addresses,
					//      b directly above it, each stored low word first.
					// Out: EDX:EAX = remainder, carrying the sign of a.
					//
					// Method: a and b are replaced by their absolute values in place (the
					// incoming argument slots on the stack are overwritten), the unsigned
					// remainder |a| mod |b| is computed, and the sign of a (kept in ESI as
					// 0 or -1) is re-applied to the result.
					//
					// Registers: ESI, EBX and EDI are pushed before they are used and each
					// return path pops exactly the registers it pushed. ECX is used as
					// scratch and is not saved.
					//
					// Every pushl moves the arguments 4 bytes further from ESP, so the same
					// operand is addressed with a different displacement in each section.
					//
				25	/* This is currently implemented by wrapping the unsigned modulus up in an absolute
				26	value. This could certainly be improved upon. */
				27
				28	pushl %esi // save ESI; it will hold the sign of a (a is now at 8/12(%esp), b at 16/20(%esp))
				29	movl 20(%esp), %edx // high word of b
				30	movl 16(%esp), %eax // low word of b
				31	movl %edx, %ecx
				32	sarl $31, %ecx // (b < 0) ? -1 : 0
				33	xorl %ecx, %eax
				34	xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b
				35	subl %ecx, %eax
				36	sbbl %ecx, %edx // EDX:EAX = abs(b)
				37	movl %edx, 20(%esp)
				38	movl %eax, 16(%esp) // store abs(b) back to stack
				39
				40	movl 12(%esp), %edx // high word of a
				41	movl 8(%esp), %eax // low word of a
				42	movl %edx, %ecx
				43	sarl $31, %ecx // (a < 0) ? -1 : 0
				44	xorl %ecx, %eax
				45	xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a
				46	subl %ecx, %eax
				47	sbbl %ecx, %edx // EDX:EAX = abs(a)
				48	movl %edx, 12(%esp)
				49	movl %eax, 8(%esp) // store abs(a) back to stack
				50	movl %ecx, %esi // set aside sign of a (0 or -1); the result is given this sign
				51
				52	pushl %ebx // save EBX (a is now at 12/16(%esp), b at 20/24(%esp))
				53	movl 24(%esp), %ebx // Find the index i of the leading bit in b.
				54	bsrl %ebx, %ecx // If the high word of b is zero, jump to
				55	jz 9f // the code to handle that special case [9].
				56
				57	/* High word of b is known to be non-zero on this branch */
				58
				59	movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b
				60
				61	shrl %cl, %eax // Practically, this means that bhi is given by:
				62	shrl %eax //
				63	notl %ecx // bhi = (high word of b) << (31 - i) |
				64	shll %cl, %ebx // (low word of b) >> (1 + i)
				65	orl %eax, %ebx //
				66	movl 16(%esp), %edx // Load the high and low words of a, and jump
				67	movl 12(%esp), %eax // to [2] if the high word is not smaller than bhi
				68	cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
				69	jae 2f // unsigned compare: taken when ahi >= bhi
				70
				71	/* High word of a is less than bhi = (b >> (1 + i)) on this branch */
				72
				73	divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r (bs is bhi, held in EBX)
				74
				75	pushl %edi // save EDI (a is now at 16/20(%esp), b at 24/28(%esp))
				76	notl %ecx // undo the earlier notl so that CL = i again
				77	shrl %eax
				78	shrl %cl, %eax // q = qs >> (1 + i)
				79	movl %eax, %edi // keep q in EDI; the mull below overwrites EDX:EAX
				80	mull 24(%esp) // q*blo
				81	movl 16(%esp), %ebx
				82	movl 20(%esp), %ecx // ECX:EBX = a
				83	subl %eax, %ebx
				84	sbbl %edx, %ecx // ECX:EBX = a - q*blo
				85	movl 28(%esp), %eax
				86	imull %edi, %eax // q*bhi
				87	subl %eax, %ecx // ECX:EBX = a - q*b
				88
				89	jnc 1f // if the subtraction did not borrow (non-negative), this is the result.
				90	addl 24(%esp), %ebx // otherwise
				91	adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
				92	1: movl %ebx, %eax // move the remainder into the return registers EDX:EAX
				93	movl %ecx, %edx
				94
				95	addl %esi, %eax // Restore correct sign to result: with s = ESI (0 or -1),
				96	adcl %esi, %edx // (x + s) ^ s leaves x unchanged when s = 0
				97	xorl %esi, %eax // and yields -x when s = -1.
				98	xorl %esi, %edx
				99	popl %edi // Restore callee-save registers
				100	popl %ebx
				101	popl %esi
				102	retl // Return
				103
				104	2: /* High word of a is greater than or equal to bhi = (b >> (1 + i)) on this branch */
				105
				106	subl %ebx, %edx // subtract bhi from ahi so that divide will not
				107	divl %ebx // overflow, and find q and r such that
				108	//
				109	// ahi:alo = (1:q)*bhi + r
				110	//
				111	// Note that q is a number in (31-i).(1+i)
				112	// fixed point.
				113
				114	pushl %edi // save EDI (a is now at 16/20(%esp), b at 24/28(%esp))
				115	notl %ecx // undo the earlier notl so that CL = i again
				116	shrl %eax
				117	orl $0x80000000, %eax // put the leading 1 of (1:qs) into the bit vacated by the shift
				118	shrl %cl, %eax // q = (1:qs) >> (1 + i)
				119	movl %eax, %edi // keep q in EDI; the mull below overwrites EDX:EAX
				120	mull 24(%esp) // q*blo
				121	movl 16(%esp), %ebx
				122	movl 20(%esp), %ecx // ECX:EBX = a
				123	subl %eax, %ebx
				124	sbbl %edx, %ecx // ECX:EBX = a - q*blo
				125	movl 28(%esp), %eax
				126	imull %edi, %eax // q*bhi
				127	subl %eax, %ecx // ECX:EBX = a - q*b
				128
				129	jnc 3f // if the subtraction did not borrow (non-negative), this is the result.
				130	addl 24(%esp), %ebx // otherwise
				131	adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result
				132	3: movl %ebx, %eax // move the remainder into the return registers EDX:EAX
				133	movl %ecx, %edx
				134
				135	addl %esi, %eax // Restore correct sign to result: with s = ESI (0 or -1),
				136	adcl %esi, %edx // (x + s) ^ s leaves x unchanged when s = 0
				137	xorl %esi, %eax // and yields -x when s = -1.
				138	xorl %esi, %edx
				139	popl %edi // Restore callee-save registers
				140	popl %ebx
				141	popl %esi
				142	retl // Return
				143
				144	9: /* High word of b is zero on this branch (only ESI and EBX are on the stack here) */
				145
				146	movl 16(%esp), %eax // Find qhi and rhi such that
				147	movl 20(%esp), %ecx //
				148	xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b
				149	divl %ecx //
				150	movl %eax, %ebx // qhi is copied to EBX but never read again: EBX is restored by the popl below
				151	movl 12(%esp), %eax // Find rlo such that
				152	divl %ecx //
				153	movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b
				154	popl %ebx // restore EBX
				155	xorl %edx, %edx // and return 0:rlo
				156
				157	addl %esi, %eax // Restore correct sign to result: with s = ESI (0 or -1),
				158	adcl %esi, %edx // (x + s) ^ s leaves x unchanged when s = 0
				159	xorl %esi, %eax // and yields -x when s = -1.
				160	xorl %esi, %edx
				161	popl %esi // restore ESI
				162	retl // Return
				163
				164
				165	#endif // __i386__