Blame - compiler-rt/lib/arm/udivmodsi4.S - toolchain/llvm-project

blob: d164a751d0895414ea89463a57246b90c201429a [file] [log] [blame]

Stephen Canon	5abb5c1	2011-03-18 16:35:02 +0000	[diff] [blame]	1	/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
				2	*
				3	* The LLVM Compiler Infrastructure
				4	*
				5	* This file is dual licensed under the MIT and the University of Illinois Open
				6	* Source Licenses. See LICENSE.TXT for details.
				7	*
				8	*===----------------------------------------------------------------------===//
				9	*
				10	* This file implements the __udivmodsi4 (32-bit unsigned integer divide and
				11	* modulus) function for the ARM architecture. A naive digit-by-digit
				12	* computation is employed for simplicity.
				13	*
				14	===----------------------------------------------------------------------===/
				15
				16	#include "../assembly.h"
				17
				18	#define ESTABLISH_FRAME \
				19	push {r4, r7, lr} ;\
				20	add r7, sp, #4
				21	#define CLEAR_FRAME_AND_RETURN \
				22	pop {r4, r7, pc}
				23
				24	#define a r0
				25	#define b r1
				26	#define i r3
				27	#define r r4
				28	#define q ip
				29	#define one lr
				30
				31	.syntax unified
				32	.align 3
				33	DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
				34	// We use a simple digit by digit algorithm; before we get into the actual
				35	// divide loop, we must calculate the left-shift amount necessary to align
				36	// the MSB of the divisor with that of the dividend (If this shift is
				37	// negative, then the result is zero, and we early out). We also conjure a
				38	// bit mask of 1 to use in constructing the quotient, and initialize the
				39	// quotient to zero.
				40	ESTABLISH_FRAME
				41	clz r4, a
				42	tst b, b // detect divide-by-zero
				43	clz r3, b
				44	mov q, #0
Anton Korobeynikov	bdadd87	2011-04-19 17:50:42 +0000	[diff] [blame]	45	beq LOCAL_LABEL(return) // return 0 if b is zero.
Stephen Canon	5abb5c1	2011-03-18 16:35:02 +0000	[diff] [blame]	46	mov one, #1
				47	subs i, r3, r4
Anton Korobeynikov	bdadd87	2011-04-19 17:50:42 +0000	[diff] [blame]	48	blt LOCAL_LABEL(return) // return 0 if MSB(a) < MSB(b)
Stephen Canon	5abb5c1	2011-03-18 16:35:02 +0000	[diff] [blame]	49
Anton Korobeynikov	bdadd87	2011-04-19 17:50:42 +0000	[diff] [blame]	50	LOCAL_LABEL(mainLoop):
Stephen Canon	5abb5c1	2011-03-18 16:35:02 +0000	[diff] [blame]	51	// This loop basically implements the following:
				52	//
				53	// do {
				54	// if (a >= b << i) {
				55	// a -= b << i;
				56	// q \|= 1 << i;
				57	// if (a == 0) break;
				58	// }
				59	// } while (--i)
				60	//
				61	// Note that this does not perform the final iteration (i == 0); by doing it
				62	// this way, we can merge the two branches which is a substantial win for
				63	// such a tight loop on current ARM architectures.
				64	subs r, a, b, lsl i
				65	orrhs q, q,one, lsl i
				66	movhs a, r
				67	subsne i, i, #1
Anton Korobeynikov	bdadd87	2011-04-19 17:50:42 +0000	[diff] [blame]	68	bhi LOCAL_LABEL(mainLoop)
Stephen Canon	5abb5c1	2011-03-18 16:35:02 +0000	[diff] [blame]	69
				70	// Do the final test subtraction and update of quotient (i == 0), as it is
				71	// not performed in the main loop.
				72	subs r, a, b
				73	orrhs q, #1
				74	movhs a, r
				75
Anton Korobeynikov	bdadd87	2011-04-19 17:50:42 +0000	[diff] [blame]	76	LOCAL_LABEL(return):
Stephen Canon	5abb5c1	2011-03-18 16:35:02 +0000	[diff] [blame]	77	// Store the remainder, and move the quotient to r0, then return.
				78	str a, [r2]
				79	mov r0, q
				80	CLEAR_FRAME_AND_RETURN