Blame - arch/arc/lib/strcmp.S - kernel/msm-4.9

blob: 3544600fefe6d6a480e432d5fff27fde69abbe0b [file] [log] [blame]

Vineet Gupta	5210d1e	2013-01-18 15:12:18 +0530	[diff] [blame]	1	/*
				2	* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
				3	*
				4	* This program is free software; you can redistribute it and/or modify
				5	* it under the terms of the GNU General Public License version 2 as
				6	* published by the Free Software Foundation.
				7	*/
				8
				9	/* This is optimized primarily for the ARC700.
				10	It would be possible to speed up the loops by one cycle / word
				11	or, respectively, one cycle / byte by forcing double source 1 alignment, unrolling
				12	by a factor of two, and speculatively loading the second word / byte of
				13	source 1; however, that would increase the overhead for loop setup / finish,
				14	and strcmp might often terminate early. */
				15
Vineet Gupta	ec7ac6a	2014-02-07 13:47:43 +0530	[diff] [blame]	16	#include <linux/linkage.h>
Vineet Gupta	5210d1e	2013-01-18 15:12:18 +0530	[diff] [blame]	17
/*
 * strcmp: compare the two NUL-terminated strings addressed by r0 and r1.
 *
 * In:      r0 = first string, r1 = second string (both registers are used
 *          as post-incremented load addresses below).
 * Out:     r0 = 0 if the strings are equal, a positive value if the first
 *          differing byte of string 1 is the larger one (unsigned), and a
 *          negative value (bit 31 set) otherwise.
 * Writes:  r0-r5, r12 and the condition flags.
 *
 * When both pointers are word aligned the strings are compared one 32-bit
 * word at a time (.Lwordloop); otherwise they are compared byte by byte
 * (.Lcharloop).  The word path reads whole words, so the handling of the
 * word holding the terminator differs between little and big endian.
 */
Vineet Gupta	ec7ac6a	2014-02-07 13:47:43 +0530	[diff] [blame]	18	ENTRY(strcmp)
Vineet Gupta	5210d1e	2013-01-18 15:12:18 +0530	[diff] [blame]	19	or r2,r0,r1 ; merge the address bits of both pointers
				20	bmsk_s r2,r2,1 ; keep bits 1:0 only
				21	brne r2,0,.Lcharloop ; a pointer is not word aligned: go byte by byte
				22	mov_s r12,0x01010101 ; r12 = 0x01 in every byte lane
				23	ror r5,r12 ; r5 = r12 rotated right by one = 0x80808080
				24	.Lwordloop:
				25	ld.ab r2,[r0,4] ; r2 = next word of string 1, then r0 += 4
				26	ld.ab r3,[r1,4] ; r3 = next word of string 2, then r1 += 4
				27	nop_s ; NOTE(review): presumably scheduling padding after the loads - confirm
				28	sub r4,r2,r12 ; r4 = (r2 - 0x01010101) ...
				29	bic r4,r4,r2 ; ... & ~r2 ...
				30	and r4,r4,r5 ; ... & 0x80808080: nonzero if r2 may contain a zero byte
				31	brne r4,0,.Lfound0 ; possible terminator in this word of string 1
				32	breq r2,r3,.Lwordloop ; same word and no terminator: next word
				33	#ifdef __LITTLE_ENDIAN__
				34	xor r0,r2,r3 ; mask for difference
				35	sub_s r1,r0,1
				36	bic_s r0,r0,r1 ; mask for least significant difference bit
				37	sub r1,r5,r0 ; borrow turns the bits from that bit up to bit 6 of its byte into ones
				38	xor r0,r5,r1 ; mask for least significant difference byte
				39	and_s r2,r2,r0 ; restrict both words to the masked bits
				40	and_s r3,r3,r0
				41	#endif /* LITTLE ENDIAN */
				42	cmp_s r2,r3 ; unsigned compare (whole words when the block above is compiled out)
				43	mov_s r0,1 ; result is 1 unless r2 is the lower value
				44	j_s.d [blink] ; return; the next instruction executes in the delay slot
				45	bset.lo r0,r0,31 ; r2 < r3 (unsigned): set bit 31 to make the result negative
				46
				47	.balign 4
				48	#ifdef __LITTLE_ENDIAN__
				49	.Lfound0:
				50	xor r0,r2,r3 ; mask for difference
				51	or r0,r0,r4 ; or in zero indicator
				52	sub_s r1,r0,1
				53	bic_s r0,r0,r1 ; mask for least significant difference bit
				54	sub r1,r5,r0 ; borrow turns the bits from that bit up to bit 6 of its byte into ones
				55	xor r0,r5,r1 ; mask for least significant difference byte
				56	and_s r2,r2,r0 ; restrict both words to the masked bits
				57	and_s r3,r3,r0
				58	sub.f r0,r2,r3 ; r0 = masked difference with flags set; 0 if the strings are equal
				59	mov.hi r0,1 ; r2 > r3 (unsigned): return 1
				60	j_s.d [blink] ; return; the next instruction executes in the delay slot
				61	bset.lo r0,r0,31 ; r2 < r3 (unsigned): set bit 31 to make the result negative
				62	#else /* BIG ENDIAN */
				63	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
				64	because of carry-propagation from a lower significant zero byte.
				65	We can compensate for this by checking that bit0 is zero.
				66	This compensation is not necessary in the step where we
				67	get a low estimate for r2, because in any affected bytes
				68	we already have 0x00 or 0x01, which will remain unchanged
				69	when bit 7 is cleared. */
				70	.balign 4
				71	.Lfound0:
				72	lsr r0,r4,8 ; r0 = zero indicator shifted right by one byte
				73	lsr_s r1,r2 ; r1 = r2 >> 1, lines bit 0 of each byte up with the shifted indicator
				74	bic_s r2,r2,r0 ; get low estimate for r2 and get ...
				75	bic_s r0,r0,r1 ; <this is the adjusted mask for zeros>
				76	or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ...
				77	cmp_s r3,r2 ; ... be independent of trailing garbage
				78	or_s r2,r2,r0 ; likewise for r3 > r2
				79	bic_s r3,r3,r0
				80	rlc r0,0 ; r0 := r2 > r3 ? 1 : 0
				81	cmp_s r2,r3 ; second compare, feeds the .lo condition below
				82	j_s.d [blink] ; return; the next instruction executes in the delay slot
				83	bset.lo r0,r0,31 ; r2 < r3 (unsigned): set bit 31 to make the result negative
				84	#endif /* ENDIAN */
				85
				86	.balign 4
				87	.Lcharloop:
				88	ldb.ab r2,[r0,1] ; r2 = next byte of string 1, then r0 += 1
				89	ldb.ab r3,[r1,1] ; r3 = next byte of string 2, then r1 += 1
				90	nop_s ; NOTE(review): presumably scheduling padding after the loads - confirm
				91	breq r2,0,.Lcmpend ; terminator of string 1 reached
				92	breq r2,r3,.Lcharloop ; bytes match: next byte
				93	.Lcmpend:
				94	j_s.d [blink] ; return; the next instruction executes in the delay slot
				95	sub r0,r2,r3 ; result = byte of string 1 minus byte of string 2
Vineet Gupta	ec7ac6a	2014-02-07 13:47:43 +0530	[diff] [blame]	96	END(strcmp)