Blame - arch/arm64/lib/strcmp.S - kernel/msm-4.9

blob: 42f828b06c59a4daab35562e03aa70516a5d864a [file] [log] [blame]

zhichang.yuan	192c4d9	2014-04-28 13:11:33 +0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2013 ARM Ltd.
				3	* Copyright (C) 2013 Linaro.
				4	*
				5	* This code is based on glibc cortex strings work originally authored by Linaro
				6	* and re-licensed under GPLv2 for the Linux kernel. The original code can
				7	* be found @
				8	*
				9	* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
				10	* files/head:/src/aarch64/
				11	*
				12	* This program is free software; you can redistribute it and/or modify
				13	* it under the terms of the GNU General Public License version 2 as
				14	* published by the Free Software Foundation.
				15	*
				16	* This program is distributed in the hope that it will be useful,
				17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				19	* GNU General Public License for more details.
				20	*
				21	* You should have received a copy of the GNU General Public License
				22	* along with this program. If not, see <http://www.gnu.org/licenses/>.
				23	*/
				24
				25	#include <linux/linkage.h>
				26	#include <asm/assembler.h>
				27
				28	/*
				29	* compare two strings
				30	*
				31	* Parameters:
				32	* x0 - const string 1 pointer
				33	* x1 - const string 2 pointer
				34	* Returns:
				35	* x0 - an integer less than, equal to, or greater than zero
				36	* if s1 is found, respectively, to be less than, to match,
				37	* or be greater than s2.
				38	*/
				39
				40	#define REP8_01 0x0101010101010101
				41	#define REP8_7f 0x7f7f7f7f7f7f7f7f
				42	#define REP8_80 0x8080808080808080
				43
				44	/* Parameters and result. */
				45	src1 .req x0
				46	src2 .req x1
				47	result .req x0
				48
				49	/* Internal variables. */
				50	data1 .req x2
				51	data1w .req w2
				52	data2 .req x3
				53	data2w .req w3
				54	has_nul .req x4
				55	diff .req x5
				56	syndrome .req x6
				57	tmp1 .req x7
				58	tmp2 .req x8
				59	tmp3 .req x9
				60	zeroones .req x10
				61	pos .req x11
				62
				63	ENTRY(strcmp)
				64	eor tmp1, src1, src2
				65	mov zeroones, #REP8_01
				66	tst tmp1, #7
				67	b.ne .Lmisaligned8
				68	ands tmp1, src1, #7
				69	b.ne .Lmutual_align
				70
				71	/*
				72	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
				73	* (=> (X - 1) & ~(X \| 0x7f)) is non-zero iff a byte is zero, and
				74	* can be done in parallel across the entire word.
				75	*/
				76	.Lloop_aligned:
				77	ldr data1, [src1], #8
				78	ldr data2, [src2], #8
				79	.Lstart_realigned:
				80	sub tmp1, data1, zeroones
				81	orr tmp2, data1, #REP8_7f
				82	eor diff, data1, data2 /* Non-zero if differences found. */
				83	bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
				84	orr syndrome, diff, has_nul
				85	cbz syndrome, .Lloop_aligned
				86	b .Lcal_cmpresult
				87
				88	.Lmutual_align:
				89	/*
				90	* Sources are mutually aligned, but are not currently at an
				91	* alignment boundary. Round down the addresses and then mask off
				92	* the bytes that preceed the start point.
				93	*/
				94	bic src1, src1, #7
				95	bic src2, src2, #7
				96	lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
				97	ldr data1, [src1], #8
				98	neg tmp1, tmp1 /* Bits to alignment -64. */
				99	ldr data2, [src2], #8
				100	mov tmp2, #~0
				101	/* Big-endian. Early bytes are at MSB. */
				102	CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
				103	/* Little-endian. Early bytes are at LSB. */
				104	CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
				105
				106	orr data1, data1, tmp2
				107	orr data2, data2, tmp2
				108	b .Lstart_realigned
				109
				110	.Lmisaligned8:
				111	/*
				112	* Get the align offset length to compare per byte first.
				113	* After this process, one string's address will be aligned.
				114	*/
				115	and tmp1, src1, #7
				116	neg tmp1, tmp1
				117	add tmp1, tmp1, #8
				118	and tmp2, src2, #7
				119	neg tmp2, tmp2
				120	add tmp2, tmp2, #8
				121	subs tmp3, tmp1, tmp2
				122	csel pos, tmp1, tmp2, hi /Choose the maximum. /
				123	.Ltinycmp:
				124	ldrb data1w, [src1], #1
				125	ldrb data2w, [src2], #1
				126	subs pos, pos, #1
				127	ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */
				128	ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
				129	b.eq .Ltinycmp
				130	cbnz pos, 1f /find the null or unequal.../
				131	cmp data1w, #1
				132	ccmp data1w, data2w, #0, cs
				133	b.eq .Lstart_align /the last bytes are equal..../
				134	1:
				135	sub result, data1, data2
				136	ret
				137
				138	.Lstart_align:
				139	ands xzr, src1, #7
				140	b.eq .Lrecal_offset
				141	/process more leading bytes to make str1 aligned.../
				142	add src1, src1, tmp3
				143	add src2, src2, tmp3
				144	/load 8 bytes from aligned str1 and non-aligned str2../
				145	ldr data1, [src1], #8
				146	ldr data2, [src2], #8
				147
				148	sub tmp1, data1, zeroones
				149	orr tmp2, data1, #REP8_7f
				150	bic has_nul, tmp1, tmp2
				151	eor diff, data1, data2 /* Non-zero if differences found. */
				152	orr syndrome, diff, has_nul
				153	cbnz syndrome, .Lcal_cmpresult
				154	/How far is the current str2 from the alignment boundary.../
				155	and tmp3, tmp3, #7
				156	.Lrecal_offset:
				157	neg pos, tmp3
				158	.Lloopcmp_proc:
				159	/*
				160	* Divide the eight bytes into two parts. First,backwards the src2
				161	* to an alignment boundary,load eight bytes from the SRC2 alignment
				162	* boundary,then compare with the relative bytes from SRC1.
				163	* If all 8 bytes are equal,then start the second part's comparison.
				164	* Otherwise finish the comparison.
				165	* This special handle can garantee all the accesses are in the
				166	* thread/task space in avoid to overrange access.
				167	*/
				168	ldr data1, [src1,pos]
				169	ldr data2, [src2,pos]
				170	sub tmp1, data1, zeroones
				171	orr tmp2, data1, #REP8_7f
				172	bic has_nul, tmp1, tmp2
				173	eor diff, data1, data2 /* Non-zero if differences found. */
				174	orr syndrome, diff, has_nul
				175	cbnz syndrome, .Lcal_cmpresult
				176
				177	/The second part process/
				178	ldr data1, [src1], #8
				179	ldr data2, [src2], #8
				180	sub tmp1, data1, zeroones
				181	orr tmp2, data1, #REP8_7f
				182	bic has_nul, tmp1, tmp2
				183	eor diff, data1, data2 /* Non-zero if differences found. */
				184	orr syndrome, diff, has_nul
				185	cbz syndrome, .Lloopcmp_proc
				186
				187	.Lcal_cmpresult:
				188	/*
				189	* reversed the byte-order as big-endian,then CLZ can find the most
				190	* significant zero bits.
				191	*/
				192	CPU_LE( rev syndrome, syndrome )
				193	CPU_LE( rev data1, data1 )
				194	CPU_LE( rev data2, data2 )
				195
				196	/*
				197	* For big-endian we cannot use the trick with the syndrome value
				198	* as carry-propagation can corrupt the upper bits if the trailing
				199	* bytes in the string contain 0x01.
				200	* However, if there is no NUL byte in the dword, we can generate
				201	* the result directly. We ca not just subtract the bytes as the
				202	* MSB might be significant.
				203	*/
				204	CPU_BE( cbnz has_nul, 1f )
				205	CPU_BE( cmp data1, data2 )
				206	CPU_BE( cset result, ne )
				207	CPU_BE( cneg result, result, lo )
				208	CPU_BE( ret )
				209	CPU_BE( 1: )
				210	/Re-compute the NUL-byte detection, using a byte-reversed value. /
				211	CPU_BE( rev tmp3, data1 )
				212	CPU_BE( sub tmp1, tmp3, zeroones )
				213	CPU_BE( orr tmp2, tmp3, #REP8_7f )
				214	CPU_BE( bic has_nul, tmp1, tmp2 )
				215	CPU_BE( rev has_nul, has_nul )
				216	CPU_BE( orr syndrome, diff, has_nul )
				217
				218	clz pos, syndrome
				219	/*
				220	* The MS-non-zero bit of the syndrome marks either the first bit
				221	* that is different, or the top bit of the first zero byte.
				222	* Shifting left now will bring the critical information into the
				223	* top bits.
				224	*/
				225	lsl data1, data1, pos
				226	lsl data2, data2, pos
				227	/*
				228	* But we need to zero-extend (char is unsigned) the value and then
				229	* perform a signed 32-bit subtraction.
				230	*/
				231	lsr data1, data1, #56
				232	sub result, data1, data2, lsr #56
				233	ret
				234	ENDPROC(strcmp)