Blame - arch/arm64/lib/strnlen.S - kernel/msm-5.4

blob: eae38da6e0bb3911a5cad1fb2f6da46b4bb0090a [file] [log] [blame]

zhichang.yuan	0a42cb0	2014-04-28 13:11:34 +0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2013 ARM Ltd.
				3	* Copyright (C) 2013 Linaro.
				4	*
				5	* This code is based on glibc cortex strings work originally authored by Linaro
				6	* and re-licensed under GPLv2 for the Linux kernel. The original code can
				7	* be found @
				8	*
				9	* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
				10	* files/head:/src/aarch64/
				11	*
				12	* This program is free software; you can redistribute it and/or modify
				13	* it under the terms of the GNU General Public License version 2 as
				14	* published by the Free Software Foundation.
				15	*
				16	* This program is distributed in the hope that it will be useful,
				17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				19	* GNU General Public License for more details.
				20	*
				21	* You should have received a copy of the GNU General Public License
				22	* along with this program. If not, see <http://www.gnu.org/licenses/>.
				23	*/
				24
				25	#include <linux/linkage.h>
				26	#include <asm/assembler.h>
				27
				28	/*
				29	* Determine the length of a string, examining at most a given number of bytes
				30	*
				31	* Parameters:
				32	* x0 - const string pointer
				33	* x1 - maximal string length
				34	* Returns:
				35	* x0 - the length of the string, or the limit from x1 if no NUL byte is found within it
				36	*/
				37
				38	/* Arguments and results. */
				39	srcin .req x0 /* in: string pointer as passed by the caller */
				40	len .req x0 /* out: result; shares x0 with srcin, so writing len destroys srcin */
				41	limit .req x1 /* in: maximum number of bytes to examine */
				42
				43	/* Locals and temporaries. */
				44	src .req x2 /* read cursor: srcin rounded down to 16 bytes, advanced by each ldp */
				45	data1 .req x3 /* first 8 bytes of the current 16-byte chunk */
				46	data2 .req x4 /* second 8 bytes of the current 16-byte chunk */
				47	data2a .req x5 /* data2 OR-ed with the alignment mask (misaligned entry only) */
				48	has_nul1 .req x6 /* NUL syndrome of data1: non-zero iff data1 holds a zero byte */
				49	has_nul2 .req x7 /* NUL syndrome of data2; later the syndrome of the dword holding the NUL */
				50	tmp1 .req x8 /* scratch; on entry holds srcin & 15 */
				51	tmp2 .req x9 /* scratch; alignment mask on the misaligned path */
				52	tmp3 .req x10 /* scratch */
				53	tmp4 .req x11 /* scratch; shift count on the misaligned path */
				54	zeroones .req x12 /* holds REP8_01 for the syndrome subtraction */
				55	pos .req x13 /* leading-zero bit count of the byte-swapped syndrome */
				56	limit_wd .req x14 /* remaining 16-byte chunks allowed by limit, minus one */
				57
				58	#define REP8_01 0x0101010101010101 /* 0x01 replicated into every byte */
				59	#define REP8_7f 0x7f7f7f7f7f7f7f7f /* 0x7f replicated into every byte */
				60	#define REP8_80 0x8080808080808080 /* 0x80 replicated into every byte; not referenced in the visible code */
				61
				62	ENTRY(strnlen) /* x0 = string length, capped at limit (x1) */
				63	cbz limit, .Lhit_limit /* limit == 0: nothing to examine, return limit */
				64	mov zeroones, #REP8_01
				65	bic src, srcin, #15 /* src = srcin rounded down to a 16-byte boundary */
				66	ands tmp1, srcin, #15 /* tmp1 = offset of srcin within that 16-byte chunk */
				67	b.ne .Lmisaligned /* non-zero offset: bytes before the string must be masked */
				68	/* Calculate the number of 16-byte chunks (full and partial) minus one. */
				69	sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */
				70	lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */
				71
				72	/*
				73	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
				74	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
				75	* can be done in parallel across the entire word.
				76	*/
				77	/*
				78	* The inner loop deals with two Dwords at a time. This has a
				79	* slightly higher start-up cost, but we should win quite quickly,
				80	* especially on cores with a high number of issue slots per
				81	* cycle, as we get much better parallelism out of the operations.
				82	*/
				83	.Lloop:
				84	ldp data1, data2, [src], #16 /* load 16 bytes, then advance src past them */
				85	.Lrealigned:
				86	sub tmp1, data1, zeroones
				87	orr tmp2, data1, #REP8_7f
				88	sub tmp3, data2, zeroones
				89	orr tmp4, data2, #REP8_7f
				90	bic has_nul1, tmp1, tmp2 /* (data1 - 0x01..) & ~(data1 | 0x7f..) */
				91	bic has_nul2, tmp3, tmp4 /* same syndrome for data2 */
				92	subs limit_wd, limit_wd, #1 /* N becomes set once the chunk budget is used up */
				93	orr tmp1, has_nul1, has_nul2 /* non-zero iff either dword holds a NUL */
				94	ccmp tmp1, #0, #0, pl /* If PL, compare tmp1 with 0; otherwise NZCV = 0000. */
				95	b.eq .Lloop /* continue while chunks remain and no NUL was seen */
				96
				97	cbz tmp1, .Lhit_limit /* No null in final Qword. */
				98
				99	/*
				100	* We know there's a null in the final Qword. The easiest thing
				101	* to do now is work out the length of the string and return
				102	* MIN (len, limit).
				103	*/
				104	sub len, src, srcin /* src is already past this chunk; srcin (x0) is overwritten here */
				105	cbz has_nul1, .Lnul_in_data2
				106	CPU_BE( mov data2, data1 ) /* prepare data to re-calculate the syndrome */
				107
				108	sub len, len, #8 /* NUL is in data1: step back over data2 */
				109	mov has_nul2, has_nul1
				110	.Lnul_in_data2:
				111	/*
				112	* For big-endian, carry propagation (if the final byte in the
				113	* string is 0x01) means we cannot use has_nul directly. The
				114	* easiest way to get the correct byte is to byte-swap the data
				115	* and calculate the syndrome a second time.
				116	*/
				117	CPU_BE( rev data2, data2 )
				118	CPU_BE( sub tmp1, data2, zeroones )
				119	CPU_BE( orr tmp2, data2, #REP8_7f )
				120	CPU_BE( bic has_nul2, tmp1, tmp2 )
				121
				122	sub len, len, #8 /* step back to the start of the dword holding the NUL */
				123	rev has_nul2, has_nul2 /* byte-swap so clz counts from the earliest string byte */
				124	clz pos, has_nul2
				125	add len, len, pos, lsr #3 /* Bits to bytes. */
				126	cmp len, limit
				127	csel len, len, limit, ls /* Return the lower value. */
				128	ret
				129
				130	.Lmisaligned:
				131	/*
				132	* Deal with a partial first word.
				133	* We're doing two things in parallel here;
				134	* 1) Calculate the number of words (but avoiding overflow if
				135	* limit is near ULONG_MAX) - to do this we need to work out
				136	* limit + tmp1 - 1 as a 65-bit value before shifting it;
				137	* 2) Load and mask the initial data words - we force the bytes
				138	* before the ones we are interested in to 0xff - this ensures
				139	* early bytes will not hit any zero detection.
				140	*/
				141	ldp data1, data2, [src], #16 /* src was rounded down, so tmp1 bytes of this load precede the string */
				142
				143	sub limit_wd, limit, #1
				144	and tmp3, limit_wd, #15 /* low 4 bits of (limit - 1) */
				145	lsr limit_wd, limit_wd, #4 /* high bits of (limit - 1), as a chunk count */
				146
				147	add tmp3, tmp3, tmp1 /* add the alignment offset to the low part only */
				148	add limit_wd, limit_wd, tmp3, lsr #4 /* carry from the low part: (limit - 1 + tmp1) >> 4 */
				149
				150	neg tmp4, tmp1
				151	lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */
				152
				153	mov tmp2, #~0
				154	/* Big-endian. Early bytes are at MSB. */
				155	CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift by (tmp4 & 63) bits. */
				156	/* Little-endian. Early bytes are at LSB. */
				157	CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift by (tmp4 & 63) bits. */
				158
				159	cmp tmp1, #8 /* offset <= 8: mask applies to data1; offset > 8: mask applies to data2 */
				160
				161	orr data1, data1, tmp2
				162	orr data2a, data2, tmp2
				163
				164	csinv data1, data1, xzr, le /* offset > 8: data1 = ~0, every byte precedes the string */
				165	csel data2, data2, data2a, le /* offset > 8: use the masked copy of data2 */
				166	b .Lrealigned
				167
				168	.Lhit_limit:
				169	mov len, limit /* no NUL within limit bytes (or limit == 0): return limit */
				170	ret
Thierry Reding	7f4e346	2016-02-16 11:16:31 +0100	[diff] [blame]	171	ENDPIPROC(strnlen)