Blame - arch/alpha/lib/stxncpy.S - kernel/msm-4.9

blob: da1a72740d29da7d8c00e26a81da4cbf80d5d74d [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* arch/alpha/lib/stxncpy.S
				3	* Contributed by Richard Henderson (rth@tamu.edu)
				4	*
				5	* Copy no more than COUNT bytes of the null-terminated string from
				6	* SRC to DST.
				7	*
				8	* This is an internal routine used by strncpy, stpncpy, and strncat.
				9	* As such, it uses special linkage conventions to make implementation
				10	* of these public functions more efficient.
				11	*
				12	* On input:
				13	* t9 = return address
				14	* a0 = DST
				15	* a1 = SRC
				16	* a2 = COUNT
				17	*
				18	* Furthermore, COUNT may not be zero.
				19	*
				20	* On output:
				21	* t0 = last word written
				22	* t10 = bitmask (with one bit set) indicating the byte position of
				23	* the end of the range specified by COUNT
				24	* t12 = bitmask (with one bit set) indicating the last byte written
				25	* a0 = unaligned address of the last word written
				26	* a2 = the number of full words left in COUNT
				27	*
				28	* Furthermore, v0, a3-a5, t11, and $at are untouched.
				29	*/
				30
				31	#include <asm/regdef.h>
				32
				33	.set noat
				34	.set noreorder
				35
				36	.text
				37
				38	/* stxncpy_aligned: copy path used when SRC and DST share the same byte
				39	offset within a quadword. It lives in its own procedure descriptor because
				40	either gdb (as of 4.16) or gas (as of 2.7) doesn't like the entry point for
				41	a procedure being somewhere in the middle of the procedure descriptor. */
				42
				43	.ent stxncpy_aligned
				44	.align 3
				45	stxncpy_aligned:
				46	.frame sp, 0, t9, 0
				47	.prologue 0
				48
				49	/* On entry to this basic block:
				50	t0 == the first destination word for masking back in
				51	t1 == the first source word (a1's low 3 bits give the starting byte offset). */
				52
				53	/* Create the 1st output word and detect 0's in the 1st input word. */
				54	lda t2, -1 # e1 : build a mask against false zero
				55	mskqh t2, a1, t2 # e0 : detection in the src word
				56	mskqh t1, a1, t3 # e0 : t3 = src bytes at and above the start offset
				57	ornot t1, t2, t2 # .. e1 : src word with bytes below the start offset forced to 0xff
				58	mskql t0, a1, t0 # e0 : assemble the first output word
				59	cmpbge zero, t2, t8 # .. e1 : bits set iff null found
				60	or t0, t3, t0 # e0 : t0 = preserved dst low bytes | src bytes
				61	beq a2, $a_eoc # .. e1 : no full words left, COUNT ends in this word
				62	bne t8, $a_eos # .. e1 : null found in the first word
				63
				64	/* On entry to this basic block:
				65	t0 == a source word not containing a null. */
				66
				67	$a_loop:
				68	stq_u t0, 0(a0) # e0 : store the completed word
				69	addq a0, 8, a0 # .. e1 : advance dst
				70	ldq_u t0, 0(a1) # e0 : fetch the next src word
				71	addq a1, 8, a1 # .. e1 : advance src
				72	subq a2, 1, a2 # e0 : one fewer full word left in COUNT
				73	cmpbge zero, t0, t8 # .. e1 (stall) : bits set iff null found
				74	beq a2, $a_eoc # e1 : COUNT ends in this word
				75	beq t8, $a_loop # e1 : no null yet, keep copying
				76
				77	/* Take care of the final (partial) word store. At this point
				78	the end-of-count bit is set in t8 iff it applies.
				79
				80	On entry to this basic block we have:
				81	t0 == the source word containing the null
				82	t8 == the cmpbge mask that found it. */
				83
				84	$a_eos:
				85	negq t8, t12 # e0 : find low bit set
				86	and t8, t12, t12 # e1 (stall) : t12 = one-bit mask of the last byte to write
				87
				88	/* For the sake of the cache, don't read a destination word
				89	if we're not going to need it. */
				90	and t12, 0x80, t6 # e0 : is the last byte to write byte 7?
				91	bne t6, 1f # .. e1 (zdb) : yes, the whole word is stored as-is
				92
				93	/* We're doing a partial word store and so need to combine
				94	our source and original destination words. */
				95	ldq_u t1, 0(a0) # e0 : t1 = current dst word
				96	subq t12, 1, t6 # .. e1 : t6 = all bits below the last-byte bit
				97	or t12, t6, t8 # e0 : t8 = byte mask of bytes <= last byte to write
				98	unop #
				99	zapnot t0, t8, t0 # e0 : clear src bytes > null
				100	zap t1, t8, t1 # .. e1 : clear dst bytes <= null
				101	or t0, t1, t0 # e1 : merge src and preserved dst bytes
				102
				103	1: stq_u t0, 0(a0) # e0 : store the final word
				104	ret (t9) # e1 : return through the special linkage register
				105
				106	/* Add the end-of-count bit to the eos detection bitmask. */
				107	$a_eoc:
				108	or t10, t8, t8 # t8 = null bits | end-of-count bit
				109	br $a_eos # finish through the common final-word store
				110
				111	.end stxncpy_aligned
				112
				113	.align 3
				114	.ent __stxncpy
				115	.globl __stxncpy
				116	__stxncpy:
				117	.frame sp, 0, t9, 0
				118	.prologue 0
				119
				120	/* Entry point: bias COUNT by the dest misalignment, build t10, then dispatch. Are source and destination co-aligned? */
				121	xor a0, a1, t1 # e0 : low 3 bits differ iff not co-aligned
				122	and a0, 7, t0 # .. e1 : find dest misalignment
				123	and t1, 7, t1 # e0 : t1 != 0 iff src and dst offsets differ
				124	addq a2, t0, a2 # .. e1 : bias count by dest misalignment
				125	subq a2, 1, a2 # e0 : a2 = index of the last count byte from the dst word base
				126	and a2, 7, t2 # e1 : t2 = position of that byte within its word
				127	srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8
				128	addq zero, 1, t10 # .. e1 :
				129	sll t10, t2, t10 # e0 : t10 = bitmask of last count byte
				130	bne t1, $unaligned # .. e1 :
				131
				132	/* We are co-aligned; take care of a partial first word. */
				133
				134	ldq_u t1, 0(a1) # e0 : load first src word
				135	addq a1, 8, a1 # .. e1 : a1 now points past the first src word
				136
				137	beq t0, stxncpy_aligned # avoid loading dest word if not needed
				138	ldq_u t0, 0(a0) # e0 : dst is misaligned: fetch dst word for masking back in
				139	br stxncpy_aligned # .. e1 :
				140
				141
				142	/* The source and destination are not co-aligned. Align the destination
				143	and cope. We have to be very careful about not reading too much and
				144	causing a SEGV. */
				145
				146	.align 3
				147	$u_head:
				148	/* We know just enough now to be able to assemble the first
				149	full source word. We can still find a zero at the end of it
				150	that prevents us from outputting the whole thing.
				151
				152	On entry to this basic block:
				153	t0 == the first dest word, unmasked
				154	t1 == the shifted low bits of the first source word
				155	t6 == bytemask that is -1 in dest word bytes */
				156
				157	ldq_u t2, 8(a1) # e0 : load second src word
				158	addq a1, 8, a1 # .. e1 :
				159	mskql t0, a0, t0 # e0 : mask trailing garbage in dst
				160	extqh t2, a1, t4 # e0 :
				161	or t1, t4, t1 # e1 : first aligned src word complete
				162	mskqh t1, a0, t1 # e0 : mask leading garbage in src
				163	or t0, t1, t0 # e0 : first output word complete
				164	or t0, t6, t6 # e1 : mask original data for zero test
				165	cmpbge zero, t6, t8 # e0 :
				166	beq a2, $u_eocfin # .. e1 : COUNT ends in this first word
				167	lda t6, -1 # e0 :
				168	bne t8, $u_final # .. e1 : null in the first output word
				169
				170	mskql t6, a1, t6 # e0 : mask out bits already seen
				171	nop # .. e1 :
				172	stq_u t0, 0(a0) # e0 : store first output word
				173	or t6, t2, t2 # .. e1 :
				174	cmpbge zero, t2, t8 # e0 : find nulls in second partial
				175	addq a0, 8, a0 # .. e1 :
				176	subq a2, 1, a2 # e0 :
				177	bne t8, $u_late_head_exit # .. e1 :
				178
				179	/* Finally, we've got all the stupid leading edge cases taken care
				180	of and we can set up to enter the main loop. */
				181
				182	extql t2, a1, t1 # e0 : position hi-bits of lo word
				183	beq a2, $u_eoc # .. e1 :
				184	ldq_u t2, 8(a1) # e0 : read next high-order source word
				185	addq a1, 8, a1 # .. e1 :
				186	extqh t2, a1, t0 # e0 : position lo-bits of hi word (stall)
				187	cmpbge zero, t2, t8 # .. e1 :
				188	nop # e0 :
				189	bne t8, $u_eos # .. e1 :
				190
				191	/* Unaligned copy main loop. In order to avoid reading too much,
				192	the loop is structured to detect zeros in aligned source words.
				193	This has, unfortunately, effectively pulled half of a loop
				194	iteration out into the head and half into the tail, but it does
				195	prevent nastiness from accumulating in the very thing we want
				196	to run as fast as possible.
				197
				198	On entry to this basic block:
				199	t0 == the shifted low-order bits from the current source word
				200	t1 == the shifted high-order bits from the previous source word
				201	t2 == the unshifted current source word
				202
				203	We further know that t2 does not contain a null terminator. */
				204
				205	.align 3
				206	$u_loop:
				207	or t0, t1, t0 # e0 : current dst word now complete
				208	subq a2, 1, a2 # .. e1 : decrement word count
				209	stq_u t0, 0(a0) # e0 : save the current word
				210	addq a0, 8, a0 # .. e1 :
				211	extql t2, a1, t1 # e0 : extract high bits for next time
				212	beq a2, $u_eoc # .. e1 :
				213	ldq_u t2, 8(a1) # e0 : load high word for next time
				214	addq a1, 8, a1 # .. e1 :
				215	nop # e0 :
				216	cmpbge zero, t2, t8 # e1 : test new word for eos (stall)
				217	extqh t2, a1, t0 # e0 : extract low bits for current word
				218	beq t8, $u_loop # .. e1 :
				219
				220	/* We've found a zero somewhere in the source word we just read.
				221	If it resides in the lower half, we have one (probably partial)
				222	word to write out, and if it resides in the upper half, we
				223	have one full and one partial word left to write out.
				224
				225	On entry to this basic block:
				226	t0 == the shifted low-order bits from the current source word
				227	t1 == the shifted high-order bits from the previous source word
				228	t2 == the unshifted current source word. */
				229	$u_eos:
				230	or t0, t1, t0 # e0 : first (partial) source word complete
				231	nop # .. e1 :
				232	cmpbge zero, t0, t8 # e0 : is the null in this first bit?
				233	bne t8, $u_final # .. e1 (zdb)
				234
				235	stq_u t0, 0(a0) # e0 : the null was in the high-order bits
				236	addq a0, 8, a0 # .. e1 :
				237	subq a2, 1, a2 # e1 :
				238
				239	$u_late_head_exit:
				240	extql t2, a1, t0 # .. e0 : t0 = remaining high bytes of the current src word
				241	cmpbge zero, t0, t8 # e0 : locate the null in them
				242	or t8, t10, t6 # e1 : candidate mask including the end-of-count bit
				243	cmoveq a2, t6, t8 # e0 : use it only when no full words remain
				244	nop # .. e1 :
				245
				246	/* Take care of a final (probably partial) result word.
				247	On entry to this basic block:
				248	t0 == assembled source word
				249	t8 == cmpbge mask that found the null. */
				250	$u_final:
				251	negq t8, t6 # e0 : isolate low bit set
				252	and t6, t8, t12 # e1 : t12 = one-bit mask of the last byte to write
				253
				254	and t12, 0x80, t6 # e0 : avoid dest word load if we can
				255	bne t6, 1f # .. e1 (zdb)
				256
				257	ldq_u t1, 0(a0) # e0 : t1 = current dst word
				258	subq t12, 1, t6 # .. e1 :
				259	or t6, t12, t8 # e0 : t8 = byte mask of bytes <= last byte to write
				260	zapnot t0, t8, t0 # .. e1 : kill source bytes > null
				261	zap t1, t8, t1 # e0 : kill dest bytes <= null
				262	or t0, t1, t0 # e1 :
				263
				264	1: stq_u t0, 0(a0) # e0 :
				265	ret (t9) # .. e1 :
				266
				267	/* Got to end-of-count before end of string.
				268	On entry to this basic block:
				269	t1 == the shifted high-order bits from the previous source word */
				270	$u_eoc:
				271	and a1, 7, t6 # e1 : t6 = src byte offset within its word
				272	sll t10, t6, t6 # e0 : shift the end-of-count bit up by that offset
				273	and t6, 0xff, t6 # e0 : still inside the word iff t1 already covers the last count byte
				274	bne t6, 1f # .. e1 : then skip the extra source load
				275
				276	ldq_u t2, 8(a1) # e0 : load final src word
				277	nop # .. e1 :
				278	extqh t2, a1, t0 # e0 : extract low bits for last word
				279	or t1, t0, t1 # e1 :
				280
				281	1: cmpbge zero, t1, t8 # look for a null in the final word
				282	mov t1, t0 # $u_final expects the assembled word in t0
				283
				284	$u_eocfin: # end-of-count, final word
				285	or t10, t8, t8 # merge the end-of-count bit into the null mask
				286	br $u_final
				287
				288	/* Unaligned copy entry point. */
				289	.align 3
				290	$unaligned:
				291
				292	ldq_u t1, 0(a1) # e0 : load first source word
				293
				294	and a0, 7, t4 # .. e1 : find dest misalignment
				295	and a1, 7, t5 # e0 : find src misalignment
				296
				297	/* Conditionally load the first destination word and a bytemask
				298	with 0xff indicating that the destination byte is sacrosanct. */
				299
				300	mov zero, t0 # .. e1 :
				301	mov zero, t6 # e0 :
				302	beq t4, 1f # .. e1 :
				303	ldq_u t0, 0(a0) # e0 :
				304	lda t6, -1 # .. e1 :
				305	mskql t6, a0, t6 # e0 :
				306	subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr
				307
				308	/* If source misalignment is larger than dest misalignment, we need
				309	extra startup checks to avoid SEGV. */
				310
				311	1: cmplt t4, t5, t12 # e1 :
				312	extql t1, a1, t1 # .. e0 : shift src into place
				313	lda t2, -1 # e0 : for creating masks later
				314	beq t12, $u_head # .. e1 :
				315
				316	extql t2, a1, t2 # e0 : t2 = 0xff in bytes supplied by the first src word
				317	cmpbge zero, t1, t8 # .. e1 : is there a zero?
				318	andnot t2, t6, t2 # e0 : dest mask for a single word copy (kept in t2; t12 is reserved for the output bit)
				319	or t8, t10, t5 # .. e1 : test for end-of-count too
				320	cmpbge zero, t2, t3 # e0 : t3 = bits for bytes outside the dest mask
				321	cmoveq a2, t5, t8 # .. e1 : honour end-of-count only when no full words remain
				322	andnot t8, t3, t8 # e0 : ignore hits outside the valid bytes
				323	beq t8, $u_head # .. e1 (zdb) : t2 is reloaded there, so clobbering it is harmless
				324
				325	/* At this point we've found a zero in the first partial word of
				326	the source. We need to isolate the valid source data and mask
				327	it into the original destination data. (Incidentally, we know
				328	that we'll need at least one byte of that original dest word.) */
				329
				330	ldq_u t0, 0(a0) # e0 :
				331	negq t8, t6 # .. e1 : build bitmask of bytes <= zero
				332	mskqh t1, t4, t1 # e0 : drop src bytes that land on preserved dst bytes
				333	and t6, t8, t12 # .. e1 : t12 = one-bit mask of the last byte written (output contract)
				334	subq t12, 1, t6 # e0 :
				335	or t6, t12, t8 # e1 : t8 = byte mask of bytes <= last byte written
				336
				337	zapnot t2, t8, t2 # e0 : prepare source word; mirror changes
				338	zapnot t1, t8, t1 # .. e1 : to source validity mask
				339
				340	andnot t0, t2, t0 # e0 : zero place for source to reside
				341	or t0, t1, t0 # e1 : and put it there
				342	stq_u t0, 0(a0) # e0 :
				343	ret (t9) # .. e1 :
				344
				345	.end __stxncpy