/* checksum.S: Sparc optimized checksum code.
 *
 * Copyright(C) 1995 Linus Torvalds
 * Copyright(C) 1995 Miguel de Icaza
 * Copyright(C) 1996 David S. Miller
 * Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */
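
/* Rough idea of what csum_partial() below computes, as a C sketch only
 * (hypothetical helper for illustration, not the generic lib/checksum.c
 * code): 32-bit words are accumulated and every carry is folded back in,
 * so the result can later be reduced to the 16-bit Internet checksum by
 * csum_fold().
 *
 *	static u32 csum_partial_sketch(const u32 *p, int nwords, u32 sum)
 *	{
 *		u64 acc = sum;
 *
 *		while (nwords--)
 *			acc += *p++;		// trailing halfword/byte handled
 *						// like the "end cruft" below
 *		acc = (acc >> 32) + (acc & 0xffffffff);
 *		return acc + (acc >> 32);	// fold the last carry
 *	}
 */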

#include <asm/errno.h>
#include <asm/export.h>

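/* CSUM_BIGCHUNK: sum one 32-byte chunk of 'buf'.  Four 64-bit loads feed
 * eight addxcc's, so each 32-bit word is added into 'sum' together with the
 * carry left by the previous add; the caller folds the final carry in with
 * a plain addx after the unrolled loop.
 */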
#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5) \
	ldd	[buf + offset + 0x00], t0; \
	ldd	[buf + offset + 0x08], t2; \
	addxcc	t0, sum, sum; \
	addxcc	t1, sum, sum; \
	ldd	[buf + offset + 0x10], t4; \
	addxcc	t2, sum, sum; \
	addxcc	t3, sum, sum; \
	ldd	[buf + offset + 0x18], t0; \
	addxcc	t4, sum, sum; \
	addxcc	t5, sum, sum; \
	addxcc	t0, sum, sum; \
	addxcc	t1, sum, sum;

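/* CSUM_LASTCHUNK: sum one 16-byte chunk, addressed downwards from a buffer
 * pointer that has already been advanced past it; the jump table in
 * csum_partial runs several of these back to back.
 */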
#define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3) \
	ldd	[buf - offset - 0x08], t0; \
	ldd	[buf - offset - 0x00], t2; \
	addxcc	t0, sum, sum; \
	addxcc	t1, sum, sum; \
	addxcc	t2, sum, sum; \
	addxcc	t3, sum, sum;

	/* Do end cruft out of band to get better cache patterns. */
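	/* On entry: %o0 = buf, %o2 = sum so far, and only the low four bits
	 * of %o1 (the trailing byte count) still matter; the condition codes
	 * already reflect "%o1 & 8".  The leftover 8/4/2/1 bytes are summed
	 * and the final carry is folded into the return value.
	 */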
csum_partial_end_cruft:
	be	1f				! caller asks %o1 & 0x8
	andcc	%o1, 4, %g0			! nope, check for word remaining
	ldd	[%o0], %g2			! load two
	addcc	%g2, %o2, %o2			! add first word to sum
	addxcc	%g3, %o2, %o2			! add second word as well
	add	%o0, 8, %o0			! advance buf ptr
	addx	%g0, %o2, %o2			! add in final carry
	andcc	%o1, 4, %g0			! check again for word remaining
1:	be	1f				! nope, skip this code
	andcc	%o1, 3, %o1			! check for trailing bytes
	ld	[%o0], %g2			! load it
	addcc	%g2, %o2, %o2			! add to sum
	add	%o0, 4, %o0			! advance buf ptr
	addx	%g0, %o2, %o2			! add in final carry
	andcc	%o1, 3, %g0			! check again for trailing bytes
1:	be	1f				! no trailing bytes, return
	addcc	%o1, -1, %g0			! only one byte remains?
	bne	2f				! at least two bytes more
	subcc	%o1, 2, %o1			! only two bytes more?
	b	4f				! only one byte remains
	or	%g0, %g0, %o4			! clear fake hword value
2:	lduh	[%o0], %o4			! get hword
	be	6f				! jmp if only hword remains
	add	%o0, 2, %o0			! advance buf ptr either way
	sll	%o4, 16, %o4			! create upper hword
4:	ldub	[%o0], %o5			! get final byte
	sll	%o5, 8, %o5			! put into place
	or	%o5, %o4, %o4			! coalesce with hword (if any)
6:	addcc	%o4, %o2, %o2			! add to sum
1:	retl					! get outta here
	addx	%g0, %o2, %o0			! add final carry into retval

	/* Also do alignment out of band to get better cache patterns. */
csum_partial_fix_alignment:
	cmp	%o1, 6
	bl	cpte - 0x4
	andcc	%o0, 0x2, %g0
	be	1f
	andcc	%o0, 0x4, %g0
	lduh	[%o0 + 0x00], %g2
	sub	%o1, 2, %o1
	add	%o0, 2, %o0
	sll	%g2, 16, %g2
	addcc	%g2, %o2, %o2
	srl	%o2, 16, %g3
	addx	%g0, %g3, %g2
	sll	%o2, 16, %o2
	sll	%g2, 16, %g3
	srl	%o2, 16, %o2
	andcc	%o0, 0x4, %g0
	or	%g3, %o2, %o2
1:	be	cpa
	andcc	%o1, 0xffffff80, %o3
	ld	[%o0 + 0x00], %g2
	sub	%o1, 4, %o1
	addcc	%g2, %o2, %o2
	add	%o0, 4, %o0
	addx	%g0, %o2, %o2
	b	cpa
	andcc	%o1, 0xffffff80, %o3

	/* The common case is to get called with a nicely aligned
	 * buffer of size 0x20.  Follow the code path for that case.
	 */
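	/* Overall flow of csum_partial(buf, len, sum), result in %o0:
	 * fix up a misaligned buffer first, eat 128 bytes per trip through
	 * the CSUM_BIGCHUNK loop, then jump Duff-style into the
	 * CSUM_LASTCHUNK table for the remaining 16-byte groups (each table
	 * entry is 6 instructions, i.e. 24 bytes, hence the
	 * "cpte - 8 - g1 - g1/2" address arithmetic below), and finally let
	 * csum_partial_end_cruft mop up the last 0-15 bytes.
	 */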
	.globl	csum_partial
EXPORT_SYMBOL(csum_partial)
csum_partial:			/* %o0=buf, %o1=len, %o2=sum */
	andcc	%o0, 0x7, %g0			! alignment problems?
	bne	csum_partial_fix_alignment	! yep, handle it
	sethi	%hi(cpte - 8), %g7		! prepare table jmp ptr
	andcc	%o1, 0xffffff80, %o3		! num loop iterations
cpa:	be	3f				! none to do
	andcc	%o1, 0x70, %g1			! clears carry flag too
5:	CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	addx	%g0, %o2, %o2			! sink in final carry
	subcc	%o3, 128, %o3			! detract from loop iters
	bne	5b				! more to do
	add	%o0, 128, %o0			! advance buf ptr
	andcc	%o1, 0x70, %g1			! clears carry flag too
3:	be	cpte				! nope
	andcc	%o1, 0xf, %g0			! anything left at all?
	srl	%g1, 1, %o4			! compute offset
	sub	%g7, %g1, %g7			! adjust jmp ptr
	sub	%g7, %o4, %g7			! final jmp ptr adjust
	jmp	%g7 + %lo(cpte - 8)		! enter the table
	add	%o0, %g1, %o0			! advance buf ptr
cptbl:	CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
	addx	%g0, %o2, %o2			! fetch final carry
	andcc	%o1, 0xf, %g0			! anything left at all?
cpte:	bne	csum_partial_end_cruft		! yep, handle it
	andcc	%o1, 8, %g0			! check how much
cpout:	retl					! get outta here
	mov	%o2, %o0			! return computed csum

	.globl	__csum_partial_copy_start, __csum_partial_copy_end
__csum_partial_copy_start:

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
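
/* EX/EX2/EX3 wrap a single load or store and record its address plus a
 * fixup target in __ex_table, so a fault branches into the recovery code
 * at 30:/96: further down.  EXT records a whole instruction range (the
 * unrolled copy loops) together with a handler (20:/21:/22:) that works
 * out how far the copy had progressed; see the calculations after
 * __csum_partial_copy_end.
 */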
#define EX(x,y,a,b) \
98:	x,y; \
	.section .fixup,ALLOC,EXECINSTR; \
	.align	4; \
99:	ba 30f; \
	a, b, %o3; \
	.section __ex_table,ALLOC; \
	.align	4; \
	.word	98b, 99b; \
	.text; \
	.align	4

#define EX2(x,y) \
98:	x,y; \
	.section __ex_table,ALLOC; \
	.align	4; \
	.word	98b, 30f; \
	.text; \
	.align	4

#define EX3(x,y) \
98:	x,y; \
	.section __ex_table,ALLOC; \
	.align	4; \
	.word	98b, 96f; \
	.text; \
	.align	4

#define EXT(start,end,handler) \
	.section __ex_table,ALLOC; \
	.align	4; \
	.word	start, 0, end, handler; \
	.text; \
	.align	4

	/* This aligned version executes typically in 8.5 superscalar cycles, this
	 * is the best I can do.  I say 8.5 because the final add will pair with
	 * the next ldd in the main unrolled loop.  Thus the pipe is always full.
	 * If you change these macros (including order of instructions),
	 * please check the fixup code below as well.
	 */
#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[src + off + 0x00], t0; \
	ldd	[src + off + 0x08], t2; \
	addxcc	t0, sum, sum; \
	ldd	[src + off + 0x10], t4; \
	addxcc	t1, sum, sum; \
	ldd	[src + off + 0x18], t6; \
	addxcc	t2, sum, sum; \
	std	t0, [dst + off + 0x00]; \
	addxcc	t3, sum, sum; \
	std	t2, [dst + off + 0x08]; \
	addxcc	t4, sum, sum; \
	std	t4, [dst + off + 0x10]; \
	addxcc	t5, sum, sum; \
	std	t6, [dst + off + 0x18]; \
	addxcc	t6, sum, sum; \
	addxcc	t7, sum, sum;

	/* 12 superscalar cycles seems to be the limit for this case,
	 * because of this we thus do all the ldd's together to get
	 * Viking MXCC into streaming mode.  Ho hum...
	 */
#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[src + off + 0x00], t0; \
	ldd	[src + off + 0x08], t2; \
	ldd	[src + off + 0x10], t4; \
	ldd	[src + off + 0x18], t6; \
	st	t0, [dst + off + 0x00]; \
	addxcc	t0, sum, sum; \
	st	t1, [dst + off + 0x04]; \
	addxcc	t1, sum, sum; \
	st	t2, [dst + off + 0x08]; \
	addxcc	t2, sum, sum; \
	st	t3, [dst + off + 0x0c]; \
	addxcc	t3, sum, sum; \
	st	t4, [dst + off + 0x10]; \
	addxcc	t4, sum, sum; \
	st	t5, [dst + off + 0x14]; \
	addxcc	t5, sum, sum; \
	st	t6, [dst + off + 0x18]; \
	addxcc	t6, sum, sum; \
	st	t7, [dst + off + 0x1c]; \
	addxcc	t7, sum, sum;

	/* Yuck, 6 superscalar cycles... */
#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \
	ldd	[src - off - 0x08], t0; \
	ldd	[src - off - 0x00], t2; \
	addxcc	t0, sum, sum; \
	st	t0, [dst - off - 0x08]; \
	addxcc	t1, sum, sum; \
	st	t1, [dst - off - 0x04]; \
	addxcc	t2, sum, sum; \
	st	t2, [dst - off - 0x00]; \
	addxcc	t3, sum, sum; \
	st	t3, [dst - off + 0x04];

	/* Handle the end cruft code out of band for better cache patterns. */
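	/* On entry: %o0 = src, %o1 = dst, %g7 = running sum, and only the
	 * low four bits of %o3 (the trailing byte count) matter; the
	 * condition codes already reflect "%o3 & 8".  The leftover 8/4/2/1
	 * bytes are copied and summed, then the final carry is folded into
	 * the return value.
	 */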
cc_end_cruft:
	be	1f
	andcc	%o3, 4, %g0
	EX(ldd	[%o0 + 0x00], %g2, and %o3, 0xf)
	add	%o1, 8, %o1
	addcc	%g2, %g7, %g7
	add	%o0, 8, %o0
	addxcc	%g3, %g7, %g7
	EX2(st	%g2, [%o1 - 0x08])
	addx	%g0, %g7, %g7
	andcc	%o3, 4, %g0
	EX2(st	%g3, [%o1 - 0x04])
1:	be	1f
	andcc	%o3, 3, %o3
	EX(ld	[%o0 + 0x00], %g2, add %o3, 4)
	add	%o1, 4, %o1
	addcc	%g2, %g7, %g7
	EX2(st	%g2, [%o1 - 0x04])
	addx	%g0, %g7, %g7
	andcc	%o3, 3, %g0
	add	%o0, 4, %o0
1:	be	1f
	addcc	%o3, -1, %g0
	bne	2f
	subcc	%o3, 2, %o3
	b	4f
	or	%g0, %g0, %o4
2:	EX(lduh	[%o0 + 0x00], %o4, add %o3, 2)
	add	%o0, 2, %o0
	EX2(sth	%o4, [%o1 + 0x00])
	be	6f
	add	%o1, 2, %o1
	sll	%o4, 16, %o4
4:	EX(ldub	[%o0 + 0x00], %o5, add %g0, 1)
	EX2(stb	%o5, [%o1 + 0x00])
	sll	%o5, 8, %o5
	or	%o5, %o4, %o4
6:	addcc	%o4, %g7, %g7
1:	retl
	addx	%g0, %g7, %o0

	/* Also, handle the alignment code out of band. */
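	/* Get the source onto a doubleword boundary before the unrolled
	 * loops.  For short lengths (< 16) this first checks whether any
	 * alignment work is needed at all and may go straight to the
	 * end-cruft code; odd source addresses are punted to the
	 * byte-at-a-time ccslow path; otherwise a leading halfword and/or
	 * word is copied and summed here.
	 */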
293cc_dword_align:
Tkhai Kirillb1054282011-05-10 02:31:41 +0000294 cmp %g1, 16
295 bge 1f
296 srl %g1, 1, %o3
2972: cmp %o3, 0
298 be,a ccte
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 andcc %g1, 0xf, %o3
Tkhai Kirillb1054282011-05-10 02:31:41 +0000300 andcc %o3, %o0, %g0 ! Check %o0 only (%o1 has the same last 2 bits)
301 be,a 2b
302 srl %o3, 1, %o3
3031: andcc %o0, 0x1, %g0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 bne ccslow
305 andcc %o0, 0x2, %g0
306 be 1f
307 andcc %o0, 0x4, %g0
308 EX(lduh [%o0 + 0x00], %g4, add %g1, 0)
309 sub %g1, 2, %g1
310 EX2(sth %g4, [%o1 + 0x00])
311 add %o0, 2, %o0
312 sll %g4, 16, %g4
313 addcc %g4, %g7, %g7
314 add %o1, 2, %o1
315 srl %g7, 16, %g3
316 addx %g0, %g3, %g4
317 sll %g7, 16, %g7
318 sll %g4, 16, %g3
319 srl %g7, 16, %g7
320 andcc %o0, 0x4, %g0
321 or %g3, %g7, %g7
3221: be 3f
323 andcc %g1, 0xffffff80, %g0
324 EX(ld [%o0 + 0x00], %g4, add %g1, 0)
325 sub %g1, 4, %g1
326 EX2(st %g4, [%o1 + 0x00])
327 add %o0, 4, %o0
328 addcc %g4, %g7, %g7
329 add %o1, 4, %o1
330 addx %g0, %g7, %g7
331 b 3f
332 andcc %g1, 0xffffff80, %g0
333
	/* Sun, you just can't beat me, you just can't.  Stop trying,
	 * give up.  I'm serious, I am going to kick the living shit
	 * out of you, game over, lights out.
	 */
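	/* Copy-and-checksum entry point.  If src and dst disagree in their
	 * low two address bits everything goes through the slow
	 * halfword/byte path (ccslow).  Otherwise the source is doubleword
	 * aligned if necessary, 128 bytes are copied and summed per
	 * iteration (the _ALIGNED variant when the destination is 8-byte
	 * aligned), a Duff-style jump table handles the remaining 16-byte
	 * groups, and cc_end_cruft finishes the last few bytes.
	 */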
	.align	8
	.globl	__csum_partial_copy_sparc_generic
EXPORT_SYMBOL(__csum_partial_copy_sparc_generic)
__csum_partial_copy_sparc_generic:
					/* %o0=src, %o1=dest, %g1=len, %g7=sum */
	xor	%o0, %o1, %o4		! get changing bits
	andcc	%o4, 3, %g0		! check for mismatched alignment
	bne	ccslow			! better this than unaligned/fixups
	andcc	%o0, 7, %g0		! need to align things?
	bne	cc_dword_align		! yes, we check for short lengths there
	andcc	%g1, 0xffffff80, %g0	! can we use unrolled loop?
3:	be	3f			! nope, less than one loop remains
	andcc	%o1, 4, %g0		! dest aligned on 4 or 8 byte boundary?
	be	ccdbl + 4		! 8 byte aligned, kick ass
5:	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
10:	EXT(5b, 10b, 20f)		! note for exception handling
	sub	%g1, 128, %g1		! detract from length
	addx	%g0, %g7, %g7		! add in last carry bit
	andcc	%g1, 0xffffff80, %g0	! more to csum?
	add	%o0, 128, %o0		! advance src ptr
	bne	5b			! we did not go negative, continue looping
	add	%o1, 128, %o1		! advance dest ptr
3:	andcc	%g1, 0x70, %o2		! can use table?
ccmerge:be	ccte			! nope, go and check for end cruft
	andcc	%g1, 0xf, %o3		! get low bits of length (clears carry btw)
	srl	%o2, 1, %o4		! begin negative offset computation
	sethi	%hi(12f), %o5		! set up table ptr end
	add	%o0, %o2, %o0		! advance src ptr
	sub	%o5, %o4, %o5		! continue table calculation
	sll	%o2, 1, %g2		! constant multiplies are fun...
	sub	%o5, %g2, %o5		! some more adjustments
	jmp	%o5 + %lo(12f)		! jump into it, duff style, wheee...
	add	%o1, %o2, %o1		! advance dest ptr (carry is clear btw)
cctbl:	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
12:	EXT(cctbl, 12b, 22f)		! note for exception table handling
	addx	%g0, %g7, %g7
	andcc	%o3, 0xf, %g0		! check for low bits set
ccte:	bne	cc_end_cruft		! something left, handle it out of band
	andcc	%o3, 8, %g0		! begin checks for that code
	retl				! return
	mov	%g7, %o0		! give em the computed checksum
ccdbl:	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
11:	EXT(ccdbl, 11b, 21f)		! note for exception table handling
	sub	%g1, 128, %g1		! detract from length
	addx	%g0, %g7, %g7		! add in last carry bit
	andcc	%g1, 0xffffff80, %g0	! more to csum?
	add	%o0, 128, %o0		! advance src ptr
	bne	ccdbl			! we did not go negative, continue looping
	add	%o1, 128, %o1		! advance dest ptr
	b	ccmerge			! finish it off, above
	andcc	%g1, 0x70, %o2		! can use table? (clears carry btw)

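	/* Slow path: src and dst are mutually misaligned, so copy and sum
	 * in halfword/byte units.  %o5 remembers whether the source started
	 * on an odd address; if so, the two bytes of the folded 16-bit sum
	 * are swapped at the end so the result matches what the aligned
	 * path would have produced.
	 */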
ccslow:	cmp	%g1, 0
	mov	0, %g5
	bleu	4f
	andcc	%o0, 1, %o5
	be,a	1f
	srl	%g1, 1, %g4
	sub	%g1, 1, %g1
	EX(ldub	[%o0], %g5, add %g1, 1)
	add	%o0, 1, %o0
	EX2(stb	%g5, [%o1])
	srl	%g1, 1, %g4
	add	%o1, 1, %o1
1:	cmp	%g4, 0
	be,a	3f
	andcc	%g1, 1, %g0
	andcc	%o0, 2, %g0
	be,a	1f
	srl	%g4, 1, %g4
	EX(lduh	[%o0], %o4, add %g1, 0)
	sub	%g1, 2, %g1
	srl	%o4, 8, %g2
	sub	%g4, 1, %g4
	EX2(stb	%g2, [%o1])
	add	%o4, %g5, %g5
	EX2(stb	%o4, [%o1 + 1])
	add	%o0, 2, %o0
	srl	%g4, 1, %g4
	add	%o1, 2, %o1
1:	cmp	%g4, 0
	be,a	2f
	andcc	%g1, 2, %g0
	EX3(ld	[%o0], %o4)
5:	srl	%o4, 24, %g2
	srl	%o4, 16, %g3
	EX2(stb	%g2, [%o1])
	srl	%o4, 8, %g2
	EX2(stb	%g3, [%o1 + 1])
	add	%o0, 4, %o0
	EX2(stb	%g2, [%o1 + 2])
	addcc	%o4, %g5, %g5
	EX2(stb	%o4, [%o1 + 3])
	addx	%g5, %g0, %g5	! I am now too lazy to optimize this (question
	add	%o1, 4, %o1	! whether it is worth it). Maybe some day - with
	subcc	%g4, 1, %g4	! the sll/srl tricks
	bne,a	5b
	EX3(ld	[%o0], %o4)
	sll	%g5, 16, %g2
	srl	%g5, 16, %g5
	srl	%g2, 16, %g2
	andcc	%g1, 2, %g0
	add	%g2, %g5, %g5
2:	be,a	3f
	andcc	%g1, 1, %g0
	EX(lduh	[%o0], %o4, and %g1, 3)
	andcc	%g1, 1, %g0
	srl	%o4, 8, %g2
	add	%o0, 2, %o0
	EX2(stb	%g2, [%o1])
	add	%g5, %o4, %g5
	EX2(stb	%o4, [%o1 + 1])
	add	%o1, 2, %o1
3:	be,a	1f
	sll	%g5, 16, %o4
	EX(ldub	[%o0], %g2, add %g0, 1)
	sll	%g2, 8, %o4
	EX2(stb	%g2, [%o1])
	add	%g5, %o4, %g5
	sll	%g5, 16, %o4
1:	addcc	%o4, %g5, %g5
	srl	%g5, 16, %o4
	addx	%g0, %o4, %g5
	orcc	%o5, %g0, %g0
	be	4f
	srl	%g5, 8, %o4
	and	%g5, 0xff, %g2
	and	%o4, 0xff, %o4
	sll	%g2, 8, %g2
	or	%g2, %o4, %g5
4:	addcc	%g7, %g5, %g7
	retl
	addx	%g0, %g7, %o0
__csum_partial_copy_end:

/* We do these strange calculations for the csum_*_from_user case only, i.e.
 * we only bother with faults on loads... */

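/* In the fixup handlers below, %g2 is provided by the range-exception code
 * (the four-word __ex_table entries emitted by EXT) and is, roughly, how far
 * into the faulting unrolled block execution got.  The magic constants are
 * simply the macro sizes: CSUMCOPY_BIGCHUNK is 20 instructions,
 * CSUMCOPY_BIGCHUNK_ALIGNED is 16 and CSUMCOPY_LASTCHUNK is 10, hence the
 * /20, /16 and /10 below.
 */
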
/* o2 = ((g2%20)&3)*8
 * o3 = g1 - (g2/20)*32 - o2 */
20:
	cmp	%g2, 20
	blu,a	1f
	and	%g2, 3, %o2
	sub	%g1, 32, %g1
	b	20b
	sub	%g2, 20, %g2
1:
	sll	%o2, 3, %o2
	b	31f
	sub	%g1, %o2, %o3

/* o2 = (!(g2 & 15) ? 0 : (((g2 & 15) + 1) & ~1)*8)
 * o3 = g1 - (g2/16)*32 - o2 */
21:
	andcc	%g2, 15, %o3
	srl	%g2, 4, %g2
	be,a	1f
	clr	%o2
	add	%o3, 1, %o3
	and	%o3, 14, %o3
	sll	%o3, 3, %o2
1:
	sll	%g2, 5, %g2
	sub	%g1, %g2, %o3
	b	31f
	sub	%o3, %o2, %o3

/* o0 += (g2/10)*16 - 0x70
 * o1 += (g2/10)*16 - 0x70
 * o2 = (g2 % 10) ? 8 : 0
 * o3 += 0x70 - (g2/10)*16 - o2 */
22:
	cmp	%g2, 10
	blu,a	1f
	sub	%o0, 0x70, %o0
	add	%o0, 16, %o0
	add	%o1, 16, %o1
	sub	%o3, 16, %o3
	b	22b
	sub	%g2, 10, %g2
1:
	sub	%o1, 0x70, %o1
	add	%o3, 0x70, %o3
	clr	%o2
	tst	%g2
	bne,a	1f
	mov	8, %o2
1:
	b	31f
	sub	%o3, %o2, %o3
96:
	and	%g1, 3, %g1
	sll	%g4, 2, %g4
	add	%g1, %g4, %o3
30:
/* %o1 is dst
 * %o3 is # bytes to zero out
 * %o4 is faulting address
 * %o5 is %pc where fault occurred */
	clr	%o2
31:
/* %o0 is src
 * %o1 is dst
 * %o2 is # of bytes to copy from src to dst
 * %o3 is # bytes to zero out
 * %o4 is faulting address
 * %o5 is %pc where fault occurred */
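/* Recovery: ask lookup_fault what happened; if it returns 2, first copy the
 * %o2 bytes that can still be copied with memcpy and clear the remaining %o3
 * destination bytes with __bzero; in any case report -EFAULT through the
 * error pointer the caller left on its stack at [%sp + 168].
 */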
	save	%sp, -104, %sp
	mov	%i5, %o0
	mov	%i7, %o1
	mov	%i4, %o2
	call	lookup_fault
	mov	%g7, %i4
	cmp	%o0, 2
	bne	1f
	add	%g0, -EFAULT, %i5
	tst	%i2
	be	2f
	mov	%i0, %o1
	mov	%i1, %o0
5:
	call	memcpy
	mov	%i2, %o2
	tst	%o0
	bne,a	2f
	add	%i3, %i2, %i3
	add	%i1, %i2, %i1
2:
	mov	%i1, %o0
6:
	call	__bzero
	mov	%i3, %o1
1:
	ld	[%sp + 168], %o2		! struct_ptr of parent
	st	%i5, [%o2]
	ret
	restore

	.section __ex_table,#alloc
	.align 4
	.word 5b,2
	.word 6b,2