Blame - arch/sparc/lib/checksum.S - kernel/msm

blob: 77f228533d47850ddfe39e4d8d24e748d1b90d11 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/* checksum.S: Sparc optimized checksum code.
				2	*
				3	* Copyright(C) 1995 Linus Torvalds
				4	* Copyright(C) 1995 Miguel de Icaza
				5	* Copyright(C) 1996 David S. Miller
				6	* Copyright(C) 1997 Jakub Jelinek
				7	*
				8	* derived from:
				9	* Linux/Alpha checksum c-code
				10	* Linux/ix86 inline checksum assembly
				11	* RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code)
				12	* David Mosberger-Tang for optimized reference c-code
				13	* BSD4.4 portable checksum routine
				14	*/
				15
				16	#include <asm/errno.h>
				17
				18	#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5) /* sum += the eight words at buf+offset+0x00..0x1f, chained through the carry flag; ldd fills an even/odd register pair, so t1/t3/t5 must be the odd partners of t0/t2/t4 */ \
				19	ldd [buf + offset + 0x00], t0; /* words 0-1 -> t0/t1 */ \
				20	ldd [buf + offset + 0x08], t2; /* words 2-3 -> t2/t3 */ \
				21	addxcc t0, sum, sum; /* add with carry-in, leave carry-out in the flags */ \
				22	addxcc t1, sum, sum; \
				23	ldd [buf + offset + 0x10], t4; /* words 4-5 -> t4/t5 */ \
				24	addxcc t2, sum, sum; \
				25	addxcc t3, sum, sum; \
				26	ldd [buf + offset + 0x18], t0; /* words 6-7 reuse the t0/t1 pair, already summed above */ \
				27	addxcc t4, sum, sum; \
				28	addxcc t5, sum, sum; \
				29	addxcc t0, sum, sum; \
				30	addxcc t1, sum, sum; /* last carry-out stays in the flags for the next chunk or the folding addx that follows the expansions */
				31
				32	#define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3) /* sum += the four words at buf-offset-0x08 .. buf-offset+0x07 (buf already advanced past the data); exactly 6 instructions per expansion, which the computed jump into the cptbl table relies on */ \
				33	ldd [buf - offset - 0x08], t0; /* words 0-1 -> t0/t1 (ldd fills an even/odd pair) */ \
				34	ldd [buf - offset - 0x00], t2; /* words 2-3 -> t2/t3 */ \
				35	addxcc t0, sum, sum; /* add with carry-in, leave carry-out in the flags */ \
				36	addxcc t1, sum, sum; \
				37	addxcc t2, sum, sum; \
				38	addxcc t3, sum, sum; /* carry-out is left for the next expansion or the addx after the table */
				39
				40	/* Do end cruft out of band to get better cache patterns. */
				41	csum_partial_end_cruft: ! sum the last (len & 0xf) bytes: %o0=buf, %o1=len, %o2=sum, result returned in %o0
				42	be 1f ! flags come from the andcc %o1, 8 in the delay slot at cpte: no dword left
				43	andcc %o1, 4, %g0 ! nope, check for word remaining
				44	ldd [%o0], %g2 ! load two
				45	addcc %g2, %o2, %o2 ! add first word to sum
				46	addxcc %g3, %o2, %o2 ! add second word as well
				47	add %o0, 8, %o0 ! advance buf ptr
				48	addx %g0, %o2, %o2 ! add in final carry
				49	andcc %o1, 4, %g0 ! check again for word remaining
				50	1: be 1f ! nope, skip this code
				51	andcc %o1, 3, %o1 ! check for trailing bytes (%o1 = len & 3 from here on)
				52	ld [%o0], %g2 ! load it
				53	addcc %g2, %o2, %o2 ! add to sum
				54	add %o0, 4, %o0 ! advance buf ptr
				55	addx %g0, %o2, %o2 ! add in final carry
				56	andcc %o1, 3, %g0 ! check again for trailing bytes
				57	1: be 1f ! no trailing bytes, return
				58	addcc %o1, -1, %g0 ! only one byte remains?
				59	bne 2f ! at least two bytes more
				60	subcc %o1, 2, %o1 ! only two bytes more?
				61	b 4f ! only one byte remains
				62	or %g0, %g0, %o4 ! clear fake hword value
				63	2: lduh [%o0], %o4 ! get hword
				64	be 6f ! jmp if only hword remains
				65	add %o0, 2, %o0 ! advance buf ptr either way
				66	sll %o4, 16, %o4 ! create upper hword
				67	4: ldub [%o0], %o5 ! get final byte
				68	sll %o5, 8, %o5 ! put into place
				69	or %o5, %o4, %o4 ! coalesce with hword (if any)
				70	6: addcc %o4, %o2, %o2 ! add to sum
				71	1: retl ! get outta here
				72	addx %g0, %o2, %o0 ! add final carry into retval (delay slot of retl)
				73
				74	/* Also do alignment out of band to get better cache patterns. */
				75	csum_partial_fix_alignment: ! consume a leading halfword and/or word so %o0 becomes 8-byte aligned, then rejoin at cpa
				76	cmp %o1, 6 ! len < 6 (signed)?
				77	bl cpte - 0x4 ! yes: go to the andcc %o1, 0xf just before cpte and handle it all as end cruft
				78	andcc %o0, 0x2, %g0 ! delay slot: is bit 1 of buf set?
				79	be 1f ! no: no leading halfword to consume
				80	andcc %o0, 0x4, %g0 ! delay slot: is bit 2 of buf set? (tested at 1:)
				81	lduh [%o0 + 0x00], %g2 ! load the leading halfword
				82	sub %o1, 2, %o1 ! len -= 2
				83	add %o0, 2, %o0 ! buf += 2
				84	sll %g2, 16, %g2 ! move the halfword into the upper 16 bits
				85	addcc %g2, %o2, %o2 ! add to sum, carry-out in the flags
				86	srl %o2, 16, %g3 ! %g3 = upper half of sum
				87	addx %g0, %g3, %g2 ! %g2 = upper half + carry
				88	sll %o2, 16, %o2 ! drop the upper half of sum ...
				89	sll %g2, 16, %g3 ! %g3 = (upper half + carry) << 16
				90	srl %o2, 16, %o2 ! ... leaving the lower half in bits 0-15
				91	andcc %o0, 0x4, %g0 ! retest bit 2 on the advanced buf
				92	or %g3, %o2, %o2 ! recombine the two halves into sum
				93	1: be cpa ! bit 2 clear: nothing more to align, rejoin the main path
				94	andcc %o1, 0xffffff80, %o3 ! delay slot: %o3 and flags as cpa expects
				95	ld [%o0 + 0x00], %g2 ! load the leading word
				96	sub %o1, 4, %o1 ! len -= 4
				97	addcc %g2, %o2, %o2 ! add to sum
				98	add %o0, 4, %o0 ! buf += 4
				99	addx %g0, %o2, %o2 ! fold the carry back in
				100	b cpa ! rejoin the main path
				101	andcc %o1, 0xffffff80, %o3 ! delay slot: %o3 and flags as cpa expects
				102
				103	/* The common case is to get called with a nicely aligned
				104	* buffer of size 0x20. Follow the code path for that case.
				105	*/
				106	.globl csum_partial
				107	csum_partial: /* %o0=buf, %o1=len, %o2=sum; returns the updated sum in %o0 */
				108	andcc %o0, 0x7, %g0 ! alignment problems?
				109	bne csum_partial_fix_alignment ! yep, handle it
				110	sethi %hi(cpte - 8), %g7 ! prepare table jmp ptr (delay slot, runs on both paths)
				111	andcc %o1, 0xffffff80, %o3 ! num loop iterations (as a byte count, multiple of 128)
				112	cpa: be 3f ! none to do
				113	andcc %o1, 0x70, %g1 ! clears carry flag too
				114	5: CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
				115	CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
				116	CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
				117	CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
				118	addx %g0, %o2, %o2 ! sink in final carry
				119	subcc %o3, 128, %o3 ! detract from loop iters
				120	bne 5b ! more to do
				121	add %o0, 128, %o0 ! advance buf ptr
				122	andcc %o1, 0x70, %g1 ! clears carry flag too
				123	3: be cpte ! nope
				124	andcc %o1, 0xf, %g0 ! anything left at all?
				125	srl %g1, 1, %o4 ! compute offset: %o4 = %g1 / 2
				126	sub %g7, %g1, %g7 ! adjust jmp ptr
				127	sub %g7, %o4, %g7 ! final jmp ptr adjust: 1.5 * %g1 in total, i.e. 24 code bytes per 16 data bytes
				128	jmp %g7 + %lo(cpte - 8) ! enter the table
				129	add %o0, %g1, %o0 ! advance buf ptr
				130	cptbl: CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
				131	CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
				132	CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
				133	CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
				134	CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
				135	CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
				136	CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
				137	addx %g0, %o2, %o2 ! fetch final carry (this is cpte - 8, the zero-offset table target)
				138	andcc %o1, 0xf, %g0 ! anything left at all? (this is cpte - 4, used by the short-length alignment path)
				139	cpte: bne csum_partial_end_cruft ! yep, handle it
				140	andcc %o1, 8, %g0 ! check how much
				141	cpout: retl ! get outta here
				142	mov %o2, %o0 ! return computed csum
				143
				144	.globl __csum_partial_copy_start, __csum_partial_copy_end ! exported labels bracketing the copy code below
				145	__csum_partial_copy_start:
				146
				147	/* Work around cpp -rob */
				148	#define ALLOC #alloc /* section flag, spelled via a macro so it can be used inside the function-like macros below */
				149	#define EXECINSTR #execinstr /* same, for the executable-section flag */
				150	#define EX(x,y,a,b) /* emit instruction "x,y" with an exception-table entry whose fixup stub computes %o3 = a op b and then enters the common handler at 30f */ \
				151	98: x,y; /* the guarded instruction */ \
				152	.section .fixup,ALLOC,EXECINSTR; \
				153	.align 4; \
				154	99: ba 30f; /* fixup stub: go to the common handler */ \
				155	a, b, %o3; /* delay slot of the ba: set %o3 from the caller-supplied operation */ \
				156	.section __ex_table,ALLOC; \
				157	.align 4; \
				158	.word 98b, 99b; /* table entry: guarded instruction, fixup stub */ \
				159	.text; \
				160	.align 4
				161
				162	#define EX2(x,y) /* emit instruction "x,y" with an exception-table entry that points straight at the common handler 30f, with no stub and no %o3 setup here */ \
				163	98: x,y; /* the guarded instruction */ \
				164	.section __ex_table,ALLOC; \
				165	.align 4; \
				166	.word 98b, 30f; /* table entry: guarded instruction, handler */ \
				167	.text; \
				168	.align 4
				169
				170	#define EX3(x,y) /* emit instruction "x,y" with an exception-table entry that points at handler 96f, which derives %o3 from %g1 and %g4 */ \
				171	98: x,y; /* the guarded instruction */ \
				172	.section __ex_table,ALLOC; \
				173	.align 4; \
				174	.word 98b, 96f; /* table entry: guarded instruction, handler */ \
				175	.text; \
				176	.align 4
				177
				178	#define EXT(start,end,handler) /* emit a four-word exception-table entry covering the code range start..end; emits no instructions itself */ \
				179	.section __ex_table,ALLOC; \
				180	.align 4; \
				181	.word start, 0, end, handler; /* the 0 in the second word distinguishes this from the two-word single-instruction entries - NOTE(review): confirm against the table lookup code */ \
				182	.text; \
				183	.align 4
				184
				185	/* This aligned version executes typically in 8.5 superscalar cycles, this
				186	* is the best I can do. I say 8.5 because the final add will pair with
				187	* the next ldd in the main unrolled loop. Thus the pipe is always full.
				188	* If you change these macros (including order of instructions),
				189	* please check the fixup code below as well.
				190	*/
				191	#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) /* copy 32 bytes from src+off to dst+off with doubleword stores while adding the eight words into sum; 16 instructions, a count the fixup code depends on */ \
				192	ldd [src + off + 0x00], t0; /* words 0-1 -> t0/t1 (ldd fills an even/odd pair) */ \
				193	ldd [src + off + 0x08], t2; /* words 2-3 -> t2/t3 */ \
				194	addxcc t0, sum, sum; /* add with carry-in, leave carry-out in the flags */ \
				195	ldd [src + off + 0x10], t4; /* words 4-5 -> t4/t5 */ \
				196	addxcc t1, sum, sum; \
				197	ldd [src + off + 0x18], t6; /* words 6-7 -> t6/t7 */ \
				198	addxcc t2, sum, sum; \
				199	std t0, [dst + off + 0x00]; /* store the t0/t1 pair */ \
				200	addxcc t3, sum, sum; \
				201	std t2, [dst + off + 0x08]; /* store the t2/t3 pair */ \
				202	addxcc t4, sum, sum; \
				203	std t4, [dst + off + 0x10]; /* store the t4/t5 pair */ \
				204	addxcc t5, sum, sum; \
				205	std t6, [dst + off + 0x18]; /* store the t6/t7 pair */ \
				206	addxcc t6, sum, sum; \
				207	addxcc t7, sum, sum; /* carry-out is left in the flags for the next expansion or the folding addx */
				208
				209	/* 12 superscalar cycles seems to be the limit for this case,
				210	* because of this we thus do all the ldd's together to get
				211	* Viking MXCC into streaming mode. Ho hum...
				212	*/
				213	#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) /* copy 32 bytes from src+off to dst+off with word stores while adding the eight words into sum; 20 instructions, a count the fixup code depends on */ \
				214	ldd [src + off + 0x00], t0; /* words 0-1 -> t0/t1 (ldd fills an even/odd pair) */ \
				215	ldd [src + off + 0x08], t2; /* words 2-3 -> t2/t3 */ \
				216	ldd [src + off + 0x10], t4; /* words 4-5 -> t4/t5 */ \
				217	ldd [src + off + 0x18], t6; /* words 6-7 -> t6/t7 */ \
				218	st t0, [dst + off + 0x00]; /* each word is stored, then added with carry */ \
				219	addxcc t0, sum, sum; \
				220	st t1, [dst + off + 0x04]; \
				221	addxcc t1, sum, sum; \
				222	st t2, [dst + off + 0x08]; \
				223	addxcc t2, sum, sum; \
				224	st t3, [dst + off + 0x0c]; \
				225	addxcc t3, sum, sum; \
				226	st t4, [dst + off + 0x10]; \
				227	addxcc t4, sum, sum; \
				228	st t5, [dst + off + 0x14]; \
				229	addxcc t5, sum, sum; \
				230	st t6, [dst + off + 0x18]; \
				231	addxcc t6, sum, sum; \
				232	st t7, [dst + off + 0x1c]; \
				233	addxcc t7, sum, sum; /* carry-out is left in the flags for the next expansion or the folding addx */
				234
				235	/* Yuck, 6 superscalar cycles... */
				236	#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) /* copy the 16 bytes at src-off-0x08 .. src-off+0x07 to the same offsets from dst while adding the four words into sum; 10 instructions, which the computed jump into cctbl and the fixup code rely on */ \
				237	ldd [src - off - 0x08], t0; /* words 0-1 -> t0/t1 (ldd fills an even/odd pair) */ \
				238	ldd [src - off - 0x00], t2; /* words 2-3 -> t2/t3 */ \
				239	addxcc t0, sum, sum; /* each word is added with carry, then stored */ \
				240	st t0, [dst - off - 0x08]; \
				241	addxcc t1, sum, sum; \
				242	st t1, [dst - off - 0x04]; \
				243	addxcc t2, sum, sum; \
				244	st t2, [dst - off - 0x00]; \
				245	addxcc t3, sum, sum; \
				246	st t3, [dst - off + 0x04]; /* carry-out of the last addxcc is left in the flags */
				247
				248	/* Handle the end cruft code out of band for better cache patterns. */
				249	cc_end_cruft: ! copy and sum the last (len & 0xf) bytes: %o0=src, %o1=dst, %o3=len & 0xf, %g7=sum, result returned in %o0
				250	be 1f ! flags come from the andcc %o3, 8 in the delay slot at ccte: no dword left
				251	andcc %o3, 4, %g0 ! delay slot: word remaining?
				252	EX(ldd [%o0 + 0x00], %g2, and %o3, 0xf) ! load two words into %g2/%g3
				253	add %o1, 8, %o1 ! advance dst (the stores below use negative offsets)
				254	addcc %g2, %g7, %g7 ! add first word to sum
				255	add %o0, 8, %o0 ! advance src
				256	addxcc %g3, %g7, %g7 ! add second word with carry
				257	EX2(st %g2, [%o1 - 0x08]) ! store first word
				258	addx %g0, %g7, %g7 ! fold the carry back in
				259	andcc %o3, 4, %g0 ! check again for a word remaining
				260	EX2(st %g3, [%o1 - 0x04]) ! store second word
				261	1: be 1f ! no word left, skip
				262	andcc %o3, 3, %o3 ! delay slot: %o3 = trailing byte count
				263	EX(ld [%o0 + 0x00], %g2, add %o3, 4) ! load the word
				264	add %o1, 4, %o1 ! advance dst
				265	addcc %g2, %g7, %g7 ! add to sum
				266	EX2(st %g2, [%o1 - 0x04]) ! store the word
				267	addx %g0, %g7, %g7 ! fold the carry back in
				268	andcc %o3, 3, %g0 ! check again for trailing bytes
				269	add %o0, 4, %o0 ! advance src (add leaves the flags alone)
				270	1: be 1f ! no trailing bytes, return
				271	addcc %o3, -1, %g0 ! delay slot: exactly one byte left?
				272	bne 2f ! no: at least two bytes
				273	subcc %o3, 2, %o3 ! delay slot: exactly two bytes left?
				274	b 4f ! one byte only
				275	or %g0, %g0, %o4 ! delay slot: no halfword part
				276	2: EX(lduh [%o0 + 0x00], %o4, add %o3, 2) ! load the halfword
				277	add %o0, 2, %o0 ! advance src
				278	EX2(sth %o4, [%o1 + 0x00]) ! store the halfword
				279	be 6f ! flags still from the subcc above: only the halfword remained
				280	add %o1, 2, %o1 ! delay slot: advance dst either way
				281	sll %o4, 16, %o4 ! halfword becomes the upper half of the partial word
				282	4: EX(ldub [%o0 + 0x00], %o5, add %g0, 1) ! load the final byte
				283	EX2(stb %o5, [%o1 + 0x00]) ! store the final byte
				284	sll %o5, 8, %o5 ! put the byte into bits 8-15
				285	or %o5, %o4, %o4 ! merge with the halfword, if any
				286	6: addcc %o4, %g7, %g7 ! add the partial word to sum
				287	1: retl
				288	addx %g0, %g7, %o0 ! delay slot: return sum plus the final carry
				289
				290	/* Also, handle the alignment code out of band. */
				291	cc_dword_align: ! copy and sum a leading halfword and/or word so %o0 becomes 8-byte aligned: %o0=src, %o1=dst, %g1=len, %g7=sum
				292	cmp %g1, 6 ! len < 6 (signed)?
				293	bl,a ccte ! yes: treat everything as end cruft
				294	andcc %g1, 0xf, %o3 ! (annulled unless taken) %o3 and flags as ccte expects
				295	andcc %o0, 0x1, %g0 ! src at an odd address?
				296	bne ccslow ! yes: use the byte-wise path
				297	andcc %o0, 0x2, %g0 ! delay slot: is bit 1 of src set?
				298	be 1f ! no: no leading halfword
				299	andcc %o0, 0x4, %g0 ! delay slot: is bit 2 of src set? (tested at 1:)
				300	EX(lduh [%o0 + 0x00], %g4, add %g1, 0) ! load the leading halfword
				301	sub %g1, 2, %g1 ! len -= 2
				302	EX2(sth %g4, [%o1 + 0x00]) ! store it
				303	add %o0, 2, %o0 ! src += 2
				304	sll %g4, 16, %g4 ! move the halfword into the upper 16 bits
				305	addcc %g4, %g7, %g7 ! add to sum, carry-out in the flags
				306	add %o1, 2, %o1 ! dst += 2
				307	srl %g7, 16, %g3 ! %g3 = upper half of sum
				308	addx %g0, %g3, %g4 ! %g4 = upper half + carry
				309	sll %g7, 16, %g7 ! drop the upper half of sum ...
				310	sll %g4, 16, %g3 ! %g3 = (upper half + carry) << 16
				311	srl %g7, 16, %g7 ! ... leaving the lower half in bits 0-15
				312	andcc %o0, 0x4, %g0 ! retest bit 2 on the advanced src
				313	or %g3, %g7, %g7 ! recombine the two halves into sum
				314	1: be 3f ! bit 2 clear: rejoin the main routine at its first 3: label
				315	andcc %g1, 0xffffff80, %g0 ! delay slot: flags as that 3: label expects
				316	EX(ld [%o0 + 0x00], %g4, add %g1, 0) ! load the leading word
				317	sub %g1, 4, %g1 ! len -= 4
				318	EX2(st %g4, [%o1 + 0x00]) ! store it
				319	add %o0, 4, %o0 ! src += 4
				320	addcc %g4, %g7, %g7 ! add to sum
				321	add %o1, 4, %o1 ! dst += 4
				322	addx %g0, %g7, %g7 ! fold the carry back in
				323	b 3f ! rejoin the main routine
				324	andcc %g1, 0xffffff80, %g0 ! delay slot: flags as that 3: label expects
				325
				326	/* Sun, you just can't beat me, you just can't. Stop trying,
				327	* give up. I'm serious, I am going to kick the living shit
				328	* out of you, game over, lights out.
				329	*/
				330	.align 8
				331	.globl __csum_partial_copy_sparc_generic
				332	__csum_partial_copy_sparc_generic:
				333	/* %o0=src, %o1=dest, %g1=len, %g7=sum; returns the updated sum in %o0 */
				334	xor %o0, %o1, %o4 ! get changing bits
				335	andcc %o4, 3, %g0 ! check for mismatched alignment
				336	bne ccslow ! better this than unaligned/fixups
				337	andcc %o0, 7, %g0 ! need to align things?
				338	bne cc_dword_align ! yes, we check for short lengths there
				339	andcc %g1, 0xffffff80, %g0 ! can we use unrolled loop?
				340	3: be 3f ! nope, less than one loop remains
				341	andcc %o1, 4, %g0 ! dest aligned on 4 or 8 byte boundary?
				342	be ccdbl + 4 ! 8 byte aligned, kick ass; the delay slot is the first ldd at 5:, identical to the first ldd at ccdbl, hence + 4
				343	5: CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
				344	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
				345	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
				346	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
				347	10: EXT(5b, 10b, 20f) ! note for exception handling
				348	sub %g1, 128, %g1 ! detract from length
				349	addx %g0, %g7, %g7 ! add in last carry bit
				350	andcc %g1, 0xffffff80, %g0 ! more to csum?
				351	add %o0, 128, %o0 ! advance src ptr
				352	bne 5b ! we did not go negative, continue looping
				353	add %o1, 128, %o1 ! advance dest ptr
				354	3: andcc %g1, 0x70, %o2 ! can use table?
				355	ccmerge:be ccte ! nope, go and check for end cruft
				356	andcc %g1, 0xf, %o3 ! get low bits of length (clears carry btw)
				357	srl %o2, 1, %o4 ! begin negative offset computation
				358	sethi %hi(12f), %o5 ! set up table ptr end
				359	add %o0, %o2, %o0 ! advance src ptr
				360	sub %o5, %o4, %o5 ! continue table calculation
				361	sll %o2, 1, %g2 ! constant multiplies are fun...
				362	sub %o5, %g2, %o5 ! some more adjustments: 2.5 * %o2 in total, i.e. 40 code bytes per 16 data bytes
				363	jmp %o5 + %lo(12f) ! jump into it, duff style, wheee...
				364	add %o1, %o2, %o1 ! advance dest ptr (carry is clear btw)
				365	cctbl: CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
				366	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
				367	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
				368	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
				369	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
				370	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
				371	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
				372	12: EXT(cctbl, 12b, 22f) ! note for exception table handling
				373	addx %g0, %g7, %g7 ! fold in the carry left by the table
				374	andcc %o3, 0xf, %g0 ! check for low bits set
				375	ccte: bne cc_end_cruft ! something left, handle it out of band
				376	andcc %o3, 8, %g0 ! begin checks for that code
				377	retl ! return
				378	mov %g7, %o0 ! give em the computed checksum
				379	ccdbl: CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
				380	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
				381	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
				382	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
				383	11: EXT(ccdbl, 11b, 21f) ! note for exception table handling
				384	sub %g1, 128, %g1 ! detract from length
				385	addx %g0, %g7, %g7 ! add in last carry bit
				386	andcc %g1, 0xffffff80, %g0 ! more to csum?
				387	add %o0, 128, %o0 ! advance src ptr
				388	bne ccdbl ! we did not go negative, continue looping
				389	add %o1, 128, %o1 ! advance dest ptr
				390	b ccmerge ! finish it off, above
				391	andcc %g1, 0x70, %o2 ! can use table? (clears carry btw)
				392
				393	ccslow: cmp %g1, 0 ! slow path, stores byte by byte: %o0=src, %o1=dst, %g1=len, %g7=sum; is len zero?
				394	mov 0, %g5 ! %g5 = local partial sum
				395	bleu 4f ! len == 0: just return the incoming sum
				396	andcc %o0, 1, %o5 ! delay slot: %o5 = 1 if src starts at an odd address
				397	be,a 1f ! even src: no leading byte
				398	srl %g1, 1, %g4 ! (annulled unless taken) %g4 = len / 2
				399	sub %g1, 1, %g1 ! len -= 1 for the leading byte
				400	EX(ldub [%o0], %g5, add %g1, 1) ! the leading byte starts the partial sum
				401	add %o0, 1, %o0 ! src += 1
				402	EX2(stb %g5, [%o1]) ! store the leading byte
				403	srl %g1, 1, %g4 ! %g4 = remaining len / 2
				404	add %o1, 1, %o1 ! dst += 1
				405	1: cmp %g4, 0 ! any halfwords at all?
				406	be,a 3f ! no: only a possible final byte
				407	andcc %g1, 1, %g0 ! (annulled unless taken) flags for the test at 3:
				408	andcc %o0, 2, %g0 ! is bit 1 of src set?
				409	be,a 1f ! no: src is word aligned already
				410	srl %g4, 1, %g4 ! (annulled unless taken) %g4 = word count
				411	EX(lduh [%o0], %o4, add %g1, 0) ! load the leading halfword
				412	sub %g1, 2, %g1 ! len -= 2
				413	srl %o4, 8, %g2 ! %g2 = high byte of the halfword
				414	sub %g4, 1, %g4 ! one halfword consumed
				415	EX2(stb %g2, [%o1]) ! store the high byte
				416	add %o4, %g5, %g5 ! add the halfword to the partial sum
				417	EX2(stb %o4, [%o1 + 1]) ! store the low byte
				418	add %o0, 2, %o0 ! src += 2
				419	srl %g4, 1, %g4 ! %g4 = word count
				420	add %o1, 2, %o1 ! dst += 2
				421	1: cmp %g4, 0 ! any whole words?
				422	be,a 2f ! no: go to the trailing halfword check
				423	andcc %g1, 2, %g0 ! (annulled unless taken) flags for the test at 2:
				424	EX3(ld [%o0], %o4) ! load the first word
				425	5: srl %o4, 24, %g2 ! word loop: %g2 = byte 0 (most significant)
				426	srl %o4, 16, %g3 ! %g3 low byte = byte 1
				427	EX2(stb %g2, [%o1]) ! store byte 0
				428	srl %o4, 8, %g2 ! %g2 low byte = byte 2
				429	EX2(stb %g3, [%o1 + 1]) ! store byte 1
				430	add %o0, 4, %o0 ! src += 4
				431	EX2(stb %g2, [%o1 + 2]) ! store byte 2
				432	addcc %o4, %g5, %g5 ! add the word to the partial sum
				433	EX2(stb %o4, [%o1 + 3]) ! store byte 3
				434	addx %g5, %g0, %g5 ! I am now too lazy to optimize this (question it
				435	add %o1, 4, %o1 ! is worthy). Maybe some day - with the sll/srl
				436	subcc %g4, 1, %g4 ! tricks
				437	bne,a 5b ! more words to go
				438	EX3(ld [%o0], %o4) ! (annulled unless taken) load the next word
				439	sll %g5, 16, %g2 ! fold the partial sum: %g2 = low half << 16
				440	srl %g5, 16, %g5 ! %g5 = high half
				441	srl %g2, 16, %g2 ! %g2 = low half
				442	andcc %g1, 2, %g0 ! trailing halfword?
				443	add %g2, %g5, %g5 ! %g5 = low half + high half
				444	2: be,a 3f ! no trailing halfword
				445	andcc %g1, 1, %g0 ! (annulled unless taken) flags for the test at 3:
				446	EX(lduh [%o0], %o4, and %g1, 3) ! load the trailing halfword
				447	andcc %g1, 1, %g0 ! final byte too? (flags survive to 3:)
				448	srl %o4, 8, %g2 ! %g2 = high byte of the halfword
				449	add %o0, 2, %o0 ! src += 2
				450	EX2(stb %g2, [%o1]) ! store the high byte
				451	add %g5, %o4, %g5 ! add the halfword to the partial sum
				452	EX2(stb %o4, [%o1 + 1]) ! store the low byte
				453	add %o1, 2, %o1 ! dst += 2
				454	3: be,a 1f ! no final byte
				455	sll %g5, 16, %o4 ! (annulled unless taken) %o4 = partial sum << 16 for the fold at 1:
				456	EX(ldub [%o0], %g2, add %g0, 1) ! load the final byte
				457	sll %g2, 8, %o4 ! place it in bits 8-15
				458	EX2(stb %g2, [%o1]) ! store the final byte
				459	add %g5, %o4, %g5 ! add it to the partial sum
				460	sll %g5, 16, %o4 ! %o4 = partial sum << 16 for the fold below
				461	1: addcc %o4, %g5, %g5 ! upper half now holds low half + high half, carry-out in the flags
				462	srl %g5, 16, %o4 ! take that upper half
				463	addx %g0, %o4, %g5 ! %g5 = folded 16-bit sum + carry
				464	orcc %o5, %g0, %g0 ! did src start at an odd address?
				465	be 4f ! no: use the partial sum as is
				466	srl %g5, 8, %o4 ! delay slot: %o4 = partial sum >> 8
				467	and %g5, 0xff, %g2 ! odd start: swap the two bytes of the 16-bit partial sum
				468	and %o4, 0xff, %o4 ! %o4 = old high byte
				469	sll %g2, 8, %g2 ! old low byte moves up
				470	or %g2, %o4, %g5 ! %g5 = byte-swapped partial sum
				471	4: addcc %g7, %g5, %g7 ! add the partial sum to the incoming sum
				472	retl
				473	addx %g0, %g7, %o0 ! delay slot: return sum plus the final carry
				474	__csum_partial_copy_end:
				475
				476	/* We do these strange calculations for the csum_*_from_user case only, ie.
				477	* we only bother with faults on loads... */
				478
				479	/* o2 = ((g2%20)&3)*8
				480	* o3 = g1 - (g2/20)*32 - o2 */
				481	20: ! handler named by the table entry for the 5b..10b range (word-store unrolled loop, 20 instructions per 32 bytes); NOTE(review): %g2 is presumably the index of the faulting instruction within that range, set outside this file - confirm
				482	cmp %g2, 20 ! a whole 20-instruction chunk still counted in %g2?
				483	blu,a 1f ! no: %g2 is now the index within a chunk
				484	and %g2, 3, %o2 ! (annulled unless taken) %o2 = %g2 & 3
				485	sub %g1, 32, %g1 ! one whole chunk: 32 bytes off %g1
				486	b 20b ! keep reducing
				487	sub %g2, 20, %g2 ! delay slot: %g2 -= 20
				488	1:
				489	sll %o2, 3, %o2 ! %o2 *= 8: bytes to copy
				490	b 31f ! enter the common handler with %o2 set
				491	sub %g1, %o2, %o3 ! delay slot: %o3 = %g1 - %o2, bytes to zero out
				492
				493	/* o2 = (!(g2 & 15) ? 0 : (((g2 & 15) + 1) & ~1)*8)
				494	* o3 = g1 - (g2/16)*32 - o2 */
				495	21: ! handler named by the table entry for the ccdbl..11b range (doubleword-store unrolled loop, 16 instructions per 32 bytes); NOTE(review): %g2 is presumably the index of the faulting instruction within that range, set outside this file - confirm
				496	andcc %g2, 15, %o3 ! %o3 = %g2 % 16, index within a chunk
				497	srl %g2, 4, %g2 ! %g2 = %g2 / 16, whole chunks
				498	be,a 1f ! index zero: nothing to copy
				499	clr %o2 ! (annulled unless taken) %o2 = 0
				500	add %o3, 1, %o3 ! round the index up ...
				501	and %o3, 14, %o3 ! ... to an even value
				502	sll %o3, 3, %o2 ! %o2 = that * 8: bytes to copy
				503	1:
				504	sll %g2, 5, %g2 ! %g2 = whole chunks * 32
				505	sub %g1, %g2, %o3 ! %o3 = %g1 - bytes in whole chunks
				506	b 31f ! enter the common handler with %o2 set
				507	sub %o3, %o2, %o3 ! delay slot: %o3 -= %o2, bytes to zero out
				508
				509	/* o0 += (g2/10)*16 - 0x70
				510	* o1 += (g2/10)*16 - 0x70
				511	* o2 = (g2 % 10) ? 8 : 0
				512	* o3 += 0x70 - (g2/10)*16 - o2 */
				513	22: ! handler named by the table entry for the cctbl..12b range (tail table, 10 instructions per 16 bytes); NOTE(review): %g2 is presumably the index of the faulting instruction within that range, set outside this file - confirm
				514	cmp %g2, 10 ! a whole 10-instruction table entry still counted in %g2?
				515	blu,a 1f ! no: %g2 is now the index within an entry
				516	sub %o0, 0x70, %o0 ! (annulled unless taken) src -= 0x70
				517	add %o0, 16, %o0 ! one whole entry: src += 16
				518	add %o1, 16, %o1 ! dst += 16
				519	sub %o3, 16, %o3 ! %o3 -= 16
				520	b 22b ! keep reducing
				521	sub %g2, 10, %g2 ! delay slot: %g2 -= 10
				522	1:
				523	sub %o1, 0x70, %o1 ! dst -= 0x70, matching the src adjustment above
				524	add %o3, 0x70, %o3 ! %o3 += 0x70
				525	clr %o2 ! default: nothing to copy
				526	tst %g2 ! index within the entry non-zero?
				527	bne,a 1f
				528	mov 8, %o2 ! (annulled unless taken) %o2 = 8 bytes to copy
				529	1:
				530	b 31f ! enter the common handler with %o2 set
				531	sub %o3, %o2, %o3 ! delay slot: %o3 -= %o2, bytes to zero out
				532	96: ! handler for the word loads of the ccslow loop: %g1 = len, %g4 = words still to do
				533	and %g1, 3, %g1 ! bytes beyond the whole words
				534	sll %g4, 2, %g4 ! remaining words * 4
				535	add %g1, %g4, %o3 ! %o3 = their sum, then fall through to 30
				536	30:
				537	/* %o1 is dst
				538	* %o3 is # bytes to zero out
				539	* %o4 is faulting address
				540	* %o5 is %pc where fault occurred */
				541	clr %o2 ! nothing to copy on this entry path
				542	31:
				543	/* %o0 is src
				544	* %o1 is dst
				545	* %o2 is # of bytes to copy from src to dst
				546	* %o3 is # bytes to zero out
				547	* %o4 is faulting address
				548	* %o5 is %pc where fault occurred */
				549	save %sp, -104, %sp ! new register window: the %o registers above become %i0-%i5
				550	mov %i5, %o0 ! arg 0: %pc where the fault occurred
				551	mov %i7, %o1 ! arg 1: %i7, i.e. %o7 as it was before the save
				552	mov %i4, %o2 ! arg 2: faulting address
				553	call lookup_fault
				554	mov %g7, %i4 ! delay slot: copy %g7 (the running sum) into %i4
				555	cmp %o0, 2 ! did lookup_fault return 2?
				556	bne 1f ! no: skip the copy and the zeroing
				557	add %g0, -EFAULT, %i5 ! delay slot: %i5 = -EFAULT, stored at 1: on both paths
				558	tst %i2 ! any bytes to copy?
				559	be 2f ! no: go straight to the zeroing
				560	mov %i0, %o1 ! delay slot: second __memcpy argument = src
				561	mov %i1, %o0 ! first __memcpy argument = dst
				562	5:
				563	call __memcpy
				564	mov %i2, %o2 ! delay slot: third argument = bytes to copy
				565	tst %o0 ! NOTE(review): non-zero presumably means the copy did not complete - confirm against __memcpy
				566	bne,a 2f ! non-zero result
				567	add %i3, %i2, %i3 ! (annulled unless taken) zero the would-be-copied bytes as well
				568	add %i1, %i2, %i1 ! zero result: advance dst past the copied bytes
				569	2:
				570	mov %i1, %o0 ! first __bzero argument = dst
				571	6:
				572	call __bzero
				573	mov %i3, %o1 ! delay slot: second argument = bytes to zero out
				574	1:
				575	ld [%sp + 168], %o2 ! struct_ptr of parent
				576	st %i5, [%o2] ! store -EFAULT through that pointer
				577	ret
				578	restore ! delay slot: pop the register window
				579
				580	.section __ex_table,#alloc
				581	.align 4
				582	.word 5b,2 ! table entry for the __memcpy call at 5: above
				583	.word 6b,2 ! table entry for the __bzero call at 6: above
				583	.word 6b,2