/* checksum.S: Sparc optimized checksum code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

#include <asm/errno.h>

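/* Sum one 32-byte block into "sum".  Each ldd pulls in an aligned 8-byte
 * doubleword (t0/t1, t2/t3, ... are even/odd register pairs), and the
 * addxcc chain adds each 32-bit word together with the carry left by the
 * previous add, so a single carry bit threads through all eight adds.
 * The carry produced by the last addxcc is folded back in by the caller
 * (see the "addx %g0, %o2, %o2" after the unrolled loop below).
 */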
#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)	\
	ldd	[buf + offset + 0x00], t0;			\
	ldd	[buf + offset + 0x08], t2;			\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;					\
	ldd	[buf + offset + 0x10], t4;			\
	addxcc	t2, sum, sum;					\
	addxcc	t3, sum, sum;					\
	ldd	[buf + offset + 0x18], t0;			\
	addxcc	t4, sum, sum;					\
	addxcc	t5, sum, sum;					\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;

#define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3)	\
	ldd	[buf - offset - 0x08], t0;			\
	ldd	[buf - offset - 0x00], t2;			\
	addxcc	t0, sum, sum;					\
	addxcc	t1, sum, sum;					\
	addxcc	t2, sum, sum;					\
	addxcc	t3, sum, sum;

/* Do end cruft out of band to get better cache patterns. */
csum_partial_end_cruft:
	be	1f				! caller asks %o1 & 0x8
	andcc	%o1, 4, %g0			! nope, check for word remaining
	ldd	[%o0], %g2			! load two
	addcc	%g2, %o2, %o2			! add first word to sum
	addxcc	%g3, %o2, %o2			! add second word as well
	add	%o0, 8, %o0			! advance buf ptr
	addx	%g0, %o2, %o2			! add in final carry
	andcc	%o1, 4, %g0			! check again for word remaining
1:	be	1f				! nope, skip this code
	andcc	%o1, 3, %o1			! check for trailing bytes
	ld	[%o0], %g2			! load it
	addcc	%g2, %o2, %o2			! add to sum
	add	%o0, 4, %o0			! advance buf ptr
	addx	%g0, %o2, %o2			! add in final carry
	andcc	%o1, 3, %g0			! check again for trailing bytes
1:	be	1f				! no trailing bytes, return
	addcc	%o1, -1, %g0			! only one byte remains?
	bne	2f				! at least two bytes more
	subcc	%o1, 2, %o1			! only two bytes more?
	b	4f				! only one byte remains
	or	%g0, %g0, %o4			! clear fake hword value
2:	lduh	[%o0], %o4			! get hword
	be	6f				! jmp if only hword remains
	add	%o0, 2, %o0			! advance buf ptr either way
	sll	%o4, 16, %o4			! create upper hword
4:	ldub	[%o0], %o5			! get final byte
	sll	%o5, 8, %o5			! put into place
	or	%o5, %o4, %o4			! coalesce with hword (if any)
6:	addcc	%o4, %o2, %o2			! add to sum
1:	retl					! get outta here
	addx	%g0, %o2, %o0			! add final carry into retval

/* Also do alignment out of band to get better cache patterns. */
csum_partial_fix_alignment:
	cmp	%o1, 6
	bl	cpte - 0x4
	andcc	%o0, 0x2, %g0
	be	1f
	andcc	%o0, 0x4, %g0
	lduh	[%o0 + 0x00], %g2
	sub	%o1, 2, %o1
	add	%o0, 2, %o0
	sll	%g2, 16, %g2
	addcc	%g2, %o2, %o2
	srl	%o2, 16, %g3
	addx	%g0, %g3, %g2
	sll	%o2, 16, %o2
	sll	%g2, 16, %g3
	srl	%o2, 16, %o2
	andcc	%o0, 0x4, %g0
	or	%g3, %o2, %o2
1:	be	cpa
	andcc	%o1, 0xffffff80, %o3
	ld	[%o0 + 0x00], %g2
	sub	%o1, 4, %o1
	addcc	%g2, %o2, %o2
	add	%o0, 4, %o0
	addx	%g0, %o2, %o2
	b	cpa
	andcc	%o1, 0xffffff80, %o3

/* The common case is to get called with a nicely aligned
 * buffer of size 0x20.  Follow the code path for that case.
 */
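/* At the C level this computes a 32-bit one's-complement partial sum:
 * the buffer is added up as 32-bit words, with every carry fed back
 * into the sum (end-around carry); alignment and sub-word tails are
 * handled out of band above.  A rough model in C - a hedged sketch for
 * illustration only, not the kernel's declaration of csum_partial:
 *
 *	unsigned int ref_csum_partial(const unsigned char *buf, int len,
 *				      unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 4) {	// 32 bits at a time, like the asm
 *			acc += *(const unsigned int *)buf;
 *			buf += 4;
 *			len -= 4;
 *		}
 *		// fold the carries back in (the addx instructions here)
 *		while (acc >> 32)
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;	// trailing bytes omitted
 *	}
 */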
	.globl	csum_partial
csum_partial:			/* %o0=buf, %o1=len, %o2=sum */
	andcc	%o0, 0x7, %g0			! alignment problems?
	bne	csum_partial_fix_alignment	! yep, handle it
	sethi	%hi(cpte - 8), %g7		! prepare table jmp ptr
	andcc	%o1, 0xffffff80, %o3		! num loop iterations
cpa:	be	3f				! none to do
	andcc	%o1, 0x70, %g1			! clears carry flag too
5:	CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
	addx	%g0, %o2, %o2			! sink in final carry
	subcc	%o3, 128, %o3			! detract from loop iters
	bne	5b				! more to do
	add	%o0, 128, %o0			! advance buf ptr
	andcc	%o1, 0x70, %g1			! clears carry flag too
3:	be	cpte				! nope
	andcc	%o1, 0xf, %g0			! anything left at all?
	srl	%g1, 1, %o4			! compute offset
	sub	%g7, %g1, %g7			! adjust jmp ptr
	sub	%g7, %o4, %g7			! final jmp ptr adjust
	jmp	%g7 + %lo(cpte - 8)		! enter the table
	add	%o0, %g1, %o0			! advance buf ptr
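/* Duff-style entry into the table below: %g1 = len & 0x70 = 16*k, and
 * each CSUM_LASTCHUNK expands to 6 instructions = 24 bytes, so the
 * target "cpte - 8 - (g1 + g1/2)" lands exactly k chunks before the
 * addx/andcc pair at cpte - 8.  %o0 was already advanced past the data
 * in the delay slot above, which is why CSUM_LASTCHUNK reaches back
 * with negative offsets.
 */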
cptbl:	CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
	CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
	addx	%g0, %o2, %o2		! fetch final carry
	andcc	%o1, 0xf, %g0		! anything left at all?
cpte:	bne	csum_partial_end_cruft	! yep, handle it
	andcc	%o1, 8, %g0		! check how much
cpout:	retl				! get outta here
	mov	%o2, %o0		! return computed csum

	.globl	__csum_partial_copy_start, __csum_partial_copy_end
__csum_partial_copy_start:

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b)				\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba	30f;				\
	a, b, %o3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EX2(x,y)			\
98:	x,y;				\
	.section __ex_table,ALLOC;	\
	.align	4;			\
	.word	98b, 30f;		\
	.text;				\
	.align	4

#define EX3(x,y)			\
98:	x,y;				\
	.section __ex_table,ALLOC;	\
	.align	4;			\
	.word	98b, 96f;		\
	.text;				\
	.align	4

#define EXT(start,end,handler)		\
	.section __ex_table,ALLOC;	\
	.align	4;			\
	.word	start, 0, end, handler;	\
	.text;				\
	.align	4
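/* For example, EX(ld [%o0 + 0x00], %g2, add %o3, 4) emits
 * (.align directives omitted):
 *
 *	98:	ld [%o0 + 0x00], %g2
 *		.section .fixup,#alloc,#execinstr
 *	99:	ba 30f
 *		add %o3, 4, %o3		! delay slot: byte count in %o3
 *		.section __ex_table,#alloc
 *		.word 98b, 99b		! a fault at 98 branches to 99
 *		.text
 *
 * i.e. a faulting load or store diverts to the fixup code at 30f with a
 * byte count left in %o3.  EXT instead records a whole range with the
 * 4-word form ".word start, 0, end, handler", so one entry can cover an
 * entire unrolled block.
 */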

/* This aligned version executes typically in 8.5 superscalar cycles, this
 * is the best I can do.  I say 8.5 because the final add will pair with
 * the next ldd in the main unrolled loop.  Thus the pipe is always full.
 * If you change these macros (including order of instructions),
 * please check the fixup code below as well.
 */
#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
	ldd	[src + off + 0x00], t0;	\
	ldd	[src + off + 0x08], t2;	\
	addxcc	t0, sum, sum;		\
	ldd	[src + off + 0x10], t4;	\
	addxcc	t1, sum, sum;		\
	ldd	[src + off + 0x18], t6;	\
	addxcc	t2, sum, sum;		\
	std	t0, [dst + off + 0x00];	\
	addxcc	t3, sum, sum;		\
	std	t2, [dst + off + 0x08];	\
	addxcc	t4, sum, sum;		\
	std	t4, [dst + off + 0x10];	\
	addxcc	t5, sum, sum;		\
	std	t6, [dst + off + 0x18];	\
	addxcc	t6, sum, sum;		\
	addxcc	t7, sum, sum;

/* 12 superscalar cycles seems to be the limit for this case; because of
 * that, we do all the ldd's together so the Viking MXCC goes into
 * streaming mode.  Ho hum...
 */
#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
	ldd	[src + off + 0x00], t0;	\
	ldd	[src + off + 0x08], t2;	\
	ldd	[src + off + 0x10], t4;	\
	ldd	[src + off + 0x18], t6;	\
	st	t0, [dst + off + 0x00];	\
	addxcc	t0, sum, sum;		\
	st	t1, [dst + off + 0x04];	\
	addxcc	t1, sum, sum;		\
	st	t2, [dst + off + 0x08];	\
	addxcc	t2, sum, sum;		\
	st	t3, [dst + off + 0x0c];	\
	addxcc	t3, sum, sum;		\
	st	t4, [dst + off + 0x10];	\
	addxcc	t4, sum, sum;		\
	st	t5, [dst + off + 0x14];	\
	addxcc	t5, sum, sum;		\
	st	t6, [dst + off + 0x18];	\
	addxcc	t6, sum, sum;		\
	st	t7, [dst + off + 0x1c];	\
	addxcc	t7, sum, sum;

/* Yuck, 6 superscalar cycles... */
#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3)	\
	ldd	[src - off - 0x08], t0;	\
	ldd	[src - off - 0x00], t2;	\
	addxcc	t0, sum, sum;		\
	st	t0, [dst - off - 0x08];	\
	addxcc	t1, sum, sum;		\
	st	t1, [dst - off - 0x04];	\
	addxcc	t2, sum, sum;		\
	st	t2, [dst - off - 0x00];	\
	addxcc	t3, sum, sum;		\
	st	t3, [dst - off + 0x04];

/* Handle the end cruft code out of band for better cache patterns. */
cc_end_cruft:
	be	1f
	andcc	%o3, 4, %g0
	EX(ldd	[%o0 + 0x00], %g2, and %o3, 0xf)
	add	%o1, 8, %o1
	addcc	%g2, %g7, %g7
	add	%o0, 8, %o0
	addxcc	%g3, %g7, %g7
	EX2(st	%g2, [%o1 - 0x08])
	addx	%g0, %g7, %g7
	andcc	%o3, 4, %g0
	EX2(st	%g3, [%o1 - 0x04])
1:	be	1f
	andcc	%o3, 3, %o3
	EX(ld	[%o0 + 0x00], %g2, add %o3, 4)
	add	%o1, 4, %o1
	addcc	%g2, %g7, %g7
	EX2(st	%g2, [%o1 - 0x04])
	addx	%g0, %g7, %g7
	andcc	%o3, 3, %g0
	add	%o0, 4, %o0
1:	be	1f
	addcc	%o3, -1, %g0
	bne	2f
	subcc	%o3, 2, %o3
	b	4f
	or	%g0, %g0, %o4
2:	EX(lduh	[%o0 + 0x00], %o4, add %o3, 2)
	add	%o0, 2, %o0
	EX2(sth	%o4, [%o1 + 0x00])
	be	6f
	add	%o1, 2, %o1
	sll	%o4, 16, %o4
4:	EX(ldub	[%o0 + 0x00], %o5, add %g0, 1)
	EX2(stb	%o5, [%o1 + 0x00])
	sll	%o5, 8, %o5
	or	%o5, %o4, %o4
6:	addcc	%o4, %g7, %g7
1:	retl
	addx	%g0, %g7, %o0

/* Also, handle the alignment code out of band. */
cc_dword_align:
	cmp	%g1, 16
	bge	1f
	srl	%g1, 1, %o3
2:	cmp	%o3, 0
	be,a	ccte
	andcc	%g1, 0xf, %o3
	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
	be,a	2b
	srl	%o3, 1, %o3
1:	andcc	%o0, 0x1, %g0
	bne	ccslow
	andcc	%o0, 0x2, %g0
	be	1f
	andcc	%o0, 0x4, %g0
	EX(lduh	[%o0 + 0x00], %g4, add %g1, 0)
	sub	%g1, 2, %g1
	EX2(sth	%g4, [%o1 + 0x00])
	add	%o0, 2, %o0
	sll	%g4, 16, %g4
	addcc	%g4, %g7, %g7
	add	%o1, 2, %o1
	srl	%g7, 16, %g3
	addx	%g0, %g3, %g4
	sll	%g7, 16, %g7
	sll	%g4, 16, %g3
	srl	%g7, 16, %g7
	andcc	%o0, 0x4, %g0
	or	%g3, %g7, %g7
1:	be	3f
	andcc	%g1, 0xffffff80, %g0
	EX(ld	[%o0 + 0x00], %g4, add %g1, 0)
	sub	%g1, 4, %g1
	EX2(st	%g4, [%o1 + 0x00])
	add	%o0, 4, %o0
	addcc	%g4, %g7, %g7
	add	%o1, 4, %o1
	addx	%g0, %g7, %g7
	b	3f
	andcc	%g1, 0xffffff80, %g0

/* Sun, you just can't beat me, you just can't.  Stop trying,
 * give up.  I'm serious, I am going to kick the living shit
 * out of you, game over, lights out.
 */
	.align	8
	.globl	__csum_partial_copy_sparc_generic
__csum_partial_copy_sparc_generic:
					/* %o0=src, %o1=dest, %g1=len, %g7=sum */
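/* A C-level view of this routine - a hedged sketch only, since len and
 * sum arrive in %g1/%g7 rather than in the normal argument registers,
 * so this is not a real prototype and "csum_copy" is a placeholder name:
 *
 *	unsigned int csum_copy(const void *src,    // %o0
 *			       void *dst,          // %o1
 *			       int len,            // %g1
 *			       unsigned int sum);  // %g7
 *
 * It copies len bytes from src to dst and returns the updated 32-bit
 * partial checksum in %o0, with faults diverted to the fixup code below.
 */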
	xor	%o0, %o1, %o4		! get changing bits
	andcc	%o4, 3, %g0		! check for mismatched alignment
	bne	ccslow			! better this than unaligned/fixups
	andcc	%o0, 7, %g0		! need to align things?
	bne	cc_dword_align		! yes, we check for short lengths there
	andcc	%g1, 0xffffff80, %g0	! can we use unrolled loop?
3:	be	3f			! nope, less than one loop remains
	andcc	%o1, 4, %g0		! dest aligned on 4 or 8 byte boundary?
	be	ccdbl + 4		! 8 byte aligned, kick ass
5:	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
10:	EXT(5b, 10b, 20f)		! note for exception handling
	sub	%g1, 128, %g1		! detract from length
	addx	%g0, %g7, %g7		! add in last carry bit
	andcc	%g1, 0xffffff80, %g0	! more to csum?
	add	%o0, 128, %o0		! advance src ptr
	bne	5b			! we did not go negative, continue looping
	add	%o1, 128, %o1		! advance dest ptr
3:	andcc	%g1, 0x70, %o2		! can use table?
ccmerge:be	ccte			! nope, go and check for end cruft
	andcc	%g1, 0xf, %o3		! get low bits of length (clears carry btw)
	srl	%o2, 1, %o4		! begin negative offset computation
	sethi	%hi(12f), %o5		! set up table ptr end
	add	%o0, %o2, %o0		! advance src ptr
	sub	%o5, %o4, %o5		! continue table calculation
	sll	%o2, 1, %g2		! constant multiplies are fun...
	sub	%o5, %g2, %o5		! some more adjustments
	jmp	%o5 + %lo(12f)		! jump into it, duff style, wheee...
	add	%o1, %o2, %o1		! advance dest ptr (carry is clear btw)
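/* Same Duff-style trick as in csum_partial, but CSUMCOPY_LASTCHUNK is
 * 10 instructions = 40 bytes, so the offset back from 12f is
 * o2/2 + 2*o2 = 2.5*o2 = 40*(o2/16): exactly one chunk per remaining
 * 16 bytes.  Both pointers were pre-advanced in the delay slots, hence
 * the negative offsets inside the chunks.
 */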
cctbl:	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
12:	EXT(cctbl, 12b, 22f)		! note for exception table handling
	addx	%g0, %g7, %g7
	andcc	%o3, 0xf, %g0		! check for low bits set
ccte:	bne	cc_end_cruft		! something left, handle it out of band
	andcc	%o3, 8, %g0		! begin checks for that code
	retl				! return
	mov	%g7, %o0		! give em the computed checksum
ccdbl:	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
11:	EXT(ccdbl, 11b, 21f)		! note for exception table handling
	sub	%g1, 128, %g1		! detract from length
	addx	%g0, %g7, %g7		! add in last carry bit
	andcc	%g1, 0xffffff80, %g0	! more to csum?
	add	%o0, 128, %o0		! advance src ptr
	bne	ccdbl			! we did not go negative, continue looping
	add	%o1, 128, %o1		! advance dest ptr
	b	ccmerge			! finish it off, above
	andcc	%g1, 0x70, %o2		! can use table? (clears carry btw)

ccslow:	cmp	%g1, 0
	mov	0, %g5
	bleu	4f
	andcc	%o0, 1, %o5
	be,a	1f
	srl	%g1, 1, %g4
	sub	%g1, 1, %g1
	EX(ldub	[%o0], %g5, add %g1, 1)
	add	%o0, 1, %o0
	EX2(stb	%g5, [%o1])
	srl	%g1, 1, %g4
	add	%o1, 1, %o1
1:	cmp	%g4, 0
	be,a	3f
	andcc	%g1, 1, %g0
	andcc	%o0, 2, %g0
	be,a	1f
	srl	%g4, 1, %g4
	EX(lduh	[%o0], %o4, add %g1, 0)
	sub	%g1, 2, %g1
	srl	%o4, 8, %g2
	sub	%g4, 1, %g4
	EX2(stb	%g2, [%o1])
	add	%o4, %g5, %g5
	EX2(stb	%o4, [%o1 + 1])
	add	%o0, 2, %o0
	srl	%g4, 1, %g4
	add	%o1, 2, %o1
1:	cmp	%g4, 0
	be,a	2f
	andcc	%g1, 2, %g0
	EX3(ld	[%o0], %o4)
5:	srl	%o4, 24, %g2
	srl	%o4, 16, %g3
	EX2(stb	%g2, [%o1])
	srl	%o4, 8, %g2
	EX2(stb	%g3, [%o1 + 1])
	add	%o0, 4, %o0
	EX2(stb	%g2, [%o1 + 2])
	addcc	%o4, %g5, %g5
	EX2(stb	%o4, [%o1 + 3])
	addx	%g5, %g0, %g5	! I am now too lazy to optimize this (question is
	add	%o1, 4, %o1	! whether it is worth it).  Maybe some day - with
	subcc	%g4, 1, %g4	! the sll/srl tricks
	bne,a	5b
	EX3(ld	[%o0], %o4)
	sll	%g5, 16, %g2
	srl	%g5, 16, %g5
	srl	%g2, 16, %g2
	andcc	%g1, 2, %g0
	add	%g2, %g5, %g5
2:	be,a	3f
	andcc	%g1, 1, %g0
	EX(lduh	[%o0], %o4, and %g1, 3)
	andcc	%g1, 1, %g0
	srl	%o4, 8, %g2
	add	%o0, 2, %o0
	EX2(stb	%g2, [%o1])
	add	%g5, %o4, %g5
	EX2(stb	%o4, [%o1 + 1])
	add	%o1, 2, %o1
3:	be,a	1f
	sll	%g5, 16, %o4
	EX(ldub	[%o0], %g2, add %g0, 1)
	sll	%g2, 8, %o4
	EX2(stb	%g2, [%o1])
	add	%g5, %o4, %g5
	sll	%g5, 16, %o4
1:	addcc	%o4, %g5, %g5
	srl	%g5, 16, %o4
	addx	%g0, %o4, %g5
	orcc	%o5, %g0, %g0
	be	4f
	srl	%g5, 8, %o4
	and	%g5, 0xff, %g2
	and	%o4, 0xff, %o4
	sll	%g2, 8, %g2
	or	%g2, %o4, %g5
4:	addcc	%g7, %g5, %g7
	retl
	addx	%g0, %g7, %o0
__csum_partial_copy_end:
/* We do these strange calculations for the csum_*_from_user case only,
 * i.e. we only bother with faults on loads... */

/* o2 = ((g2%20)&3)*8
 * o3 = g1 - (g2/20)*32 - o2 */
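/* Worked example: %g2 = 42 takes the loop below twice (%g1 -= 64,
 * %g2 = 2), then %o2 = (2 & 3) * 8 = 16 and %o3 = %g1 - 16, matching
 * the formula above with 42/20 = 2 and 42%20 = 2.
 */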
20:
	cmp	%g2, 20
	blu,a	1f
	and	%g2, 3, %o2
	sub	%g1, 32, %g1
	b	20b
	sub	%g2, 20, %g2
1:
	sll	%o2, 3, %o2
	b	31f
	sub	%g1, %o2, %o3

/* o2 = (!(g2 & 15) ? 0 : (((g2 & 15) + 1) & ~1)*8)
 * o3 = g1 - (g2/16)*32 - o2 */
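/* Worked example: %g2 = 18 has a low nibble of 2, so
 * %o2 = ((2 + 1) & ~1) * 8 = 16, and with 18/16 = 1 whole chunk behind
 * us %o3 = %g1 - 32 - 16.  A zero low nibble yields %o2 = 0.
 */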
21:
	andcc	%g2, 15, %o3
	srl	%g2, 4, %g2
	be,a	1f
	clr	%o2
	add	%o3, 1, %o3
	and	%o3, 14, %o3
	sll	%o3, 3, %o2
1:
	sll	%g2, 5, %g2
	sub	%g1, %g2, %o3
	b	31f
	sub	%o3, %o2, %o3
/* o0 += (g2/10)*16 - 0x70
 * o1 += (g2/10)*16 - 0x70
 * o2 = (g2 % 10) ? 8 : 0
 * o3 += 0x70 - (g2/10)*16 - o2 */
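/* Worked example: %g2 = 25 takes the loop below twice, so both pointers
 * move by 2*16 - 0x70 bytes and %o3 gains 0x70 - 32; since 25 % 10 = 5
 * is nonzero, %o2 = 8 and %o3 drops by 8 more.
 */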
22:
	cmp	%g2, 10
	blu,a	1f
	sub	%o0, 0x70, %o0
	add	%o0, 16, %o0
	add	%o1, 16, %o1
	sub	%o3, 16, %o3
	b	22b
	sub	%g2, 10, %g2
1:
	sub	%o1, 0x70, %o1
	add	%o3, 0x70, %o3
	clr	%o2
	tst	%g2
	bne,a	1f
	mov	8, %o2
1:
	b	31f
	sub	%o3, %o2, %o3
96:
	and	%g1, 3, %g1
	sll	%g4, 2, %g4
	add	%g1, %g4, %o3
30:
/* %o1 is dst
 * %o3 is # bytes to zero out
 * %o4 is faulting address
 * %o5 is %pc where fault occurred */
	clr	%o2
31:
/* %o0 is src
 * %o1 is dst
 * %o2 is # of bytes to copy from src to dst
 * %o3 is # bytes to zero out
 * %o4 is faulting address
 * %o5 is %pc where fault occurred */
	save	%sp, -104, %sp
	mov	%i5, %o0
	mov	%i7, %o1
	mov	%i4, %o2
	call	lookup_fault
	mov	%g7, %i4
	cmp	%o0, 2
	bne	1f
	add	%g0, -EFAULT, %i5
	tst	%i2
	be	2f
	mov	%i0, %o1
	mov	%i1, %o0
5:
	call	memcpy
	mov	%i2, %o2
	tst	%o0
	bne,a	2f
	add	%i3, %i2, %i3
	add	%i1, %i2, %i1
2:
	mov	%i1, %o0
6:
	call	__bzero
	mov	%i3, %o1
1:
	ld	[%sp + 168], %o2	! struct_ptr of parent
	st	%i5, [%o2]
	ret
	restore

	.section __ex_table,#alloc
	.align	4
	.word	5b, 2
	.word	6b, 2