/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 */
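
/*
 * For orientation, a minimal C sketch of what csum_partial computes
 * (an illustrative reference, not this file's code or the kernel's
 * generic version; csum_partial_ref is a made-up name): the 32-bit
 * one's-complement partial sum over the buffer, with each carry
 * folded back in as it occurs.  The assembly below is alignment
 * handling and loop unrolling layered on top of exactly this.
 *
 *	unsigned int csum_partial_ref(const unsigned char *buf, int len,
 *				      unsigned int sum)
 *	{
 *		while (len > 1) {		// whole 16-bit words
 *	#ifdef __LITTLE_ENDIAN__
 *			unsigned int w = buf[0] | (buf[1] << 8);
 *	#else
 *			unsigned int w = (buf[0] << 8) | buf[1];
 *	#endif
 *			sum += w;
 *			if (sum < w)		// unsigned wrap == carry out
 *				sum++;		// fold the carry back in
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len > 0) {			// trailing odd byte
 *	#ifdef __LITTLE_ENDIAN__
 *			unsigned int w = buf[0];
 *	#else
 *			unsigned int w = buf[0] << 8;
 *	#endif
 *			sum += w;
 *			if (sum < w)
 *				sum++;
 *		}
 *		return sum;
 *	}
 */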

	.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
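	/*
	 * Register use below (inferred from the code, noted here for
	 * readability): r4 = buf, r5 = len, r6 = sum accumulator (the
	 * third argument; result returned in r0), r1 = saved copy of
	 * len for the byte/word tail, r0/r2/r3 = scratch.
	 */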
	mov	r5, r1
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
	mov	#-5, r0
	shld	r0, r5		! len / 32 = number of 32-byte blocks
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	 clrt
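	! Main loop: 32 bytes (eight longwords) per iteration, carries
	! accumulated through the T bit by the addc chain.  At the loop
	! bottom, movt saves the pending carry in r0, dt decrements the
	! block count (clobbering T), and cmp/eq #1,r0 in the branch
	! delay slot restores T from the saved carry.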
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0	! remaining longword bytes (len & 0x1c)
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5		! number of remaining longwords
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so this just adds the carry bit
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5		! remaining bytes (len & 3)
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	rts
	 mov	r6, r0

/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *					  int sum, int *src_err_ptr,
 *					  int *dst_err_ptr)
 */
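
/*
 * Behavioural sketch in C (illustration only; csum_partial_copy_ref
 * is a made-up name, and csum_partial_ref is the sketch near the top
 * of this file): copy len bytes from src to dst while accumulating
 * the same partial sum csum_partial would produce over that data.
 * On a faulting source read, *src_err_ptr is set to -EFAULT and the
 * whole destination is zeroed (fixup 6001 below); on a faulting
 * destination write, *dst_err_ptr is set to -EFAULT (fixup 6002).
 *
 *	#include <string.h>
 *
 *	unsigned int csum_partial_copy_ref(const char *src, char *dst,
 *					   int len, unsigned int sum,
 *					   int *src_err_ptr,
 *					   int *dst_err_ptr)
 *	{
 *		memcpy(dst, src, len);	// in the real routine, any
 *					// access here may fault
 *		return csum_partial_ref((const unsigned char *)dst,
 *					len, sum);
 *	}
 */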

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so that a custom exception handler can be invoked for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
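
/*
 * Worked example (derived purely from the macros above):
 * SRC( mov.l @r4+,r0 ) expands to
 *
 *	9999:	mov.l @r4+,r0 ;
 *		.section __ex_table, "a";
 *		.long 9999b, 6001f ;
 *		.previous
 *
 * i.e. an __ex_table entry pairing the address of the load with the
 * 6001: fixup at the end of this file, so a fault on that access
 * resumes in the fixup code instead of oopsing.
 */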

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
!	int *SRC_ERR_PTR
!	int *DST_ERR_PTR
!
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so a simple byte-by-byte copy will do.
	mov	r6,r2
	shlr	r6		! number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1		)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0		! combine the pair into one 16-bit word

	addc	r0,r7
	movt	r0
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7		! fold in the final carry

	mov	r2,r0
	tst	#1,r0		! odd trailing byte left?
	bt	7f
	bra	5f
	 clrt

! src and dest equally aligned, but to a two-byte boundary.
! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6		! len / 32 = number of 32-byte blocks
	tst	r6,r6
	bt/s	2f
	 clrt
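	! 32-byte unrolled copy-and-checksum loop; same T-bit carry
	! save/restore trick (movt/dt/cmp/eq) as in csum_partial above.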
	.align	2
1:
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! remaining longword bytes (len & 0x1c)
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! number of remaining longwords
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! remaining bytes (len & 3)
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	 clrt
	shll16	r0
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
5000:

# Exception handler:
.section .fixup, "ax"

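	! Overview (restating what the code below does): 6001 runs when
	! a SRC() access faults; it reports -EFAULT through src_err_ptr
	! and zeroes the whole destination buffer.  6002 runs when a
	! DST() access faults and reports -EFAULT through dst_err_ptr.
	! Both then jump back to label 5000 (the normal exit path) via
	! an address constant, since a PC-relative branch may not reach
	! across sections.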
6001:
	mov.l	@(8,r15),r0	! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst
	mov.l	@r15,r6		! len
	mov	#0,r7
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	 add	#1,r5
	mov.l	8000f,r0
	jmp	@r0
	 nop
	.align	2
8000:	.long	5000b

6002:
	mov.l	@(12,r15),r0	! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	 nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15
	rts
	 mov	r7,r0