/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
30
#include <asm/errno.h>
#include <linux/linkage.h>
33
/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 */
.text
ENTRY(csum_partial)
	/*
	 * Accumulates a 32-bit partial one's-complement checksum.
	 * SH ABI: r4 = buf, r5 = len (bytes), r6 = initial sum.
	 * The running sum is kept in r6 and returned in r0.
	 *
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r5, r1		! r1 = len, re-read by the tail code at 4:/6:
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	clrt
	bra	6f
	add	#2, r5		! r5 was < 2. Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0	! fold in the leading 16-bit word
	extu.w	r0, r0
	addc	r0, r6		! T was cleared above, so T = carry out
	bf	2f		! no carry -> skip
	add	#1, r6		! fold the carry back into the sum
2:
	mov	#-5, r0		! r5 = len >> 5: number of 32-byte chunks
	shld	r0, r5
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	clrt
	.align 2
3:
	! Unrolled main loop: sum eight longwords (32 bytes) per pass.
	! addc chains the carry through T from one addition to the next.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save the pending carry: dt clobbers T
	dt	r5		! --r5; T = (r5 == 0)
	bf/s	3b
	cmp/eq	#1, r0		! (delay slot) restore saved carry into T
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0	! remaining whole longwords (len & 0x1c), in bytes
	tst	r0, r0
	bt/s	6f
	mov	r0, r5		! (delay slot, executes either way)
	shlr2	r5		! r5 = longword count
	mov	#0, r2
5:
	! One longword per pass; same movt/cmp trick to carry T past dt.
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	cmp/eq	#1, r0
	addc	r2, r6		! fold in the last longword read above
	addc	r5, r6		! r5==0 here, so it means add carry-bit
6:
	! Tail: 0-3 remaining bytes (len & 3).
	mov	r1, r5
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5		! at least two bytes left?
	bf	7f		! no -> single trailing byte
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5		! exactly two bytes?
	bt/s	8f
	clrt
	shll16	r0		! three bytes: word goes in the upper half
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! fold in any final carry
9:
	rts
	mov	r6, r0		! (delay slot) return the sum
146
/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *					  int sum, int *src_err_ptr,
 *					  int *dst_err_ptr)
 */

/*
 * Copy from src while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
162
/*
 * SRC()/DST() wrap a single memory access that may fault on a user
 * pointer.  Each emits an __ex_table entry mapping the faulting
 * instruction (local label 9999) to the matching fixup handler:
 * 6001f for source (read) faults, 6002f for destination (write) faults.
 */
#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
174
!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
!	int *SRC_ERR_PTR
!	int *DST_ERR_PTR
!
ENTRY(csum_partial_copy_generic)
	! Copies len bytes from src (r4) to dst (r5) while accumulating the
	! checksum in r7; returns the sum in r0.  On a fault the .fixup
	! handlers below store -EFAULT through the corresponding error
	! pointer; a source fault additionally zeroes the whole destination.
	mov.l	r5,@-r15	! save dst (read back by the fixup code)
	mov.l	r6,@-r15	! save len; stack args are now at 8/12(r15)

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	clrt
	bra	4f
	add	#2,r6		! r6 was < 2. Deal with it.

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! keep original len for the odd-byte check
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align 2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0	)
	extu.b	r1,r1
DST(	mov.b	r1,@r5	)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0		! combine the byte pair into one 16-bit value

	addc	r0,r7
	movt	r0		! save the pending carry: dt clobbers T
	dt	r6
	bf/s	5b
	cmp/eq	#1,r0		! (delay slot) restore saved carry into T
	mov	#0,r0
	addc	r0, r7		! fold in the final carry

	mov	r2, r0
	tst	#1, r0		! odd length?
	bt	7f		! no -> done
	bra	5f		! yes -> copy/sum the last byte
	clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align 2
1:
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in the carry
2:
	mov	r6,r2		! keep len for the tail phases
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5: number of 32-byte chunks
	tst	r6,r6
	bt/s	2f
	clrt
	.align 2
1:
	! Unrolled main loop: copy and sum eight longwords (32 bytes)
	! per pass; addc chains the carry through T.
SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! save the pending carry: dt clobbers T
	dt	r6
	bf/s	1b
	cmp/eq	#1,r0		! (delay slot) restore saved carry into T
	mov	#0,r0
	addc	r0,r7		! fold in the final carry

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! remaining whole longwords (len & 0x1c), in bytes
	cmp/pl	r6
	bf/s	4f
	clrt
	shlr2	r6		! r6 = longword count
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0		! save the pending carry: dt clobbers T
	dt	r6
	bf/s	3b
	cmp/eq	#1,r0		! (delay slot) restore saved carry into T
	mov	#0,r0
	addc	r0,r7		! fold in the final carry
4:	mov	r2,r6		! Tail: 0-3 remaining bytes (len & 3)
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6		! at least two bytes left?
	bf	5f		! no -> single trailing byte
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6		! exactly two bytes?
	bt/s	6f
	clrt
	shll16	r0		! three bytes: word goes in the upper half
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in any final carry
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	! Source (read) fault: report -EFAULT through *src_err_ptr.
	mov.l	@(8,r15),r0	! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst
	mov.l	@r15,r6		! len
	mov	#0,r7
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	add	#1,r5		! (delay slot) advance dst
	mov.l	8000f,r0	! resume at the common exit path (5000)
	jmp	@r0
	nop
	.align	2
8000:	.long	5000b

6002:
	! Destination (write) fault: report -EFAULT through *dst_err_ptr.
	mov.l	@(12,r15),r0	! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0	! resume at the common exit path (5000)
	jmp	@r0
	nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15		! pop the saved dst/len
	rts
	mov	r7,r0		! (delay slot) return the sum