Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $ |
| 2 | * |
| 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
| 4 | * operating system. INET is implemented using the BSD Socket |
| 5 | * interface as the means of communication with the user level. |
| 6 | * |
| 7 | * IP/TCP/UDP checksumming routines |
| 8 | * |
| 9 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> |
| 10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> |
| 11 | * Tom May, <ftom@netcom.com> |
| 12 | * Pentium Pro/II routines: |
| 13 | * Alexander Kjeldaas <astor@guardian.no> |
| 14 | * Finn Arne Gangstad <finnag@guardian.no> |
| 15 | * Lots of code moved from tcp.c and ip.c; see those files |
| 16 | * for more names. |
| 17 | * |
| 18 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception |
| 19 | * handling. |
| 20 | * Andi Kleen, add zeroing on error |
| 21 | * converted to pure assembler |
| 22 | * |
| 23 | * SuperH version: Copyright (C) 1999 Niibe Yutaka |
| 24 | * |
| 25 | * This program is free software; you can redistribute it and/or |
| 26 | * modify it under the terms of the GNU General Public License |
| 27 | * as published by the Free Software Foundation; either version |
| 28 | * 2 of the License, or (at your option) any later version. |
| 29 | */ |
| 30 | |
| 31 | #include <asm/errno.h> |
| 32 | #include <linux/linkage.h> |
| 33 | |
| 34 | /* |
| 35 | * computes a partial checksum, e.g. for TCP/UDP fragments |
| 36 | */ |
| 37 | |
| 38 | /* |
Stuart Menefy | cadc4e1 | 2008-12-12 18:34:38 +0000 | [diff] [blame] | 39 | * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 40 | */ |
| 41 | |
	.text

/*
 * unsigned int csum_partial(const void *buf, int len, unsigned int sum)
 *
 * In:   r4 = buf, r5 = len (bytes), r6 = sum (initial 32-bit accumulator)
 * Out:  r0 = 32-bit partial one's-complement sum (not yet folded to 16 bits)
 * Clobbers: r1, r2, r3, r7 and the argument registers; no stack usage.
 *
 * Strategy: align buf to a 4-byte boundary (consuming a leading odd byte
 * and/or a leading 16-bit word), sum 32 bytes per iteration in an unrolled
 * addc loop, then mop up the remaining 0-31 bytes.  If buf was odd-aligned
 * the accumulator is byte-rotated on entry and rotated back before
 * returning, so the result matches a byte-stream checksum.
 */
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5		! T=1 iff len == 0
	add	#-1, r5
	bt	9f		! len was 0: nothing to sum
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
	! Byte-rotate the accumulator left by 8 so the following word/long
	! additions land in the right byte lanes; undone again at 9f.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0		! now 2-byte aligned; 4-byte aligned already?
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	clrt
	bra	6f
	add	#2, r5		! r5 was < 2.	Deal with it.
1:
	mov.w	@r4+, r0	! consume one 16-bit word to reach 4-byte alignment
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! fold in the carry left by addc
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte iterations
	tst	r1, r1
	bt/s	4f		! if it's =0, go to 4f
	clrt
	.align 2
3:
	! Unrolled inner loop: checksum 32 bytes (8 longwords) per pass.
	! Loads are interleaved with addc so the carry chain never waits
	! directly on a just-loaded value.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry (T) across the dt below
	dt	r1
	bf/s	3b
	cmp/eq	#1, r0		! restore T from the saved carry
	! here, we know r1==0
	addc	r1, r6		! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! remaining longword bytes (0..28)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = remaining longword count
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0		! save carry (T) across the dt below
	dt	r1
	bf/s	5b
	cmp/eq	#1, r0		! restore T from the saved carry
	addc	r2, r6		! add the final longword loaded above
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f		! only one byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly two bytes left
	clrt
	shll16	r0		! three left: this word goes in the high half
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! fold in the last carry
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	! undo the byte-rotate performed for the odd leading byte
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	mov	r6, r0
| 177 | |
| 178 | /* |
| 179 | unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, |
| 180 | int sum, int *src_err_ptr, int *dst_err_ptr) |
| 181 | */ |
| 182 | |
| 183 | /* |
 * Copy from src to dst while checksumming, otherwise like csum_partial
| 185 | * |
| 186 | * The macros SRC and DST specify the type of access for the instruction. |
| 187 | * thus we can call a custom exception handler for all access types. |
| 188 | * |
| 189 | * FIXME: could someone double-check whether I haven't mixed up some SRC and |
| 190 | * DST definitions? It's damn hard to trigger all cases. I hope I got |
| 191 | * them all but there's no guarantee. |
| 192 | */ |
| 193 | |
/*
 * SRC()/DST() wrap a single load (SRC) or store (DST) instruction and
 * emit an __ex_table entry for it.  If the access faults, the kernel's
 * exception fixup transfers control to local label 6001 (source fault)
 * or 6002 (destination fault) in the .fixup section below.
 */
#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
| 205 | |
| 206 | ! |
| 207 | ! r4: const char *SRC |
| 208 | ! r5: char *DST |
| 209 | ! r6: int LEN |
| 210 | ! r7: int SUM |
| 211 | ! |
| 212 | ! on stack: |
| 213 | ! int *SRC_ERR_PTR |
| 214 | ! int *DST_ERR_PTR |
| 215 | ! |
/*
 * Copies len bytes from src (r4) to dst (r5) while accumulating a 32-bit
 * one's-complement sum in r7; returns the sum in r0.  Faulting accesses
 * are caught via the __ex_table entries emitted by SRC()/DST(): a source
 * fault stores -EFAULT through src_err_ptr and zero-fills the whole
 * destination; a destination fault stores -EFAULT through dst_err_ptr.
 * Either way the function returns normally.
 */
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15	! save dst for the fixup handlers
	mov.l	r6,@-r15	! save len for the fixup handlers
				! (err ptrs are now at @(8,r15)/@(12,r15))

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1		! r1 = src & 3
	and	r5,r0		! r0 = dst & 3
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! r2 = total len, kept for the tail
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f		! fewer than two bytes in total
	clrt
	.align 2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0 	)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Assemble the two bytes into one 16-bit value in host byte order
	! before adding, so the result matches the aligned path.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! save carry (T) across the dt below
	dt	r6
	bf/s	5b
	cmp/eq	#1,r0		! restore T from the saved carry
	mov	#0,r0
	addc	r0, r7		! fold in the final carry

	mov	r2, r0
	tst	#1, r0		! odd total length?
	bt	7f		! even: everything copied, done
	bra	5f		! odd: copy and sum the last byte
	clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align 2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in the carry
2:
	mov	r6,r2		! r2 = remaining length, kept for the tail
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5 = number of 32-byte iterations
	tst	r6,r6
	bt/s	2f
	clrt
	.align 2
1:
	! Unrolled main loop: copy and checksum 32 bytes per iteration.
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! save carry (T) across the dt below
	dt	r6
	bf/s	1b
	cmp/eq	#1,r0		! restore T from the saved carry
	mov	#0,r0
	addc	r0,r7		! fold in the final carry

2:	mov	r2,r6		! 0..31 bytes remain
	mov	#0x1c,r0
	and	r0,r6		! r6 = remaining longword bytes (0..28)
	cmp/pl	r6
	bf/s	4f
	clrt
	shlr2	r6		! r6 = remaining longword count
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0		! save carry (T) across the dt below
	dt	r6
	bf/s	3b
	cmp/eq	#1,r0		! restore T from the saved carry
	mov	#0,r0
	addc	r0,r7		! fold in the final carry
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! r6 = remaining bytes (0..3)
	cmp/pl	r6
	bf	7f		! nothing left
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! exactly one byte left
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly two bytes left
	clrt
	shll16	r0		! three left: this word goes in the high half
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in the final carry
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	! Source fault: report -EFAULT, zero-fill the whole destination,
	! then resume at 5000 (r7 == 0, so the returned sum is 0).
	mov.l	@(8,r15),r0		! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst
	mov.l	@r15,r6		! len
	mov	#0,r7		! fill byte; also the value returned in r0
	! NOTE(review): this dt-based loop assumes len > 0 here; if a fault
	! could occur with len == 0, r6 would wrap -- confirm it cannot.
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	add	#1,r5
	mov.l	8000f,r0	! resume address (5000b) via a constant pool
	jmp	@r0
	nop
	.align	2
8000:	.long	5000b

6002:
	! Destination fault: report -EFAULT and return the sum so far.
	mov.l	@(12,r15),r0		! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0	! resume address (5000b) via a constant pool
	jmp	@r0
	nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15		! drop the saved dst/len
	rts
	mov	r7,r0		! return the accumulated sum