Blame - arch/um/sys-i386/checksum.S - kernel/msm-4.9

blob: 62c7e564f22eb4719b8b10c54c290849be55f5f9 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* INET An implementation of the TCP/IP protocol suite for the LINUX
				3	* operating system. INET is implemented using the BSD Socket
				4	* interface as the means of communication with the user level.
				5	*
				6	* IP/TCP/UDP checksumming routines
				7	*
				8	* Authors: Jorge Cwik, <jorge@laser.satlink.net>
				9	* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
				10	* Tom May, <ftom@netcom.com>
				11	* Pentium Pro/II routines:
				12	* Alexander Kjeldaas <astor@guardian.no>
				13	* Finn Arne Gangstad <finnag@guardian.no>
				14	* Lots of code moved from tcp.c and ip.c; see those files
				15	* for more names.
				16	*
				17	* Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
				18	* handling.
				19	* Andi Kleen, add zeroing on error
				20	* converted to pure assembler
				21	*
				22	* This program is free software; you can redistribute it and/or
				23	* modify it under the terms of the GNU General Public License
				24	* as published by the Free Software Foundation; either version
				25	* 2 of the License, or (at your option) any later version.
				26	*/
				27
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28	#include <asm/errno.h>
				29
				30	/*
				31	* computes a partial checksum, e.g. for TCP/UDP fragments
				32	*/
				33
				34	/*
				35	unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
				36	*/
				37
				38	.text
				39	.align 4
Bodo Stroesser	7d37c6d	2005-05-05 16:15:36 -0700	[diff] [blame]	40	.globl csum_partial
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41
				42	#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
				43
				44	/*
				45	* Experiments with Ethernet and SLIP connections show that buff
				46	* is aligned on either a 2-byte or 4-byte boundary. We get at
				47	* least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
				48	* Fortunately, it is easy to convert 2-byte alignment to 4-byte
				49	* alignment for the unrolled loop.
				50	*/
Bodo Stroesser	7d37c6d	2005-05-05 16:15:36 -0700	[diff] [blame]	51	csum_partial:
					# Generic i386 variant, assembled when CONFIG_X86_USE_PPRO_CHECKSUM
					# is not defined.
					# In:  buff, len and sum are read from the stack at 12(%esp),
					#      16(%esp) and 20(%esp), offsets as seen after the two pushes.
					# Out: %eax = sum plus the data, accumulated at 32-bit width with
					#      every carry added back in.
					# %esi and %ebx are saved and restored; %ecx, %edx and the flags
					# are overwritten.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	52	pushl %esi
				53	pushl %ebx
				54	movl 20(%esp),%eax # Function arg: unsigned int sum
				55	movl 16(%esp),%ecx # Function arg: int len
				56	movl 12(%esp),%esi # Function arg: unsigned char *buff
				57	testl $2, %esi # Check alignment.
				58	jz 2f # Jump if alignment is ok.
				59	subl $2, %ecx # Alignment uses up two bytes.
				60	jae 1f # Jump if we had at least two bytes.
				61	addl $2, %ecx # ecx was < 2. Deal with it.
				62	jmp 4f
					# Consume one 16-bit word so that bit 1 of %esi becomes clear.
				63	1: movw (%esi), %bx
				64	addl $2, %esi
				65	addw %bx, %ax
				66	adcl $0, %eax
				67	2:
					# Main loop setup: %edx keeps the remaining byte count for the
					# tail code, %ecx becomes the number of 32-byte chunks.
				68	movl %ecx, %edx
				69	shrl $5, %ecx
				70	jz 2f
				71	testl %esi, %esi # clears CF ahead of the adcl chain
					# Eight dwords per pass. The carry of each adcl feeds the next
					# one, across iterations as well.
				72	1: movl (%esi), %ebx
				73	adcl %ebx, %eax
				74	movl 4(%esi), %ebx
				75	adcl %ebx, %eax
				76	movl 8(%esi), %ebx
				77	adcl %ebx, %eax
				78	movl 12(%esi), %ebx
				79	adcl %ebx, %eax
				80	movl 16(%esi), %ebx
				81	adcl %ebx, %eax
				82	movl 20(%esi), %ebx
				83	adcl %ebx, %eax
				84	movl 24(%esi), %ebx
				85	adcl %ebx, %eax
				86	movl 28(%esi), %ebx
				87	adcl %ebx, %eax
				88	lea 32(%esi), %esi # lea leaves the flags alone
				89	dec %ecx # dec leaves CF alone
				90	jne 1b
				91	adcl $0, %eax # add the last carry of the chain
					# Remaining whole dwords: %edx = (len & 0x1c) / 4, that is 0 to 7.
				92	2: movl %edx, %ecx
				93	andl $0x1c, %edx
				94	je 4f
				95	shrl $2, %edx # This clears CF
				96	3: adcl (%esi), %eax
				97	lea 4(%esi), %esi
				98	dec %edx
				99	jne 3b
				100	adcl $0, %eax
					# Tail of 0 to 3 bytes, gathered in %ecx:
					#   1 byte : %ecx = byte
					#   2 bytes: %ecx = 16-bit word
					#   3 bytes: %ecx = word << 16 | byte
					# The flags set by cmpl survive the movw and leal, so the je
					# below still tests for a tail of exactly 2 bytes.
				101	4: andl $3, %ecx
				102	jz 7f
				103	cmpl $2, %ecx
				104	jb 5f
				105	movw (%esi),%cx
				106	leal 2(%esi),%esi
				107	je 6f
				108	shll $16,%ecx
				109	5: movb (%esi),%cl
				110	6: addl %ecx,%eax
				111	adcl $0, %eax
				112	7:
				113	popl %ebx
				114	popl %esi
				115	ret
				116
				117	#else
				118
				119	/* Version for PentiumII/PPro */
				120
Bodo Stroesser	7d37c6d	2005-05-05 16:15:36 -0700	[diff] [blame]	121	csum_partial:
					# Pentium Pro/II variant, assembled when
					# CONFIG_X86_USE_PPRO_CHECKSUM is defined.
					# In:  buff, len and sum are read from the stack at 12(%esp),
					#      16(%esp) and 20(%esp), offsets as seen after the two pushes.
					# Out: %eax = sum plus the data, accumulated at 32-bit width with
					#      every carry added back in.
					# %esi and %ebx are saved and restored; %ecx, %edx and the flags
					# are overwritten.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	122	pushl %esi
				123	pushl %ebx
				124	movl 20(%esp),%eax # Function arg: unsigned int sum
				125	movl 16(%esp),%ecx # Function arg: int len
				126	movl 12(%esp),%esi # Function arg: const unsigned char *buf
				127
				128	testl $2, %esi
				129	jnz 30f
				130	10:
					# %edx = len, kept for the tail code
					# %ebx = len & 0x7c, bytes of whole dwords in the partial block
					# %ecx = len >> 7, number of full 128-byte blocks
					# %esi is moved to the end of the partial block so the unrolled
					# list can address it with negative displacements.
				131	movl %ecx, %edx
				132	movl %ecx, %ebx
				133	andl $0x7c, %ebx
				134	shrl $7, %ecx
				135	addl %ebx,%esi
				136	shrl $2, %ebx
				137	negl %ebx
					# Computed entry into the unrolled list: 45f - 3 * dword count.
					# NOTE(review): this relies on every entry between 40 and 45
					# assembling to exactly 3 bytes; confirm before editing the list.
				138	lea 45f(%ebx,%ebx,2), %ebx
				139	testl %esi, %esi # clears CF ahead of the adcl chain
				140	jmp *%ebx
				141
				142	# Handle 2-byte-aligned regions
				143	20: addw (%esi), %ax
				144	lea 2(%esi), %esi
				145	adcl $0, %eax
				146	jmp 10b
				147
					# buff has bit 1 set. More than 2 bytes: consume one word at 20
					# and continue on the aligned path. Exactly 2, 1 or 0 bytes are
					# finished right here.
				148	30: subl $2, %ecx
				149	ja 20b
				150	je 32f
					addl $2, %ecx # len was below 2: undo the subtraction
					jz 80f # len == 0: nothing to sum, leave the buffer untouched
				151	movzbl (%esi),%ebx # csumming 1 byte, 2-aligned
				152	addl %ebx, %eax
				153	adcl $0, %eax
				154	jmp 80f
				155	32:
				156	addw (%esi), %ax # csumming 2 bytes, 2-aligned
				157	adcl $0, %eax
				158	jmp 80f
				159
					# One full 128-byte block. The first entry is addl because the
					# carry was already added back at 45 before looping here.
				160	40:
				161	addl -128(%esi), %eax
				162	adcl -124(%esi), %eax
				163	adcl -120(%esi), %eax
				164	adcl -116(%esi), %eax
				165	adcl -112(%esi), %eax
				166	adcl -108(%esi), %eax
				167	adcl -104(%esi), %eax
				168	adcl -100(%esi), %eax
				169	adcl -96(%esi), %eax
				170	adcl -92(%esi), %eax
				171	adcl -88(%esi), %eax
				172	adcl -84(%esi), %eax
				173	adcl -80(%esi), %eax
				174	adcl -76(%esi), %eax
				175	adcl -72(%esi), %eax
				176	adcl -68(%esi), %eax
				177	adcl -64(%esi), %eax
				178	adcl -60(%esi), %eax
				179	adcl -56(%esi), %eax
				180	adcl -52(%esi), %eax
				181	adcl -48(%esi), %eax
				182	adcl -44(%esi), %eax
				183	adcl -40(%esi), %eax
				184	adcl -36(%esi), %eax
				185	adcl -32(%esi), %eax
				186	adcl -28(%esi), %eax
				187	adcl -24(%esi), %eax
				188	adcl -20(%esi), %eax
				189	adcl -16(%esi), %eax
				190	adcl -12(%esi), %eax
				191	adcl -8(%esi), %eax
				192	adcl -4(%esi), %eax
				193	45:
					# Step %esi past the next block, add the pending carry, and run
					# 40 once per remaining full block. %ecx drops below zero when
					# none are left, which leaves %esi 128 bytes past the data
					# summed so far.
				194	lea 128(%esi), %esi
				195	adcl $0, %eax
				196	dec %ecx
				197	jge 40b
				198	movl %edx, %ecx
				199	50: andl $3, %ecx
				200	jz 80f
				201
				202	# Handle the last 1-3 bytes without jumping
					# Resulting mask in %ebx: 1 byte gives 0xff, 2 bytes 0xffff,
					# 3 bytes 0xffffff. The load uses -128(%esi) because %esi is 128
					# bytes ahead, see 45.
					# NOTE(review): a full dword is loaded before masking, so up to
					# 3 bytes beyond len are read.
				203	notl %ecx # 1->2, 2->1, 3->0, higher bits are masked
				204	movl $0xffffff,%ebx # by the shll and shrl instructions
				205	shll $3,%ecx
				206	shrl %cl,%ebx
				207	andl -128(%esi),%ebx # esi is 4-aligned so should be ok
				208	addl %ebx,%eax
				209	adcl $0,%eax
				210	80:
				211	popl %ebx
				212	popl %esi
				213	ret
				214
				215	#endif
				216
				217	/*
				218	unsigned int csum_partial_copy_generic_i386 (const char *src, char *dst,
				219	int len, int sum, int *src_err_ptr, int *dst_err_ptr)
				220	*/
				221
				222	/*
				223	* Copy from ds while checksumming, otherwise like csum_partial
				224	*
				225	* The macros SRC and DST specify the type of access for the instruction.
				226	* thus we can call a custom exception handler for all access types.
				227	*
				228	* FIXME: could someone double-check whether I haven't mixed up some SRC and
				229	* DST definitions? It's damn hard to trigger all cases. I hope I got
				230	* them all but there's no guarantee.
				231	*/
				232
					/*
					* SRC(insn) and DST(insn) emit insn under the local label 9999 and
					* add a pair of addresses to the __ex_table section: the address of
					* insn and the address of a fixup label. SRC pairs the instruction
					* with label 6001 (source access), DST with label 6002 (destination
					* access). Both labels are forward references, so each function
					* that uses these macros has to define 6001 and 6002 after its last
					* use of them. .previous switches back to the section that was
					* active before the table entry was emitted.
					*/
				233	#define SRC(y...) \
				234	9999: y; \
				235	.section __ex_table, "a"; \
				236	.long 9999b, 6001f ; \
				237	.previous
				238
				239	#define DST(y...) \
				240	9999: y; \
				241	.section __ex_table, "a"; \
				242	.long 9999b, 6002f ; \
				243	.previous
				244
				245	.align 4
				246	.globl csum_partial_copy_generic_i386
				247
				248	#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
				249
				250	#define ARGBASE 16
				251	#define FP 12
					/*
					* ARGBASE: bytes placed on the stack by the prologue below (one
					* 4-byte scratch slot plus three saved registers). The arguments
					* are read from ARGBASE+4(%esp) upwards.
					* FP: offset of that scratch slot from %esp after the three pushes.
					*/
				252
				253	csum_partial_copy_generic_i386:
					# Generic i386 variant, assembled when CONFIG_X86_USE_PPRO_CHECKSUM
					# is not defined. Copies len bytes from src to dst while summing
					# them.
					# In:  ARGBASE+4 = src, +8 = dst, +12 = len, +16 = sum,
					#      +20 = src_err_ptr, +24 = dst_err_ptr (all relative to %esp).
					# Out: %eax = sum plus the copied data, carries added back in.
					#      After a faulting source access: -EFAULT is stored through
					#      src_err_ptr, len bytes at dst are zeroed, %eax = 0.
					#      After a faulting destination access: -EFAULT is stored
					#      through dst_err_ptr and %eax keeps the sum reached so far.
					# %edi, %esi and %ebx are saved and restored; %ecx, %edx and the
					# flags are overwritten.
				254	subl $4,%esp
				255	pushl %edi
				256	pushl %esi
				257	pushl %ebx
				258	movl ARGBASE+16(%esp),%eax # sum
				259	movl ARGBASE+12(%esp),%ecx # len
				260	movl ARGBASE+4(%esp),%esi # src
				261	movl ARGBASE+8(%esp),%edi # dst
				262
					# The alignment test is made on the destination pointer.
				263	testl $2, %edi # Check alignment.
				264	jz 2f # Jump if alignment is ok.
				265	subl $2, %ecx # Alignment uses up two bytes.
				266	jae 1f # Jump if we had at least two bytes.
				267	addl $2, %ecx # ecx was < 2. Deal with it.
				268	jmp 4f
				269	SRC(1: movw (%esi), %bx )
				270	addl $2, %esi
				271	DST( movw %bx, (%edi) )
				272	addl $2, %edi
				273	addw %bx, %ax
				274	adcl $0, %eax
				275	2:
					# The remaining byte count is parked in the scratch slot because
					# %edx serves as a data register inside the loop. %ecx becomes
					# the number of 32-byte chunks.
				276	movl %ecx, FP(%esp)
				277	shrl $5, %ecx
				278	jz 2f
				279	testl %esi, %esi
					# Eight dwords per pass, moved in pairs through %ebx and %edx.
					# The testl above cleared CF; movl, lea and dec leave CF alone,
					# so the carry chain survives the interleaved stores and the
					# loop branch.
				280	SRC(1: movl (%esi), %ebx )
				281	SRC( movl 4(%esi), %edx )
				282	adcl %ebx, %eax
				283	DST( movl %ebx, (%edi) )
				284	adcl %edx, %eax
				285	DST( movl %edx, 4(%edi) )
				286
				287	SRC( movl 8(%esi), %ebx )
				288	SRC( movl 12(%esi), %edx )
				289	adcl %ebx, %eax
				290	DST( movl %ebx, 8(%edi) )
				291	adcl %edx, %eax
				292	DST( movl %edx, 12(%edi) )
				293
				294	SRC( movl 16(%esi), %ebx )
				295	SRC( movl 20(%esi), %edx )
				296	adcl %ebx, %eax
				297	DST( movl %ebx, 16(%edi) )
				298	adcl %edx, %eax
				299	DST( movl %edx, 20(%edi) )
				300
				301	SRC( movl 24(%esi), %ebx )
				302	SRC( movl 28(%esi), %edx )
				303	adcl %ebx, %eax
				304	DST( movl %ebx, 24(%edi) )
				305	adcl %edx, %eax
				306	DST( movl %edx, 28(%edi) )
				307
				308	lea 32(%esi), %esi
				309	lea 32(%edi), %edi
				310	dec %ecx
				311	jne 1b
				312	adcl $0, %eax
					# Remaining whole dwords: %edx = (len & 0x1c) / 4, that is 0 to 7.
					# %ecx gets the byte count back for the tail code.
				313	2: movl FP(%esp), %edx
				314	movl %edx, %ecx
				315	andl $0x1c, %edx
				316	je 4f
				317	shrl $2, %edx # This clears CF
				318	SRC(3: movl (%esi), %ebx )
				319	adcl %ebx, %eax
				320	DST( movl %ebx, (%edi) )
				321	lea 4(%esi), %esi
				322	lea 4(%edi), %edi
				323	dec %edx
				324	jne 3b
				325	adcl $0, %eax
					# Tail of 0 to 3 bytes, copied and gathered in %ecx:
					#   1 byte : %ecx = byte
					#   2 bytes: %ecx = 16-bit word
					#   3 bytes: %ecx = word << 16 | byte
					# The flags set by cmpl survive the moves and leal, so the je
					# below still tests for a tail of exactly 2 bytes.
				326	4: andl $3, %ecx
				327	jz 7f
				328	cmpl $2, %ecx
				329	jb 5f
				330	SRC( movw (%esi), %cx )
				331	leal 2(%esi), %esi
				332	DST( movw %cx, (%edi) )
				333	leal 2(%edi), %edi
				334	je 6f
				335	shll $16,%ecx
				336	SRC(5: movb (%esi), %cl )
				337	DST( movb %cl, (%edi) )
				338	6: addl %ecx, %eax
				339	adcl $0, %eax
				340	7:
					# 5000 marks the common exit. Both fixup handlers jump back to
					# it, and the epilogue after .previous below continues from here.
				341	5000:
				342
				343	# Exception handler:
				344	.section .fixup, "ax"
				345
					# 6001: target of every SRC() table entry in this function.
				346	6001:
				347	movl ARGBASE+20(%esp), %ebx # src_err_ptr
				348	movl $-EFAULT, (%ebx)
				349
				350	# zero the complete destination - computing the rest
				351	# is too much work
					# dst and len are reloaded from the argument area, so the whole
					# destination is cleared. The xorl also leaves 0 in %eax as the
					# returned sum.
				352	movl ARGBASE+8(%esp), %edi # dst
				353	movl ARGBASE+12(%esp), %ecx # len
				354	xorl %eax,%eax
				355	rep ; stosb
				356
				357	jmp 5000b
				358
					# 6002: target of every DST() table entry in this function.
					# Only the error is recorded; %eax is left as it was.
				359	6002:
				360	movl ARGBASE+24(%esp), %ebx # dst_err_ptr
				361	movl $-EFAULT,(%ebx)
				362	jmp 5000b
				363
				364	.previous
				365
					# Epilogue, back in the section that holds label 5000.
				366	popl %ebx
				367	popl %esi
				368	popl %edi
				369	popl %ecx # equivalent to addl $4,%esp
				370	ret
				371
				372	#else
				373
				374	/* Version for PentiumII/PPro */
				375
				376	#define ROUND1(x) \
				377	SRC(movl x(%esi), %ebx ) ; \
				378	addl %ebx, %eax ; \
				379	DST(movl %ebx, x(%edi) ) ;
				380
				381	#define ROUND(x) \
				382	SRC(movl x(%esi), %ebx ) ; \
				383	adcl %ebx, %eax ; \
				384	DST(movl %ebx, x(%edi) ) ;
				385
					/*
					* ROUND1(x) and ROUND(x) copy the dword at x(%esi) to x(%edi)
					* through %ebx and add it to %eax. ROUND1 uses addl (no carry in);
					* ROUND uses adcl, so the carry chains on from the previous round.
					*
					* ARGBASE: bytes pushed by the prologue below (three registers).
					* The arguments are read from ARGBASE+4(%esp) upwards.
					*/
				386	#define ARGBASE 12
				387
				388	csum_partial_copy_generic_i386:
					# Pentium Pro/II variant, assembled when
					# CONFIG_X86_USE_PPRO_CHECKSUM is defined. Copies len bytes from
					# src to dst while summing them.
					# In:  ARGBASE+4 = src, +8 = dst, +12 = len, +16 = sum,
					#      +20 = src_err_ptr, +24 = dst_err_ptr (all relative to %esp).
					# Out: %eax = sum plus the copied data, carries added back in.
					#      After a faulting source access: -EFAULT is stored through
					#      src_err_ptr, len bytes at dst are zeroed, %eax = 0.
					#      After a faulting destination access: -EFAULT is stored
					#      through dst_err_ptr and %eax keeps the sum reached so far.
					# %ebx, %edi and %esi are saved and restored; %ecx, %edx and the
					# flags are overwritten.
				389	pushl %ebx
				390	pushl %edi
				391	pushl %esi
				392	movl ARGBASE+4(%esp),%esi #src
				393	movl ARGBASE+8(%esp),%edi #dst
				394	movl ARGBASE+12(%esp),%ecx #len
				395	movl ARGBASE+16(%esp),%eax #sum
				396	# movl %ecx, %edx
				397	movl %ecx, %ebx
					# NOTE(review): the value written to %edx by the next
					# instruction is never read; the lea further down overwrites it.
				398	movl %esi, %edx
					# %ecx = len >> 6, number of full 64-byte blocks
					# %ebx = -(len & 0x3c), negated byte count of the whole dwords
					#        in the partial block
					# Subtracting the negative %ebx advances %esi and %edi to the
					# end of the partial block.
				399	shrl $6, %ecx
				400	andl $0x3c, %ebx
				401	negl %ebx
				402	subl %ebx, %esi
				403	subl %ebx, %edi
					# %edx = (%esi - 1) rounded down to a multiple of 32; the loop
					# uses it as the base of its two byte loads.
				404	lea -1(%esi),%edx
				405	andl $-32,%edx
					# Computed entry into the unrolled list: 3f - 2 * byte count,
					# which is 8 bytes of code per dword.
					# NOTE(review): this relies on every ROUND expanding to exactly
					# 8 bytes of machine code; confirm before editing the macros.
				406	lea 3f(%ebx,%ebx), %ebx
				407	testl %esi, %esi
				408	jmp *%ebx
					# One full 64-byte block per pass. Both pointers are advanced
					# first, so the rounds use displacements -64 to -4.
				409	1: addl $64,%esi
				410	addl $64,%edi
					# Two byte loads from the source whose value is discarded
					# (%ebx is reloaded by ROUND1). Presumably they touch the source
					# ahead of the dword loads - TODO confirm. Being wrapped in
					# SRC(), a fault here is routed to 6001 as well.
				411	SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
				412	ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
				413	ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
				414	ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
				415	ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4)
					# Add the pending carry, move the byte-load base forward by one
					# block, and loop while full blocks remain (%ecx drops below
					# zero when none are left).
				416	3: adcl $0,%eax
				417	addl $64, %edx
				418	dec %ecx
				419	jge 1b
					# Tail of 0 to 3 bytes, copied and gathered in %edx:
					#   1 byte : %edx = byte
					#   2 bytes: %edx = 16-bit word
					#   3 bytes: %edx = word << 16 | byte
					# The flags set by cmpl survive the moves and leal, so the je
					# below still tests for a tail of exactly 2 bytes.
				420	4: movl ARGBASE+12(%esp),%edx #len
				421	andl $3, %edx
				422	jz 7f
				423	cmpl $2, %edx
				424	jb 5f
				425	SRC( movw (%esi), %dx )
				426	leal 2(%esi), %esi
				427	DST( movw %dx, (%edi) )
				428	leal 2(%edi), %edi
				429	je 6f
				430	shll $16,%edx
				431	5:
				432	SRC( movb (%esi), %dl )
				433	DST( movb %dl, (%edi) )
				434	6: addl %edx, %eax
				435	adcl $0, %eax
					# 7 is the common exit. Both fixup handlers jump back to it, and
					# the epilogue after .previous below continues from here.
				436	7:
				437	.section .fixup, "ax"
					# 6001: target of every SRC() table entry in this function.
					# dst and len are reloaded from the argument area, so the whole
					# destination is cleared; the xorl also leaves 0 in %eax as the
					# returned sum.
				438	6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr
				439	movl $-EFAULT, (%ebx)
				440	# zero the complete destination (computing the rest is too much work)
				441	movl ARGBASE+8(%esp),%edi # dst
				442	movl ARGBASE+12(%esp),%ecx # len
				443	xorl %eax,%eax
				444	rep; stosb
				445	jmp 7b
					# 6002: target of every DST() table entry in this function.
					# Only the error is recorded; %eax is left as it was.
				446	6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr
				447	movl $-EFAULT, (%ebx)
				448	jmp 7b
				449	.previous
				450
					# Epilogue, back in the section that holds label 7.
				451	popl %esi
				452	popl %edi
				453	popl %ebx
				454	ret
				455
				456	#undef ROUND
				457	#undef ROUND1
				458
				459	#endif