/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
/* $Id$ */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
 */

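/*
 * For reference, a minimal C sketch of what csum_partial computes: a
 * 32-bit one's-complement partial sum over the buffer.  This is an
 * illustrative approximation only (csum_partial_sketch is hypothetical;
 * little-endian byte order is assumed, and the odd-address byte swap
 * performed by the assembly below is omitted), not this file's exact
 * code path:
 *
 *	unsigned int csum_partial_sketch(const unsigned char *buff,
 *					 int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *		while (len > 1) {
 *			acc += (unsigned int)(buff[0] | (buff[1] << 8));
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len == 1)
 *			acc += buff[0];
 *		while (acc >> 32)
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */
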
#ifdef CONFIG_ISA_DUAL_ISSUE

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
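
	/*
	 * A sketch of that conversion (illustrative C, an assumption
	 * rather than code taken from this file), once len >= 2 is known:
	 * consume one halfword with end-around carry so the unrolled
	 * word loop below starts 4-byte aligned.
	 *
	 *	if ((unsigned long)buff & 2) {
	 *		unsigned int w = *(const unsigned short *)buff;
	 *		sum += w;
	 *		if (sum < w)
	 *			sum++;
	 *		buff += 2;
	 *		len -= 2;
	 *	}
	 */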

	.text
ENTRY(csum_partial)
	; Function args
	; r0: unsigned char *buff
	; r1: int len
	; r2: unsigned int sum

	push	r2 || ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0 || addi	r0, #1
	; clear c-bit || Alignment uses up bytes.
	cmp	r0, r0 || addi	r1, #-1
	ldi	r3, #0 || addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0 || addi	r1, #-2
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	bra	4f || addi	r1, #2
	.fillinsn			; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned
	lduh	r4, @r0 || ldi	r3, #0
	addx	r2, r4 || addi	r0, #2
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	ld	r3, @r0+ || addx	r2, r3	; +12
	ld	r4, @r0+ || addx	r2, r4	; +16
	ld	r5, @r0+ || addx	r2, r5	; +20
	ld	r3, @r0+ || addx	r2, r3	; +24
	ld	r4, @r0+ || addx	r2, r4	; +28
	addx	r2, r5 || addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; remaining whole words (len mod 32)
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+ || addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0, goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2, goto 5f (single byte)
	lduh	r4, @r0 || addi	r0, #2
	addi	r1, #-2 || slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0 || ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1
	.fillinsn
7:
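	; Fold the 32-bit sum into 16 bits: add the high halfword to the
	; low one, then absorb any end-around carry.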
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; no swap needed if buff was aligned
	; buff was odd-aligned: swap the upper byte for the lower
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
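	; Add the caller-supplied initial sum back in, with end-around carry.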
	pop	r2 || cmp	r0, r0
	addx	r0, r2 || ldi	r2, #0
	addx	r0, r2
	jmp	r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	; r0: unsigned char *buff
	; r1: int len
	; r2: unsigned int sum

	push	r2
	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up bytes.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f
	.fillinsn
1:
	; 2-byte aligned
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; remaining whole words (len mod 32)
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0, goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2, goto 5f (single byte)

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5
	.fillinsn
7:
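	; Fold the 32-bit sum into 16 bits: add the high halfword to the
	; low one, then absorb any end-around carry.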
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
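	; If buff was odd-aligned, the bytes were summed in swapped
	; positions; swapping the folded result compensates (RFC 1071).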
	beqz	r7, 1f
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
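	; Add the caller-supplied initial sum back in, with end-around carry.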
	pop	r2
	cmp	r0, r0
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *        DST definitions? It's damn hard to trigger all cases.  I hope I got
 *        them all but there's no guarantee.
 */

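/*
 * A hedged C sketch of the intended semantics (an assumption based on
 * how this helper is used elsewhere in the kernel, since the body below
 * is still a stub): copy src to dst while accumulating the checksum.
 * The helper name is hypothetical, and fault handling through
 * src_err_ptr/dst_err_ptr is omitted; a real implementation would trap
 * access faults and record -EFAULT through those pointers.
 *
 *	unsigned int csum_partial_copy_sketch(const char *src, char *dst,
 *					      int len, int sum,
 *					      int *src_err_ptr, int *dst_err_ptr)
 *	{
 *		memcpy(dst, src, len);
 *		return csum_partial((const unsigned char *)dst, len, sum);
 *	}
 */
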
ENTRY(csum_partial_copy_generic)
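	; FIXME: stub -- returns immediately without copying or checksumming.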
	nop
	nop
	nop
	nop
	jmp	r14
	nop
	nop
	nop