/* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *			     converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 */
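
/*
 * For reference, what the routine below computes is roughly the following
 * C (an illustrative sketch only -- the helper name and the 64-bit
 * accumulator are assumptions of this comment, not kernel code; the result
 * matches modulo the final fold to 16 bits, which callers do later, e.g.
 * via csum_fold()):
 *
 *	static unsigned int csum_partial_sketch(const unsigned char *buf,
 *						int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 2) {	// buf is assumed 2-byte aligned
 *			acc += *(const unsigned short *)buf;
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len)		// trailing byte; big-endian shifts
 *			acc += *buf;	// it left by 8 (see label 7 below)
 *		while (acc >> 32)	// fold carries back into the sum
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */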

.text
ENTRY(csum_partial)
	  /*
	   * Experiments with Ethernet and SLIP connections show that buf
	   * is aligned on either a 2-byte or 4-byte boundary.  We get at
	   * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	   * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	   * alignment for the unrolled loop.
	   */
	mov	r5, r1
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
	mov	#-5, r0
	shld	r0, r5
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	 clrt
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so it means add carry-bit
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	rts
	 mov	r6, r0

/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len,
 *					  int sum, int *src_err_ptr, int *dst_err_ptr)
 */
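
/*
 * Likewise, a C sketch of the copy variant (illustrative only; the helper
 * name and the plain loads/stores are assumptions of this comment -- in
 * the real routine a faulting access is caught through the __ex_table
 * fixups below, which store -EFAULT through src_err_ptr/dst_err_ptr):
 *
 *	static unsigned int csum_copy_sketch(const unsigned char *src,
 *					     unsigned char *dst, int len,
 *					     unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 2) {
 *			unsigned short w = *(const unsigned short *)src;
 *			*(unsigned short *)dst = w;
 *			acc += w;
 *			src += 2;
 *			dst += 2;
 *			len -= 2;
 *		}
 *		if (len) {		// trailing byte, as in csum_partial
 *			*dst = *src;
 *			acc += *src;
 *		}
 *		while (acc >> 32)	// fold carries back into the sum
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */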

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
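
!
! For example, SRC( mov.b @r4+,r1 ) expands to
!
!	9999: mov.b @r4+,r1
!	.section __ex_table, "a"
!	.long 9999b, 6001f
!	.previous
!
! i.e. the address of the access is recorded in __ex_table, so a fault on
! it resumes at the 6001: fixup below (6002: for DST accesses).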

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
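! Seen from C, a call looks like (illustrative only; the actual callers
! are the csum_partial_copy_*() wrappers in the SH <asm/checksum.h>):
!
!	sum = csum_partial_copy_generic(src, dst, len, sum,
!					&src_err, &dst_err);
!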
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1		)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f
	bra	5f
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	 clrt
	shll16	r0
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	mov.l	@(8,r15),r0		! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst
	mov.l	@r15,r6		! len
	mov	#0,r7
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	 add	#1,r5
	mov.l	8000f,r0
	jmp	@r0
	 nop
	.align	2
8000:	.long	5000b

6002:
	mov.l	@(12,r15),r0		! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	 nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15
	rts
	 mov	r7,r0