/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *		handling.
 *		Andi Kleen, add zeroing on error
 *		converted to pure assembler
 *		Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 */
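
/*
 * For reference: csum_partial() computes the RFC 1071 Internet-checksum
 * partial sum of the buffer and combines it with the caller-supplied sum.
 * A minimal C sketch of the semantics (illustrative only; the name
 * csum_partial_ref is hypothetical and this is not the code assembled
 * below):
 *
 *	unsigned int csum_partial_ref(const unsigned char *buff, int len,
 *				      unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len > 1) {		// sum 16-bit words
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)			// trailing odd byte
 *			acc += *buff;		// (shifted left 8 on big-endian)
 *		while (acc >> 32)		// wrap carries back in
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 *
 * The assembly below additionally handles buffers starting at odd
 * addresses by byte-swapping the folded result back at the end.
 */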


#ifdef CONFIG_ISA_DUAL_ISSUE

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	; r0: unsigned char *buff
	; r1: int len
	; r2: unsigned int sum

	push	r2 || ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0 || addi	r0, #1
	; clear c-bit || Alignment uses up bytes.
	cmp	r0, r0 || addi	r1, #-1
	ldi	r3, #0 || addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0 || addi	r1, #-2
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	bra	4f || addi	r1, #2
	.fillinsn			; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned
	lduh	r4, @r0 || ldi	r3, #0
	addx	r2, r4 || addi	r0, #2
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5		; r6 = number of 32-byte blocks
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	ld	r3, @r0+ || addx	r2, r3	; +12
	ld	r4, @r0+ || addx	r2, r4	; +16
	ld	r5, @r0+ || addx	r2, r5	; +20
	ld	r3, @r0+ || addx	r2, r3	; +24
	ld	r4, @r0+ || addx	r2, r4	; +28
	addx	r2, r5 || addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; remaining word-aligned bytes
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+ || addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)
	lduh	r4, @r0 || addi	r0, #2
	addi	r1, #-2 || slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0 || ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1
	.fillinsn
7:
	; fold the 32-bit sum into 16 bits
	and3	r0, r2, #0xffff		; r0 = low halfword
	srli	r2, #16			; r2 = high halfword
	add	r0, r2
	srl3	r2, r0, #16		; carry out of the fold?
	beqz	r2, 1f
	addi	r0, #1			; add the end-around carry
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	; add the caller-supplied sum back in, with end-around carry
	pop	r2 || cmp	r0, r0
	addx	r0, r2 || ldi	r2, #0
	addx	r0, r2
	jmp	r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	; r0: unsigned char *buff
	; r1: int len
	; r2: unsigned int sum

	push	r2
	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up bytes.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f
	.fillinsn
1:
	; 2-byte aligned
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5		; r6 = number of 32-byte blocks
	beqz	r6, 2f
	.fillinsn

1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; remaining word-aligned bytes
	beqz	r6, 4f
	srli	r6, #2
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0
	cmp	r0, r0			; This clears c-bit
	.fillinsn

4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2 goto 5f (1 byte)

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5
	.fillinsn
7:
	; fold the 32-bit sum into 16 bits
	and3	r0, r2, #0xffff		; r0 = low halfword
	srli	r2, #16			; r2 = high halfword
	add	r0, r2
	srl3	r2, r0, #16		; carry out of the fold?
	beqz	r2, 1f
	addi	r0, #1			; add the end-around carry
	and3	r0, r0, #0xffff
	.fillinsn
1:
	beqz	r7, 1f			; swap the upper byte for the lower
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	; add the caller-supplied sum back in, with end-around carry
	pop	r2
	cmp	r0, r0			; clear c-bit
	addx	r0, r2
	ldi	r2, #0
	addx	r0, r2
	jmp	r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
					int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction;
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
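
/*
 * For reference, the documented interface is expected to copy len bytes
 * from src to dst while accumulating the checksum, reporting faults via
 * the two error pointers.  A rough C sketch of those semantics (an
 * illustration only; csum_partial_copy_ref is a hypothetical name, and
 * fault handling via the error pointers is omitted):
 *
 *	unsigned int csum_partial_copy_ref(const char *src, char *dst,
 *					   int len, int sum)
 *	{
 *		memcpy(dst, src, len);
 *		return csum_partial((const unsigned char *)dst, len, sum);
 *	}
 *
 * Note that the m32r body below is only a placeholder: it returns
 * immediately without copying or checksumming anything.
 */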

ENTRY(csum_partial_copy_generic)
	nop
	nop
	nop
	nop
	jmp	r14
	nop
	nop
	nop

	.end