Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $ |
| 2 | * |
| 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
| 4 | * operating system. INET is implemented using the BSD Socket |
| 5 | * interface as the means of communication with the user level. |
| 6 | * |
| 7 | * IP/TCP/UDP checksumming routines |
| 8 | * |
| 9 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> |
| 10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> |
| 11 | * Tom May, <ftom@netcom.com> |
| 12 | * Pentium Pro/II routines: |
| 13 | * Alexander Kjeldaas <astor@guardian.no> |
| 14 | * Finn Arne Gangstad <finnag@guardian.no> |
| 15 | * Lots of code moved from tcp.c and ip.c; see those files |
| 16 | * for more names. |
| 17 | * |
| 18 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception |
| 19 | * handling. |
| 20 | * Andi Kleen, add zeroing on error |
| 21 | * converted to pure assembler |
| 22 | * |
| 23 | * SuperH version: Copyright (C) 1999 Niibe Yutaka |
| 24 | * |
| 25 | * This program is free software; you can redistribute it and/or |
| 26 | * modify it under the terms of the GNU General Public License |
| 27 | * as published by the Free Software Foundation; either version |
| 28 | * 2 of the License, or (at your option) any later version. |
| 29 | */ |
| 30 | |
| 31 | #include <asm/errno.h> |
| 32 | #include <linux/linkage.h> |
| 33 | |
| 34 | /* |
| 35 | * computes a partial checksum, e.g. for TCP/UDP fragments |
| 36 | */ |
| 37 | |
| 38 | /* |
| 39 | * unsigned int csum_partial(const unsigned char *buf, int len, |
| 40 | * unsigned int sum); |
| 41 | */ |
| 42 | |
| 43 | .text |
ENTRY(csum_partial)
	/*
	 * unsigned int csum_partial(const unsigned char *buf, int len,
	 *			     unsigned int sum);
	 *
	 * In:	r4 = buf, r5 = len, r6 = sum
	 * Out:	r0 = 32-bit ones-complement partial sum.  Only the value
	 *	obtained after folding it to 16 bits with end-around carry
	 *	is meaningful to callers.
	 * Clobbers: r0-r7, T
	 *
	 * Register roles:
	 *	r1 = bytes still to be summed once the pointer is 4-byte aligned
	 *	r5 = loop counter / scratch
	 *	r7 = original buf, kept to detect an odd start address at exit
	 *
	 * The unrolled loop needs a 4-byte aligned pointer.  A 2-byte
	 * aligned buffer is converted by summing one leading 16-bit word.
	 * (The historical note here about a twofold speedup on "486 and
	 * Pentium" appears to have been carried over from another port.)
	 *
	 * An odd start address is handled as well: the leading byte is
	 * added, the accumulator is rotated left by 8 bits, the now even
	 * remainder is summed by the ordinary code, and the accumulator is
	 * rotated by 8 bits again before returning.  Rotating a 32-bit
	 * end-around-carry sum by 8 bits swaps the bytes of its 16-bit
	 * folded value, which is exactly the correction required when the
	 * data is read one byte out of phase.
	 */
	mov	r4, r7		! r7 = original buf (parity checked at exit)
	mov	r4, r0
	tst	#1, r0		! Odd start address?
	bt/s	20f		! No: buf is at least 2-byte aligned.
	 mov	r5, r1		! (delay slot) r1 = len
	!
	! buf is odd.
	tst	r5, r5
	bf	11f
	rts			! len == 0: nothing to add, return sum as is.
	 mov	r6, r0
11:
	mov.b	@r4+, r0	! Consume the leading byte; r4 is now even.
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! First byte is the high half of a 16-bit word.
#endif
	add	#-1, r5
	clrt
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! Fold the carry back into the sum.
	mov	r6, r0		! Rotate the sum left by 8 bits:
	shll8	r6		!   r6 = sum << 8
	shlr16	r0
	shlr8	r0		!   r0 = sum >> 24
	or	r0, r6
	mov	r5, r1		! r1 = remaining len
	mov	r4, r0
20:
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! Fold the carry back into the sum.
2:
	mov	#-5, r0
	shld	r0, r5		! r5 = number of 32-byte chunks
	tst	r5, r5
	bt/s	4f		! if it is 0, go to 4f
	 clrt
	.align	2
3:
	! Sum one 32-byte chunk; loads are interleaved with the adds.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! Save the carry: dt overwrites T ...
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0		! ... and restore it into T (delay slot).
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0	! Bytes left in whole 32-bit words (0..28).
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5		! r5 = number of 32-bit words
	mov	#0, r2
5:
	addc	r2, r6		! Adds the word loaded on the previous pass.
	mov.l	@r4+, r2
	movt	r0		! Carry saved/restored around dt, as above.
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so it means add carry-bit
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5		! r5 = trailing bytes (0..3)
	tst	r5, r5
	bt	9f		! if it is 0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f		! Exactly one byte left (T is 0 here).
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! Exactly two bytes left.
	 clrt
	shll16	r0		! Three bytes: word goes in the upper half,
	addc	r0, r6		! the last byte is added below.
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! Fold the final carry.
9:
	! If buf started on an odd address the sum is still rotated by
	! 8 bits relative to the caller's byte lanes; rotate it again.
	mov	r7, r0
	tst	#1, r0
	bt	10f
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	 mov	r6, r0
| 146 | |
| 147 | /* |
| 148 | unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, |
| 149 | int sum, int *src_err_ptr, int *dst_err_ptr) |
| 150 | */ |
| 151 | |
/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so that a custom exception handler can be called for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
| 162 | |
/*
 * Emit a single instruction (the variadic arguments) at local label 9999
 * and add an __ex_table record made of that instruction's address and the
 * given fixup label.  SRC() pairs the instruction with fixup 6001 and
 * DST() pairs it with fixup 6002.
 */
#define __CSUM_EX_ACCESS(fixup, ...)		\
9999:	__VA_ARGS__ ;				\
	.section __ex_table, "a";		\
	.long	9999b, fixup ;			\
	.previous

#define SRC(...)	__CSUM_EX_ACCESS(6001f, __VA_ARGS__)

#define DST(...)	__CSUM_EX_ACCESS(6002f, __VA_ARGS__)
| 174 | |
| 175 | ! |
| 176 | ! r4: const char *SRC |
| 177 | ! r5: char *DST |
| 178 | ! r6: int LEN |
| 179 | ! r7: int SUM |
| 180 | ! |
| 181 | ! on stack: |
| 182 | ! int *SRC_ERR_PTR |
| 183 | ! int *DST_ERR_PTR |
| 184 | ! |
ENTRY(csum_partial_copy_generic)
	!
	! Copies len bytes from src to dst while adding them to sum, and
	! returns the updated 32-bit partial sum in r0.
	!
	! Stack layout after the two pushes below (the fixup code at
	! 6001/6002 relies on it):
	!	@(0,r15)	len		(saved r6)
	!	@(4,r15)	dst		(saved r5)
	!	@(8,r15)	src_err_ptr	(stack argument)
	!	@(12,r15)	dst_err_ptr	(stack argument)
	!
	! Register roles:
	!	r2 = byte count used by the tail code at label 4 and by the
	!	     odd-byte test of the slow path; every path that reaches
	!	     those places must have loaded it
	!	r6 = loop counter
	!	r7 = running sum
	!
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	 mov	r6,r2		! Label 4 reloads the length from r2, which
				! no earlier instruction on this path set.

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Combine the two bytes into the 16-bit value a word load
	! would have produced.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! Save the carry: dt overwrites T ...
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0		! ... and restore it into T (delay slot).
	mov	#0,r0
	addc	r0, r7		! Fold the final carry.

	mov	r2, r0
	tst	#1, r0
	bt	7f		! Even length: done.
	bra	5f		! Odd length: copy the last byte.
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
2:
	mov	r6,r2		! r2 = bytes left once 4-byte aligned
	mov	#-5,r0
	shld	r0,r6		! r6 = number of 32-byte chunks
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! Carry saved/restored around dt, as above.
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! Bytes left in whole 32-bit words (0..28).
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6		! r6 = number of 32-bit words
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! r6 = trailing bytes (0..3)
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! Exactly one byte left (T is 0 here).
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! Exactly two bytes left.
	 clrt
	shll16	r0		! Three bytes: word goes in the upper half,
	addc	r0,r7		! the last byte is added below.
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:
5000:

! Exception handler:
.section .fixup, "ax"

6001:
	! Fault on a SRC() access: store -EFAULT through src_err_ptr.
	mov.l	@(8,r15),r0		! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5		! dst
	mov.l	@r15,r6			! len
	mov	#0,r7			! also makes the returned sum 0
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	 add	#1,r5
	mov.l	8000f,r0
	jmp	@r0			! resume at the common exit (5000)
	 nop
	.align	2
8000:	.long	5000b

6002:
	! Fault on a DST() access: store -EFAULT through dst_err_ptr and
	! resume at the common exit with the sum accumulated so far.
	mov.l	@(12,r15),r0		! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0
	 nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15			! drop the saved dst/len
	rts
	 mov	r7,r0