Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $ |
| 2 | * |
| 3 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
| 4 | * operating system. INET is implemented using the BSD Socket |
| 5 | * interface as the means of communication with the user level. |
| 6 | * |
| 7 | * IP/TCP/UDP checksumming routines |
| 8 | * |
| 9 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> |
| 10 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> |
| 11 | * Tom May, <ftom@netcom.com> |
| 12 | * Pentium Pro/II routines: |
| 13 | * Alexander Kjeldaas <astor@guardian.no> |
| 14 | * Finn Arne Gangstad <finnag@guardian.no> |
| 15 | * Lots of code moved from tcp.c and ip.c; see those files |
| 16 | * for more names. |
| 17 | * |
| 18 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception |
| 19 | * handling. |
| 20 | * Andi Kleen, add zeroing on error |
| 21 | * converted to pure assembler |
| 22 | * |
| 23 | * SuperH version: Copyright (C) 1999 Niibe Yutaka |
| 24 | * |
| 25 | * This program is free software; you can redistribute it and/or |
| 26 | * modify it under the terms of the GNU General Public License |
| 27 | * as published by the Free Software Foundation; either version |
| 28 | * 2 of the License, or (at your option) any later version. |
| 29 | */ |
| 30 | |
| 31 | #include <asm/errno.h> |
| 32 | #include <linux/linkage.h> |
| 33 | |
| 34 | /* |
| 35 | * computes a partial checksum, e.g. for TCP/UDP fragments |
| 36 | */ |
| 37 | |
| 38 | /* |
Stuart Menefy | cadc4e1 | 2008-12-12 18:34:38 +0000 | [diff] [blame] | 39 | * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 40 | */ |
| 41 | |
	.text

/*
 * unsigned int csum_partial(const void *buf, int len, unsigned int sum)
 *
 * In:   r4 = buf, r5 = len (bytes), r6 = sum (initial 32-bit accumulator)
 * Out:  r0 = 32-bit partial one's-complement sum (not yet folded to 16 bits)
 * Clobbers: r1, r2, r3, r7 and the argument registers; no stack usage.
 *
 * Strategy: align buf to a 4-byte boundary (consuming a leading odd byte
 * and/or a leading 16-bit word), sum 32 bytes per iteration in an unrolled
 * addc loop, then mop up the remaining 0-31 bytes.  If buf was odd-aligned
 * the accumulator is byte-rotated on entry and rotated back before
 * returning, so the result matches a byte-stream checksum.
 */
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok.
	mov	r4, r7		! Keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2bytes.

	! buf is odd
	tst	r5, r5		! T=1 iff len == 0
	add	#-1, r5
	bt	9f		! len was 0: nothing to sum
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
	! Byte-rotate the accumulator left by 8 so the following word/long
	! additions land in the right byte lanes; undone again at 9f.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
	mov	r4, r0
	tst	#2, r0		! now 2-byte aligned; 4-byte aligned already?
	bt	2f
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	clrt
	bra	6f
	add	#2, r5		! r5 was < 2.	Deal with it.
1:
	mov.w	@r4+, r0	! consume one 16-bit word to reach 4-byte alignment
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! fold in the carry left by addc
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte iterations
	tst	r1, r1
	bt/s	4f		! if it's =0, go to 4f
	clrt
	.align 2
3:
	! Unrolled inner loop: checksum 32 bytes (8 longwords) per pass.
	! Loads are interleaved with addc so the carry chain never waits
	! directly on a just-loaded value.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry (T) across the dt below
	dt	r1
	bf/s	3b
	cmp/eq	#1, r0		! restore T from the saved carry
	! here, we know r1==0
	addc	r1, r6		! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! remaining longword bytes (0..28)
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = remaining longword count
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0		! save carry (T) across the dt below
	dt	r1
	bf/s	5b
	cmp/eq	#1, r0		! restore T from the saved carry
	addc	r2, r6		! add the final longword loaded above
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f		! only one byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly two bytes left
	clrt
	shll16	r0		! three left: this word goes in the high half
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! fold in the last carry
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	! undo the byte-rotate performed for the odd leading byte
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	mov	r6, r0
| 177 | |
| 178 | /* |
| 179 | unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, |
| 180 | int sum, int *src_err_ptr, int *dst_err_ptr) |
| 181 | */ |
| 182 | |
| 183 | /* |
 * Copy from src to dst while checksumming, otherwise like csum_partial
| 185 | * |
| 186 | * The macros SRC and DST specify the type of access for the instruction. |
| 187 | * thus we can call a custom exception handler for all access types. |
| 188 | * |
| 189 | * FIXME: could someone double-check whether I haven't mixed up some SRC and |
| 190 | * DST definitions? It's damn hard to trigger all cases. I hope I got |
| 191 | * them all but there's no guarantee. |
| 192 | */ |
| 193 | |
/*
 * SRC()/DST() wrap a single load (SRC) or store (DST) instruction and
 * emit an __ex_table entry for it.  If the access faults, the kernel's
 * exception fixup transfers control to local label 6001 (source fault)
 * or 6002 (destination fault) in the .fixup section below.
 */
#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous
| 205 | |
| 206 | ! |
| 207 | ! r4: const char *SRC |
| 208 | ! r5: char *DST |
| 209 | ! r6: int LEN |
| 210 | ! r7: int SUM |
| 211 | ! |
| 212 | ! on stack: |
| 213 | ! int *SRC_ERR_PTR |
| 214 | ! int *DST_ERR_PTR |
| 215 | ! |
/*
 * Copies len bytes from src (r4) to dst (r5) while accumulating a 32-bit
 * one's-complement sum in r7; returns the sum in r0.  Faulting accesses
 * are caught via the __ex_table entries emitted by SRC()/DST(): a source
 * fault stores -EFAULT through src_err_ptr and zero-fills the whole
 * destination; a destination fault stores -EFAULT through dst_err_ptr.
 * Either way the function returns normally.
 */
ENTRY(csum_partial_copy_generic)
	mov.l	r5,@-r15	! save dst for the fixup handlers
	mov.l	r6,@-r15	! save len for the fixup handlers
				! (err ptrs are now at @(8,r15)/@(12,r15))

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1		! r1 = src & 3
	and	r5,r0		! r0 = dst & 3
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	clrt
	add	#2,r6		! r6 was < 2.	Deal with it.
	bra	4f
	mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! r2 = total len, kept for the tail
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f		! fewer than two bytes in total
	clrt
	.align 2
5:
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0 	)
	extu.b	r1,r1
DST(	mov.b	r1,@r5		)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

	! Assemble the two bytes into one 16-bit value in host byte order
	! before adding, so the result matches the aligned path.
#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! save carry (T) across the dt below
	dt	r6
	bf/s	5b
	cmp/eq	#1,r0		! restore T from the saved carry
	mov	#0,r0
	addc	r0, r7		! fold in the final carry

	mov	r2, r0
	tst	#1, r0		! odd total length?
	bt	7f		! even: everything copied, done
	bra	5f		! odd: copy and sum the last byte
	clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align 2
1:
SRC(	mov.w	@r4+,r0		)
DST(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in the carry
2:
	mov	r6,r2		! r2 = remaining length, kept for the tail
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5 = number of 32-byte iterations
	tst	r6,r6
	bt/s	2f
	clrt
	.align 2
1:
	! Unrolled main loop: copy and checksum 32 bytes per iteration.
SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@r5		)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0 	)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0		)
SRC(	mov.l	@r4+,r1		)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! save carry (T) across the dt below
	dt	r6
	bf/s	1b
	cmp/eq	#1,r0		! restore T from the saved carry
	mov	#0,r0
	addc	r0,r7		! fold in the final carry

2:	mov	r2,r6		! 0..31 bytes remain
	mov	#0x1c,r0
	and	r0,r6		! r6 = remaining longword bytes (0..28)
	cmp/pl	r6
	bf/s	4f
	clrt
	shlr2	r6		! r6 = remaining longword count
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0		! save carry (T) across the dt below
	dt	r6
	bf/s	3b
	cmp/eq	#1,r0		! restore T from the saved carry
	mov	#0,r0
	addc	r0,r7		! fold in the final carry
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! r6 = remaining bytes (0..3)
	cmp/pl	r6
	bf	7f		! nothing left
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! exactly one byte left
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly two bytes left
	clrt
	shll16	r0		! three left: this word goes in the high half
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in the final carry
7:
5000:

# Exception handler:
.section .fixup, "ax"

6001:
	! Source fault: report -EFAULT, zero-fill the whole destination,
	! then resume at 5000 (r7 == 0, so the returned sum is 0).
	mov.l	@(8,r15),r0		! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst
	mov.l	@r15,r6		! len
	mov	#0,r7		! fill byte; also the value returned in r0
	! NOTE(review): this dt-based loop assumes len > 0 here; if a fault
	! could occur with len == 0, r6 would wrap -- confirm it cannot.
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	add	#1,r5
	mov.l	8000f,r0	! resume address (5000b) via a constant pool
	jmp	@r0
	nop
	.align	2
8000:	.long	5000b

6002:
	! Destination fault: report -EFAULT and return the sum so far.
	mov.l	@(12,r15),r0		! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0	! resume address (5000b) via a constant pool
	jmp	@r0
	nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15		! drop the saved dst/len
	rts
	mov	r7,r0		! return the accumulated sum