/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text
| 23 | |
| 24 | /* |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 25 | * computes the checksum of a memory block at buff, length len, |
| 26 | * and adds in "sum" (32-bit) |
| 27 | * |
Christophe Leroy | 7e39322 | 2016-03-07 18:44:37 +0100 | [diff] [blame] | 28 | * __csum_partial(buff, len, sum) |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 29 | */ |
Christophe Leroy | 7e39322 | 2016-03-07 18:44:37 +0100 | [diff] [blame] | 30 | _GLOBAL(__csum_partial) |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 31 | subi r3,r3,4 |
Christophe Leroy | 48821a3 | 2015-09-22 16:34:29 +0200 | [diff] [blame] | 32 | srawi. r6,r4,2 /* Divide len by 4 and also clear carry */ |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 33 | beq 3f /* if we're doing < 4 bytes */ |
Christophe Leroy | 48821a3 | 2015-09-22 16:34:29 +0200 | [diff] [blame] | 34 | andi. r0,r3,2 /* Align buffer to longword boundary */ |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 35 | beq+ 1f |
Christophe Leroy | 48821a3 | 2015-09-22 16:34:29 +0200 | [diff] [blame] | 36 | lhz r0,4(r3) /* do 2 bytes to get aligned */ |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 37 | subi r4,r4,2 |
Christophe Leroy | 48821a3 | 2015-09-22 16:34:29 +0200 | [diff] [blame] | 38 | addi r3,r3,2 |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 39 | srwi. r6,r4,2 /* # words to do */ |
Christophe Leroy | 48821a3 | 2015-09-22 16:34:29 +0200 | [diff] [blame] | 40 | adde r5,r5,r0 |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 41 | beq 3f |
Christophe Leroy | f867d55 | 2015-09-22 16:34:32 +0200 | [diff] [blame] | 42 | 1: andi. r6,r6,3 /* Prepare to handle words 4 by 4 */ |
| 43 | beq 21f |
| 44 | mtctr r6 |
Christophe Leroy | 48821a3 | 2015-09-22 16:34:29 +0200 | [diff] [blame] | 45 | 2: lwzu r0,4(r3) |
| 46 | adde r5,r5,r0 |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 47 | bdnz 2b |
Christophe Leroy | f867d55 | 2015-09-22 16:34:32 +0200 | [diff] [blame] | 48 | 21: srwi. r6,r4,4 /* # blocks of 4 words to do */ |
| 49 | beq 3f |
| 50 | mtctr r6 |
| 51 | 22: lwz r0,4(r3) |
| 52 | lwz r6,8(r3) |
| 53 | lwz r7,12(r3) |
| 54 | lwzu r8,16(r3) |
| 55 | adde r5,r5,r0 |
| 56 | adde r5,r5,r6 |
| 57 | adde r5,r5,r7 |
| 58 | adde r5,r5,r8 |
| 59 | bdnz 22b |
Christophe Leroy | 48821a3 | 2015-09-22 16:34:29 +0200 | [diff] [blame] | 60 | 3: andi. r0,r4,2 |
| 61 | beq+ 4f |
| 62 | lhz r0,4(r3) |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 63 | addi r3,r3,2 |
Christophe Leroy | 48821a3 | 2015-09-22 16:34:29 +0200 | [diff] [blame] | 64 | adde r5,r5,r0 |
| 65 | 4: andi. r0,r4,1 |
| 66 | beq+ 5f |
| 67 | lbz r0,4(r3) |
| 68 | slwi r0,r0,8 /* Upper byte of word */ |
| 69 | adde r5,r5,r0 |
| 70 | 5: addze r3,r5 /* add in final carry */ |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 71 | blr |
Al Viro | 9445aa1 | 2016-01-13 23:33:46 -0500 | [diff] [blame] | 72 | EXPORT_SYMBOL(__csum_partial) |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 73 | |
| 74 | /* |
| 75 | * Computes the checksum of a memory block at src, length len, |
| 76 | * and adds in "sum" (32-bit), while copying the block to dst. |
| 77 | * If an access exception occurs on src or dst, it stores -EFAULT |
| 78 | * to *src_err or *dst_err respectively, and (for an error on |
| 79 | * src) zeroes the rest of dst. |
| 80 | * |
| 81 | * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) |
| 82 | */ |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 83 | #define CSUM_COPY_16_BYTES_WITHEX(n) \ |
| 84 | 8 ## n ## 0: \ |
| 85 | lwz r7,4(r4); \ |
| 86 | 8 ## n ## 1: \ |
| 87 | lwz r8,8(r4); \ |
| 88 | 8 ## n ## 2: \ |
| 89 | lwz r9,12(r4); \ |
| 90 | 8 ## n ## 3: \ |
| 91 | lwzu r10,16(r4); \ |
| 92 | 8 ## n ## 4: \ |
| 93 | stw r7,4(r6); \ |
| 94 | adde r12,r12,r7; \ |
| 95 | 8 ## n ## 5: \ |
| 96 | stw r8,8(r6); \ |
| 97 | adde r12,r12,r8; \ |
| 98 | 8 ## n ## 6: \ |
| 99 | stw r9,12(r6); \ |
| 100 | adde r12,r12,r9; \ |
| 101 | 8 ## n ## 7: \ |
| 102 | stwu r10,16(r6); \ |
| 103 | adde r12,r12,r10 |
| 104 | |
| 105 | #define CSUM_COPY_16_BYTES_EXCODE(n) \ |
| 106 | .section __ex_table,"a"; \ |
| 107 | .align 2; \ |
| 108 | .long 8 ## n ## 0b,src_error; \ |
| 109 | .long 8 ## n ## 1b,src_error; \ |
| 110 | .long 8 ## n ## 2b,src_error; \ |
| 111 | .long 8 ## n ## 3b,src_error; \ |
| 112 | .long 8 ## n ## 4b,dst_error; \ |
| 113 | .long 8 ## n ## 5b,dst_error; \ |
| 114 | .long 8 ## n ## 6b,dst_error; \ |
| 115 | .long 8 ## n ## 7b,dst_error; \ |
| 116 | .text |
| 117 | |
| 118 | .text |
| 119 | .stabs "arch/powerpc/lib/",N_SO,0,0,0f |
| 120 | .stabs "checksum_32.S",N_SO,0,0,0f |
| 121 | 0: |
| 122 | |
| 123 | CACHELINE_BYTES = L1_CACHE_BYTES |
| 124 | LG_CACHELINE_BYTES = L1_CACHE_SHIFT |
| 125 | CACHELINE_MASK = (L1_CACHE_BYTES-1) |
| 126 | |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 127 | _GLOBAL(csum_partial_copy_generic) |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 128 | stwu r1,-16(r1) |
| 129 | stw r7,12(r1) |
| 130 | stw r8,8(r1) |
| 131 | |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 132 | addic r12,r6,0 |
| 133 | addi r6,r4,-4 |
| 134 | neg r0,r4 |
| 135 | addi r4,r3,-4 |
| 136 | andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ |
Christophe Leroy | 8540571 | 2016-08-26 16:45:13 +0200 | [diff] [blame] | 137 | crset 4*cr7+eq |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 138 | beq 58f |
| 139 | |
| 140 | cmplw 0,r5,r0 /* is this more than total to do? */ |
| 141 | blt 63f /* if not much to do */ |
Christophe Leroy | 8540571 | 2016-08-26 16:45:13 +0200 | [diff] [blame] | 142 | rlwinm r7,r6,3,0x8 |
| 143 | rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ |
| 144 | cmplwi cr7,r7,0 /* is destination address even ? */ |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 145 | andi. r8,r0,3 /* get it word-aligned first */ |
| 146 | mtctr r8 |
| 147 | beq+ 61f |
| 148 | li r3,0 |
| 149 | 70: lbz r9,4(r4) /* do some bytes */ |
| 150 | addi r4,r4,1 |
| 151 | slwi r3,r3,8 |
| 152 | rlwimi r3,r9,0,24,31 |
| 153 | 71: stb r9,4(r6) |
| 154 | addi r6,r6,1 |
| 155 | bdnz 70b |
| 156 | adde r12,r12,r3 |
| 157 | 61: subf r5,r0,r5 |
| 158 | srwi. r0,r0,2 |
| 159 | mtctr r0 |
| 160 | beq 58f |
| 161 | 72: lwzu r9,4(r4) /* do some words */ |
| 162 | adde r12,r12,r9 |
| 163 | 73: stwu r9,4(r6) |
| 164 | bdnz 72b |
| 165 | |
| 166 | 58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ |
| 167 | clrlwi r5,r5,32-LG_CACHELINE_BYTES |
| 168 | li r11,4 |
| 169 | beq 63f |
| 170 | |
| 171 | /* Here we decide how far ahead to prefetch the source */ |
| 172 | li r3,4 |
| 173 | cmpwi r0,1 |
| 174 | li r7,0 |
| 175 | ble 114f |
| 176 | li r7,1 |
| 177 | #if MAX_COPY_PREFETCH > 1 |
| 178 | /* Heuristically, for large transfers we prefetch |
| 179 | MAX_COPY_PREFETCH cachelines ahead. For small transfers |
| 180 | we prefetch 1 cacheline ahead. */ |
| 181 | cmpwi r0,MAX_COPY_PREFETCH |
| 182 | ble 112f |
| 183 | li r7,MAX_COPY_PREFETCH |
| 184 | 112: mtctr r7 |
| 185 | 111: dcbt r3,r4 |
| 186 | addi r3,r3,CACHELINE_BYTES |
| 187 | bdnz 111b |
| 188 | #else |
| 189 | dcbt r3,r4 |
| 190 | addi r3,r3,CACHELINE_BYTES |
| 191 | #endif /* MAX_COPY_PREFETCH > 1 */ |
| 192 | |
| 193 | 114: subf r8,r7,r0 |
| 194 | mr r0,r7 |
| 195 | mtctr r8 |
| 196 | |
| 197 | 53: dcbt r3,r4 |
| 198 | 54: dcbz r11,r6 |
| 199 | /* the main body of the cacheline loop */ |
| 200 | CSUM_COPY_16_BYTES_WITHEX(0) |
| 201 | #if L1_CACHE_BYTES >= 32 |
| 202 | CSUM_COPY_16_BYTES_WITHEX(1) |
| 203 | #if L1_CACHE_BYTES >= 64 |
| 204 | CSUM_COPY_16_BYTES_WITHEX(2) |
| 205 | CSUM_COPY_16_BYTES_WITHEX(3) |
| 206 | #if L1_CACHE_BYTES >= 128 |
| 207 | CSUM_COPY_16_BYTES_WITHEX(4) |
| 208 | CSUM_COPY_16_BYTES_WITHEX(5) |
| 209 | CSUM_COPY_16_BYTES_WITHEX(6) |
| 210 | CSUM_COPY_16_BYTES_WITHEX(7) |
| 211 | #endif |
| 212 | #endif |
| 213 | #endif |
| 214 | bdnz 53b |
| 215 | cmpwi r0,0 |
| 216 | li r3,4 |
| 217 | li r7,0 |
| 218 | bne 114b |
| 219 | |
| 220 | 63: srwi. r0,r5,2 |
| 221 | mtctr r0 |
| 222 | beq 64f |
| 223 | 30: lwzu r0,4(r4) |
| 224 | adde r12,r12,r0 |
| 225 | 31: stwu r0,4(r6) |
| 226 | bdnz 30b |
| 227 | |
| 228 | 64: andi. r0,r5,2 |
| 229 | beq+ 65f |
| 230 | 40: lhz r0,4(r4) |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 231 | addi r4,r4,2 |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 232 | 41: sth r0,4(r6) |
| 233 | adde r12,r12,r0 |
| 234 | addi r6,r6,2 |
| 235 | 65: andi. r0,r5,1 |
| 236 | beq+ 66f |
| 237 | 50: lbz r0,4(r4) |
| 238 | 51: stb r0,4(r6) |
| 239 | slwi r0,r0,8 |
| 240 | adde r12,r12,r0 |
| 241 | 66: addze r3,r12 |
| 242 | addi r1,r1,16 |
| 243 | beqlr+ cr7 |
Christophe Leroy | 1bc8b81 | 2016-08-02 10:07:05 +0200 | [diff] [blame] | 244 | rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 245 | blr |
| 246 | |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 247 | /* read fault */ |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 248 | src_error: |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 249 | lwz r7,12(r1) |
| 250 | addi r1,r1,16 |
| 251 | cmpwi cr0,r7,0 |
| 252 | beqlr |
| 253 | li r0,-EFAULT |
| 254 | stw r0,0(r7) |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 255 | blr |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 256 | /* write fault */ |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 257 | dst_error: |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 258 | lwz r8,8(r1) |
| 259 | addi r1,r1,16 |
| 260 | cmpwi cr0,r8,0 |
| 261 | beqlr |
| 262 | li r0,-EFAULT |
| 263 | stw r0,0(r8) |
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 264 | blr |
| 265 | |
Christophe Leroy | 7aef4136 | 2015-09-22 16:34:27 +0200 | [diff] [blame] | 266 | .section __ex_table,"a" |
| 267 | .align 2 |
| 268 | .long 70b,src_error |
| 269 | .long 71b,dst_error |
| 270 | .long 72b,src_error |
| 271 | .long 73b,dst_error |
| 272 | .long 54b,dst_error |
| 273 | .text |
| 274 | |
| 275 | /* |
| 276 | * this stuff handles faults in the cacheline loop and branches to either |
| 277 | * src_error (if in read part) or dst_error (if in write part) |
| 278 | */ |
| 279 | CSUM_COPY_16_BYTES_EXCODE(0) |
| 280 | #if L1_CACHE_BYTES >= 32 |
| 281 | CSUM_COPY_16_BYTES_EXCODE(1) |
| 282 | #if L1_CACHE_BYTES >= 64 |
| 283 | CSUM_COPY_16_BYTES_EXCODE(2) |
| 284 | CSUM_COPY_16_BYTES_EXCODE(3) |
| 285 | #if L1_CACHE_BYTES >= 128 |
| 286 | CSUM_COPY_16_BYTES_EXCODE(4) |
| 287 | CSUM_COPY_16_BYTES_EXCODE(5) |
| 288 | CSUM_COPY_16_BYTES_EXCODE(6) |
| 289 | CSUM_COPY_16_BYTES_EXCODE(7) |
| 290 | #endif |
| 291 | #endif |
| 292 | #endif |
| 293 | |
| 294 | .section __ex_table,"a" |
| 295 | .align 2 |
| 296 | .long 30b,src_error |
| 297 | .long 31b,dst_error |
| 298 | .long 40b,src_error |
| 299 | .long 41b,dst_error |
| 300 | .long 50b,src_error |
| 301 | .long 51b,dst_error |
Al Viro | 9445aa1 | 2016-01-13 23:33:46 -0500 | [diff] [blame] | 302 | EXPORT_SYMBOL(csum_partial_copy_generic) |