Aurelien Jacquiot | 09831ca | 2011-10-04 11:15:51 -0400 | [diff] [blame] | 1 | ; |
| 2 | ; linux/arch/c6x/lib/csum_64plus.s |
| 3 | ; |
| 4 | ; Port on Texas Instruments TMS320C6x architecture |
| 5 | ; |
| 6 | ; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated |
| 7 | ; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) |
| 8 | ; |
| 9 | ; This program is free software; you can redistribute it and/or modify |
| 10 | ; it under the terms of the GNU General Public License version 2 as |
| 11 | ; published by the Free Software Foundation. |
| 12 | ; |
| 13 | #include <linux/linkage.h> |
| 14 | |
| 15 | ; |
| 16 | ;unsigned int csum_partial_copy(const char *src, char * dst, |
| 17 | ; int len, int sum) |
| 18 | ; |
| 19 | ; A4: src |
| 20 | ; B4: dst |
| 21 | ; A6: len |
| 22 | ; B6: sum |
| 23 | ; return csum in A4 |
| 24 | ; |
| 25 | |
| 26 | .text |
; csum_partial_copy: copy len (A6) bytes from src (A4) to dst (B4) while
; accumulating the copied data as 16-bit quantities. The accumulated sum is
; folded to 16 bits, the caller's sum (B6) is added, and the result is
; returned in A4.
; Register roles inside this routine:
;   A9 / B9 : running sums (B9 is only used by the aligned loop)
;   A31     : copy of the caller-supplied sum
;   B30     : saved ILC, written back to ILC at the end of the routine
| 27 | ENTRY(csum_partial_copy) |
; Preserve ILC: it is reloaded below with the number of 32-bit words.
| 28 | MVC .S2 ILC,B30 |
| 29 | |
| 30 | MV .D1X B6,A31 ; given csum |
| 31 | ZERO .D1 A9 ; csum (a side) |
| 32 | || ZERO .D2 B9 ; csum (b side) |
| 33 | || SHRU .S2X A6,2,B5 ; len / 4 |
| 34 | |
| 35 | ;; Check alignment and size |
; A1 = src & 3 and B0 = dst & 3; they are OR-ed into B0, so B0 != 0 means
; at least one pointer is not word aligned. A1 is then reused as the word
; count (len / 4), which is also loaded into ILC; B2 = 1 is the MPYU
; multiplier used by the aligned loop.
| 36 | AND .S1 3,A4,A1 |
| 37 | || AND .S2 3,B4,B0 |
| 38 | OR .L2X B0,A1,B0 ; non aligned condition |
| 39 | || MVC .S2 B5,ILC |
| 40 | || MVK .D2 1,B2 |
| 41 | || MV .D1X B5,A1 ; words condition |
; No whole word to copy -> L8 (tail handling).
; Unaligned src or dst  -> L6 (LDNW/STNW loop).
| 42 | [!A1] B .S1 L8 |
| 43 | [B0] BNOP .S1 L6,5 |
| 44 | |
| 45 | SPLOOP 1 |
| 46 | |
| 47 | ;; Main loop for aligned words |
; Each word is loaded through A7, stored through B7, and summed as two
; half-words: EXTU A7,0,16 (the word shifted right by 16) goes into A9,
; and the MPYU-by-B2 (= 1) product goes into B9.
; NOTE(review): MPYU is assumed to multiply only the low 16 bits of its
; operands, i.e. to yield the low half-word here - confirm against the ISA.
| 48 | LDW .D1T1 *A4++,A7 |
| 49 | NOP 4 |
| 50 | MV .S2X A7,B7 |
| 51 | || EXTU .S1 A7,0,16,A16 |
| 52 | STW .D2T2 B7,*B4++ |
| 53 | || MPYU .M2 B7,B2,B8 |
| 54 | || ADD .L1 A16,A9,A9 |
| 55 | NOP |
| 56 | SPKERNEL 8,0 |
| 57 | || ADD .L2 B8,B9,B9 |
| 58 | |
; Aligned loop done: clear A1 so that the test at L6 skips the non-aligned
; loop, and merge the B-side sum into A9.
| 59 | ZERO .D1 A1 |
| 60 | || ADD .L1X A9,B9,A9 ; add csum from a and b sides |
| 61 | |
| 62 | L6: |
| 63 | [!A1] BNOP .S1 L8,5 |
| 64 | |
| 65 | ;; Main loop for non-aligned words |
; Same copy-and-sum as above using LDNW/STNW; both half-word contributions
; (A16 and A8) are accumulated in A9. A2 = 1 is the MPYU multiplier.
| 66 | SPLOOP 2 |
| 67 | || MVK .L1 1,A2 |
| 68 | |
| 69 | LDNW .D1T1 *A4++,A7 |
| 70 | NOP 3 |
| 71 | |
| 72 | NOP |
| 73 | MV .S2X A7,B7 |
| 74 | || EXTU .S1 A7,0,16,A16 |
| 75 | || MPYU .M1 A7,A2,A8 |
| 76 | |
| 77 | ADD .L1 A16,A9,A9 |
| 78 | SPKERNEL 6,0 |
| 79 | || STNW .D2T2 B7,*B4++ |
| 80 | || ADD .L1 A8,A9,A9 |
| 81 | |
; Tail handling: if (len & 2) is set, one more half-word remains; otherwise
; skip to L82.
| 82 | L8: AND .S2X 2,A6,B5 |
| 83 | CMPGT .L2 B5,0,B0 |
| 84 | [!B0] BNOP .S1 L82,4 |
| 85 | |
| 86 | ;; Manage half-word |
; The half-word is copied as two separate bytes (A7 first, A8 second).
; Big endian adds (first << 8) + second to the sum; little endian adds
; first + (second << 8).
| 87 | ZERO .L1 A7 |
| 88 | || ZERO .D1 A8 |
| 89 | |
| 90 | #ifdef CONFIG_CPU_BIG_ENDIAN |
| 91 | |
| 92 | LDBU .D1T1 *A4++,A7 |
| 93 | LDBU .D1T1 *A4++,A8 |
| 94 | NOP 3 |
| 95 | SHL .S1 A7,8,A0 |
| 96 | ADD .S1 A8,A9,A9 |
| 97 | STB .D2T1 A7,*B4++ |
| 98 | || ADD .S1 A0,A9,A9 |
| 99 | STB .D2T1 A8,*B4++ |
| 100 | |
| 101 | #else |
| 102 | |
| 103 | LDBU .D1T1 *A4++,A7 |
| 104 | LDBU .D1T1 *A4++,A8 |
| 105 | NOP 3 |
| 106 | ADD .S1 A7,A9,A9 |
| 107 | SHL .S1 A8,8,A0 |
| 108 | |
| 109 | STB .D2T1 A7,*B4++ |
| 110 | || ADD .S1 A0,A9,A9 |
| 111 | STB .D2T1 A8,*B4++ |
| 112 | |
| 113 | #endif |
| 114 | |
| 115 | ;; Manage eventually the last byte |
; If (len & 1) is set, copy the final byte. Big endian adds it to the sum
; shifted left by 8; little endian adds it unshifted.
| 116 | L82: AND .S2X 1,A6,B0 |
| 117 | [!B0] BNOP .S1 L9,5 |
| 118 | |
| 119 | || ZERO .L1 A7 |
| 120 | |
| 121 | L83: LDBU .D1T1 *A4++,A7 |
| 122 | NOP 4 |
| 123 | |
| 124 | MV .L2X A7,B7 |
| 125 | |
| 126 | #ifdef CONFIG_CPU_BIG_ENDIAN |
| 127 | |
| 128 | STB .D2T2 B7,*B4++ |
| 129 | || SHL .S1 A7,8,A7 |
| 130 | ADD .S1 A7,A9,A9 |
| 131 | |
| 132 | #else |
| 133 | |
| 134 | STB .D2T2 B7,*B4++ |
| 135 | || ADD .S1 A7,A9,A9 |
| 136 | |
| 137 | #endif |
| 138 | |
| 139 | ;; Fold the csum |
; While (A9 >> 16) is non-zero: A9 = EXTU(A9,16,16) + (A9 >> 16), i.e. the
; upper half is added back into the lower half until nothing is left above
; bit 15.
| 140 | L9: SHRU .S2X A9,16,B0 |
| 141 | [!B0] BNOP .S1 L10,5 |
| 142 | |
| 143 | L91: SHRU .S2X A9,16,B4 |
| 144 | || EXTU .S1 A9,16,16,A3 |
| 145 | ADD .D1X A3,B4,A9 |
| 146 | |
| 147 | SHRU .S1 A9,16,A0 |
| 148 | [A0] BNOP .S1 L91,5 |
| 149 | |
; Add the caller's sum (A31) to the folded value - the result of this final
; addition is not folded again - and place it in A4.
| 150 | L10: ADD .D1 A31,A9,A9 |
| 151 | MV .D1 A9,A4 |
| 152 | |
; Branch to B3 (presumably the return address per the C6x calling
; convention); ILC is restored from B30 by the instruction following the
; BNOP.
| 153 | BNOP .S2 B3,4 |
| 154 | MVC .S2 B30,ILC |
| 155 | ENDPROC(csum_partial_copy) |
| 156 | |
| 157 | ; |
| 158 | ;unsigned short |
| 159 | ;ip_fast_csum(unsigned char *iph, unsigned int ihl) |
| 160 | ;{ |
| 161 | ; unsigned int checksum = 0; |
| 162 | ; unsigned short *tosum = (unsigned short *) iph; |
| 163 | ; int len; |
| 164 | ; |
| 165 | ; len = ihl*4; |
| 166 | ; |
| 167 | ; if (len <= 0) |
| 168 | ; return 0; |
| 169 | ; |
| 170 | ; while(len) { |
| 171 | ; len -= 2; |
| 172 | ; checksum += *tosum++; |
| 173 | ; } |
| 174 | ; if (len & 1) |
| 175 | ; checksum += *(unsigned char*) tosum; |
| 176 | ; |
| 177 | ; while(checksum >> 16) |
| 178 | ; checksum = (checksum & 0xffff) + (checksum >> 16); |
| 179 | ; |
| 180 | ; return ~checksum; |
| 181 | ;} |
| 182 | ; |
| 183 | ; A4: iph |
| 184 | ; B4: ihl |
| 185 | ; return checksum in A4 |
| 186 | ; |
| 187 | .text |
| 188 | |
; ip_fast_csum: sum ihl * 4 bytes at iph as unsigned half-words, fold the
; sum to 16 bits and return its complement (masked to 16 bits) in A4.
; Returns 0 when ihl * 4 is not greater than 0.
; Register roles inside this routine:
;   A5  : running sum
;   A3  : loaded half-word inside the loop, then the value to return
;   B30 : saved ILC, written back to ILC before leaving
; Unlike the pseudo-code above, the assembly has no separate "len & 1" step;
; it only loops over len / 2 half-words.
| 189 | ENTRY(ip_fast_csum) |
| 190 | ZERO .D1 A5 |
| 191 | || MVC .S2 ILC,B30 |
; B0 = len = ihl * 4; B1 = (len > 0).
| 192 | SHL .S2 B4,2,B0 |
| 193 | CMPGT .L2 B0,0,B1 |
; len <= 0: go straight to the exit at L15 with A3 = 0 as the return value.
| 194 | [!B1] BNOP .S1 L15,4 |
| 195 | [!B1] ZERO .D1 A3 |
| 196 | |
; B0 becomes the number of half-words and is loaded into ILC as the trip
; count of the loop below.
| 197 | [!B0] B .S1 L12 |
| 198 | SHRU .S2 B0,1,B0 |
| 199 | MVC .S2 B0,ILC |
| 200 | NOP 3 |
| 201 | |
; Software-pipelined loop: A5 += *(unsigned short *)A4++.
| 202 | SPLOOP 1 |
| 203 | LDHU .D1T1 *A4++,A3 |
| 204 | NOP 3 |
| 205 | NOP |
| 206 | SPKERNEL 5,0 |
| 207 | || ADD .L1 A3,A5,A5 |
| 208 | |
; Skip the folding loop when nothing is set above bit 15.
| 209 | L12: SHRU .S1 A5,16,A0 |
| 210 | [!A0] BNOP .S1 L14,5 |
| 211 | |
; while (checksum >> 16) checksum = (checksum & 0xffff) + (checksum >> 16);
| 212 | L13: SHRU .S2X A5,16,B4 |
| 213 | EXTU .S1 A5,16,16,A3 |
| 214 | ADD .D1X A3,B4,A5 |
| 215 | SHRU .S1 A5,16,A0 |
| 216 | [A0] BNOP .S1 L13,5 |
| 217 | |
; A3 = ~checksum, then EXTU 16,16 keeps only its low 16 bits.
| 218 | L14: NOT .D1 A5,A3 |
| 219 | EXTU .S1 A3,16,16,A3 |
| 220 | |
; Common exit: branch to B3 (presumably the return address per the C6x
; calling convention); the two instructions after the BNOP restore ILC and
; move the result into A4.
| 221 | L15: BNOP .S2 B3,3 |
| 222 | MVC .S2 B30,ILC |
| 223 | MV .D1 A3,A4 |
| 224 | ENDPROC(ip_fast_csum) |
| 225 | |
| 226 | ; |
| 227 | ;unsigned short |
| 228 | ;do_csum(unsigned char *buff, unsigned int len) |
| 229 | ;{ |
| 230 | ; int odd, count; |
| 231 | ; unsigned int result = 0; |
| 232 | ; |
| 233 | ; if (len <= 0) |
| 234 | ; goto out; |
| 235 | ; odd = 1 & (unsigned long) buff; |
| 236 | ; if (odd) { |
| 237 | ;#ifdef __LITTLE_ENDIAN |
| 238 | ; result += (*buff << 8); |
| 239 | ;#else |
| 240 | ; result = *buff; |
| 241 | ;#endif |
| 242 | ; len--; |
| 243 | ; buff++; |
| 244 | ; } |
| 245 | ; count = len >> 1; /* nr of 16-bit words.. */ |
| 246 | ; if (count) { |
| 247 | ; if (2 & (unsigned long) buff) { |
| 248 | ; result += *(unsigned short *) buff; |
| 249 | ; count--; |
| 250 | ; len -= 2; |
| 251 | ; buff += 2; |
| 252 | ; } |
| 253 | ; count >>= 1; /* nr of 32-bit words.. */ |
| 254 | ; if (count) { |
| 255 | ; unsigned int carry = 0; |
| 256 | ; do { |
| 257 | ; unsigned int w = *(unsigned int *) buff; |
| 258 | ; count--; |
| 259 | ; buff += 4; |
| 260 | ; result += carry; |
| 261 | ; result += w; |
| 262 | ; carry = (w > result); |
| 263 | ; } while (count); |
| 264 | ; result += carry; |
| 265 | ; result = (result & 0xffff) + (result >> 16); |
| 266 | ; } |
| 267 | ; if (len & 2) { |
| 268 | ; result += *(unsigned short *) buff; |
| 269 | ; buff += 2; |
| 270 | ; } |
| 271 | ; } |
| 272 | ; if (len & 1) |
| 273 | ;#ifdef __LITTLE_ENDIAN |
| 274 | ; result += *buff; |
| 275 | ;#else |
| 276 | ; result += (*buff << 8); |
| 277 | ;#endif |
| 278 | ; result = (result & 0xffff) + (result >> 16); |
| 279 | ; /* add up carry.. */ |
| 280 | ; result = (result & 0xffff) + (result >> 16); |
| 281 | ; if (odd) |
| 282 | ; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); |
| 283 | ;out: |
| 284 | ; return result; |
| 285 | ;} |
| 286 | ; |
| 287 | ; A4: buff |
| 288 | ; B4: len |
| 289 | ; return checksum in A4 |
| 290 | ; |
| 291 | |
; do_csum: assembly counterpart of the pseudo-code above. Sums len (B4)
; bytes at buff (A4) with carries folded back into the low 16 bits, and
; returns the result in A4 (0 when len <= 0).
; Register roles inside this routine:
;   A1 : running result (A2 takes over inside the word loop)
;   A3 : "odd" flag (buff & 1), consumed by the final byte swap
;   A5 : copy of B3 on entry; the routine leaves by branching to A5
;   B3 : remaining length (reused once its entry value is copied to A5)
;   B0 : half-word count, then word count, then scratch predicate
; NOTE(review): ILC is overwritten at the word loop and not restored, unlike
; csum_partial_copy and ip_fast_csum in this file - confirm that ILC is
; call-clobbered in the ABI in use.
| 292 | ENTRY(do_csum) |
; len <= 0 -> L26. The instructions up to and including the parallel packet
; below still set A5 = B3 and A1 = 0 before L26 uses them (they appear to sit
; in the branch delay slots - confirm against the ISA).
| 293 | CMPGT .L2 B4,0,B0 |
| 294 | [!B0] BNOP .S1 L26,3 |
| 295 | EXTU .S1 A4,31,31,A0 |
| 296 | |
; A0 = buff & 1 ("odd"), kept in A3 for the end of the routine.
| 297 | MV .L1 A0,A3 |
| 298 | || MV .S1X B3,A5 |
| 299 | || MV .L2 B4,B3 |
| 300 | || ZERO .D1 A1 |
| 301 | |
; Odd start address: consume one leading byte and decrement the length.
; Big endian takes the byte as the initial result; little endian takes the
; byte shifted left by 8.
| 302 | #ifdef CONFIG_CPU_BIG_ENDIAN |
| 303 | [A0] SUB .L2 B3,1,B3 |
| 304 | || [A0] LDBU .D1T1 *A4++,A1 |
| 305 | #else |
| 306 | [!A0] BNOP .S1 L21,5 |
| 307 | || [A0] LDBU .D1T1 *A4++,A0 |
| 308 | SUB .L2 B3,1,B3 |
| 309 | || SHL .S1 A0,8,A1 |
| 310 | L21: |
| 311 | #endif |
; count = len >> 1 (number of 16-bit words); none -> L24 (last byte only).
; A0 = buff & 2 is computed on the way.
| 312 | SHR .S2 B3,1,B0 |
| 313 | [!B0] BNOP .S1 L24,3 |
| 314 | MVK .L1 2,A0 |
| 315 | AND .L1 A4,A0,A0 |
| 316 | |
; buff & 2 set: consume one half-word first (count--, len -= 2,
; result += half-word).
| 317 | [!A0] BNOP .S1 L22,5 |
| 318 | || [A0] LDHU .D1T1 *A4++,A0 |
| 319 | SUB .L2 B0,1,B0 |
| 320 | || SUB .S2 B3,2,B3 |
| 321 | || ADD .L1 A0,A1,A1 |
| 322 | L22: |
; count >>= 1 (number of 32-bit words); A0 = carry = 0.
| 323 | SHR .S2 B0,1,B0 |
| 324 | || ZERO .L1 A0 |
| 325 | |
; No whole word -> L23; otherwise the word count becomes the loop trip
; count in ILC.
| 326 | [!B0] BNOP .S1 L23,5 |
| 327 | || [B0] MVC .S2 B0,ILC |
| 328 | |
; Word loop, per iteration: w (A1) = *buff++; A0 = carry + w; A2 += A0;
; carry (A0) = (w > A2). The SPMASKed "MV A1,A2" seeds A2 with the result
; accumulated so far.
| 329 | SPLOOP 3 |
| 330 | SPMASK L1 |
| 331 | || MV .L1 A1,A2 |
| 332 | || LDW .D1T1 *A4++,A1 |
| 333 | |
| 334 | NOP 4 |
| 335 | ADD .L1 A0,A1,A0 |
| 336 | ADD .L1 A2,A0,A2 |
| 337 | |
| 338 | SPKERNEL 1,2 |
| 339 | || CMPGTU .L1 A1,A2,A0 |
| 340 | |
; result += carry; result = (result & 0xffff) + (result >> 16), left in A1.
| 341 | ADD .L1 A0,A2,A6 |
| 342 | EXTU .S1 A6,16,16,A7 |
| 343 | SHRU .S2X A6,16,B0 |
| 344 | NOP 1 |
| 345 | ADD .L1X A7,B0,A1 |
| 346 | L23: |
; if (len & 2): add one trailing half-word.
| 347 | MVK .L2 2,B0 |
| 348 | AND .L2 B3,B0,B0 |
| 349 | [B0] LDHU .D1T1 *A4++,A0 |
| 350 | NOP 4 |
| 351 | [B0] ADD .L1 A0,A1,A1 |
| 352 | L24: |
; if (len & 1): add the last byte (shifted left by 8 on big endian).
| 353 | EXTU .S2 B3,31,31,B0 |
| 354 | #ifdef CONFIG_CPU_BIG_ENDIAN |
| 355 | [!B0] BNOP .S1 L25,4 |
| 356 | || [B0] LDBU .D1T1 *A4,A0 |
| 357 | SHL .S1 A0,8,A0 |
| 358 | ADD .L1 A0,A1,A1 |
| 359 | L25: |
| 360 | #else |
| 361 | [B0] LDBU .D1T1 *A4,A0 |
| 362 | NOP 4 |
| 363 | [B0] ADD .L1 A0,A1,A1 |
| 364 | #endif |
; Two folding steps reduce the result to 16 bits in A1.
| 365 | EXTU .S1 A1,16,16,A0 |
| 366 | SHRU .S2X A1,16,B0 |
| 367 | NOP 1 |
| 368 | ADD .L1X A0,B0,A0 |
| 369 | SHRU .S1 A0,16,A1 |
| 370 | ADD .L1 A0,A1,A0 |
| 371 | EXTU .S1 A0,16,16,A1 |
; Prepare the byte-swapped value: A2 = (result >> 8) & 0xff,
; A0 = (result & 0xff) << 8; it replaces the result only when "odd" (A3,
; copied to B0) is set.
| 372 | EXTU .S1 A1,16,24,A2 |
| 373 | |
| 374 | EXTU .S1 A1,24,16,A0 |
| 375 | || MV .L2X A3,B0 |
| 376 | |
| 377 | [B0] OR .L1 A0,A2,A1 |
| 378 | L26: |
; Leave through A5 (the entry value of B3); A4 = result.
| 379 | NOP 1 |
| 380 | BNOP .S2X A5,4 |
| 381 | MV .L1 A1,A4 |
| 382 | ENDPROC(do_csum) |
| 383 | |
| 384 | ;__wsum csum_partial(const void *buff, int len, __wsum wsum) |
| 385 | ;{ |
| 386 | ; unsigned int sum = (__force unsigned int)wsum; |
| 387 | ; unsigned int result = do_csum(buff, len); |
| 388 | ; |
| 389 | ; /* add in old sum, and carry.. */ |
| 390 | ; result += sum; |
| 391 | ; if (sum > result) |
| 392 | ; result += 1; |
| 393 | ; return (__force __wsum)result; |
| 394 | ;} |
| 395 | ; |
; csum_partial: run do_csum on the buffer, then add the caller's wsum to the
; result, adding 1 more if that 32-bit addition wrapped (see the pseudo-code
; above). wsum is read from A6; the result is returned in A4.
; The entry value of B3 is parked in A9 and wsum in A8 across the call:
; do_csum as written in this file writes A6 but neither A8 nor A9.
| 396 | ENTRY(csum_partial) |
| 397 | MV .L1X B3,A9 |
| 398 | || CALLP .S2 do_csum,B3 |
| 399 | || MV .S1 A6,A8 |
; Back from do_csum with its result in A4: branch to the saved B3 value
; (A9). The three instructions after the BNOP compute
; A1 = wsum + result, A0 = (wsum > A1), A4 = A1 + A0.
| 400 | BNOP .S2X A9,2 |
| 401 | ADD .L1 A8,A4,A1 |
| 402 | CMPGTU .L1 A8,A1,A0 |
| 403 | ADD .L1 A1,A0,A4 |
| 404 | ENDPROC(csum_partial) |
| 405 | |
| 406 | ;unsigned short |
| 407 | ;ip_compute_csum(unsigned char *buff, unsigned int len) |
| 408 | ; |
| 409 | ; A4: buff |
| 410 | ; B4: len |
| 411 | ; return checksum in A4 |
| 412 | |
; ip_compute_csum: run do_csum on (buff, len) and return the complement of
; its result with bits 16-31 cleared, in A4.
; The entry value of B3 is parked in A9 across the call; do_csum as written
; in this file does not write A9.
| 413 | ENTRY(ip_compute_csum) |
| 414 | MV .L1X B3,A9 |
| 415 | || CALLP .S2 do_csum,B3 |
; Branch to the saved B3 value (A9); the two instructions after the BNOP
; invert do_csum's result and clear bits 16-31.
| 416 | BNOP .S2X A9,3 |
| 417 | NOT .S1 A4,A4 |
| 418 | CLR .S1 A4,16,31,A4 |
| 419 | ENDPROC(ip_compute_csum) |