Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | #ifndef _M68K_STRING_H_ |
| 2 | #define _M68K_STRING_H_ |
| 3 | |
| 4 | #include <asm/setup.h> |
| 5 | #include <asm/page.h> |
| 6 | |
#define __HAVE_ARCH_STRCPY
/*
 * Copy the NUL-terminated string @src, including its terminator, into
 * @dest.  Returns the original value of @dest.
 *
 * The moveb both copies a byte and sets the condition codes, so the
 * single "jne" loops until the NUL byte itself has been copied.
 */
static inline char * strcpy(char * dest,const char *src)
{
	char *xdest = dest;	/* saved for the return value */

	__asm__ __volatile__
	("1:\tmoveb %1@+,%0@+\n\t"	/* *dest++ = *src++ (sets Z) */
	 "jne 1b"			/* repeat until the byte copied was 0 */
	 : "=a" (dest), "=a" (src)
	 : "0" (dest), "1" (src) : "memory");
	return xdest;
}
| 19 | |
#define __HAVE_ARCH_STRNCPY
/*
 * Copy at most @n bytes of the string @src to @dest; returns @dest.
 * Copying stops as soon as the NUL terminator has been copied or @n
 * bytes have been written, whichever comes first.
 *
 * NOTE(review): unlike ISO C strncpy(), this neither zero-pads the
 * remainder of @dest when @src is shorter than @n, nor guarantees
 * NUL-termination when strlen(@src) >= @n.
 */
static inline char * strncpy(char *dest, const char *src, size_t n)
{
	char *xdest = dest;	/* saved for the return value */

	if (n == 0)
		return xdest;

	__asm__ __volatile__
	("1:\tmoveb %1@+,%0@+\n\t"	/* *dest++ = *src++ (sets Z) */
	 "jeq 2f\n\t"			/* stop once the NUL has been copied */
	 "subql #1,%2\n\t"		/* n-- */
	 "jne 1b\n\t"			/* room left, keep copying */
	 "2:"
	 : "=a" (dest), "=a" (src), "=d" (n)
	 : "0" (dest), "1" (src), "2" (n)
	 : "memory");
	return xdest;
}
| 39 | |
#define __HAVE_ARCH_STRCAT
/*
 * Append the NUL-terminated string @src to the end of @dest and return
 * @dest.  The caller must ensure @dest has room for the combined
 * string plus terminator.
 */
static inline char * strcat(char * dest, const char * src)
{
	char *ret = dest;
	char *p = dest;

	/* locate the terminating NUL of dest */
	for (; *p != '\0'; p++)
		;
	/* copy src, including its terminator */
	do {
		*p++ = *src;
	} while (*src++ != '\0');

	return ret;
}
| 52 | |
#define __HAVE_ARCH_STRNCAT
/*
 * Append at most @count characters from @src to @dest, always leaving
 * the result NUL-terminated; returns @dest.  @count limits the bytes
 * taken from @src, not the size of @dest.
 */
static inline char * strncat(char *dest, const char *src, size_t count)
{
	char *ret = dest;

	if (!count)
		return ret;

	/* skip to the end of the existing string */
	while (*dest != '\0')
		dest++;

	for (;;) {
		char ch = *src++;

		*dest++ = ch;
		if (ch == '\0')
			break;
		if (--count == 0) {
			/* budget exhausted: terminate explicitly */
			*dest = '\0';
			break;
		}
	}
	return ret;
}
| 71 | |
#define __HAVE_ARCH_STRCHR
/*
 * Return a pointer to the first occurrence of character @c in @s, or
 * NULL if it does not occur.  As with the standard strchr(), searching
 * for '\0' yields a pointer to the terminator.
 */
static inline char * strchr(const char * s, int c)
{
	const char target = c;

	while (*s != target) {
		if (!*s)
			return NULL;
		s++;
	}
	return (char *) s;
}
| 82 | |
| 83 | #if 0 |
#define __HAVE_ARCH_STRPBRK
/*
 * Return a pointer to the first character in @cs that also occurs in
 * @ct, or NULL if the two strings share no characters.
 */
static inline char *strpbrk(const char *cs,const char *ct)
{
	const char *p;

	for (p = cs; *p; p++) {
		const char *q;

		for (q = ct; *q; q++) {
			if (*q == *p)
				return (char *) p;
		}
	}
	return NULL;
}
| 95 | #endif |
| 96 | |
| 97 | #if 0 |
#define __HAVE_ARCH_STRSPN
/*
 * Return the length of the initial segment of @s that consists
 * entirely of characters appearing in @accept.
 */
static inline size_t strspn(const char *s, const char *accept)
{
	size_t len = 0;

	while (s[len] != '\0') {
		const char *a = accept;

		/* is s[len] one of the accepted characters? */
		while (*a != '\0' && *a != s[len])
			a++;
		if (*a == '\0')
			break;		/* not accepted: segment ends here */
		len++;
	}
	return len;
}
| 118 | #endif |
| 119 | |
/* TODO: strstr — no arch-optimized m68k implementation provided here */
| 121 | |
#define __HAVE_ARCH_STRLEN
/*
 * Return the number of characters in @s, excluding the terminating
 * NUL.
 */
static inline size_t strlen(const char * s)
{
	size_t len = 0;

	while (s[len] != '\0')
		len++;
	return len;
}
| 129 | |
/* TODO: strnlen — no arch-optimized m68k implementation provided here */
| 131 | |
#define __HAVE_ARCH_STRCMP
/*
 * Compare the strings @cs and @ct.  Returns zero when they are equal,
 * otherwise the byte difference *cs - *ct at the first mismatch.
 *
 * NOTE(review): the difference is computed in a plain 'char', so only
 * the zero/non-zero distinction (and the sign for 7-bit ASCII input)
 * is reliable; values differing by a multiple of 256 cannot occur for
 * single bytes, but the sign can be surprising for 8-bit data.
 */
static inline int strcmp(const char * cs,const char * ct)
{
	char __res;

	__asm__
	("1:\tmoveb %0@+,%2\n\t" /* get *cs */
	 "cmpb %1@+,%2\n\t"      /* compare a byte */
	 "jne 2f\n\t"            /* not equal, break out */
	 "tstb %2\n\t"           /* at end of cs? */
	 "jne 1b\n\t"            /* no, keep going */
	 "jra 3f\n\t"            /* strings are equal */
	 "2:\tsubb %1@-,%2\n\t"  /* *cs - *ct */
	 "3:"
	 : "=a" (cs), "=a" (ct), "=d" (__res)
	 : "0" (cs), "1" (ct));
	return __res;
}
| 150 | |
#define __HAVE_ARCH_STRNCMP
/*
 * Compare at most @count characters of @cs and @ct.  Returns zero when
 * the compared prefixes are equal (or @count is zero), otherwise the
 * byte difference at the first mismatch.
 *
 * NOTE(review): label 2 is reached by falling through when @count is
 * exhausted without a mismatch.  The first mnemonic is spelled "movb"
 * while the rest of this file uses "moveb" — presumably equivalent
 * spellings accepted by gas; verify against the assembler in use.
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
	char __res;

	if (!count)
		return 0;
	__asm__
	("1:\tmovb %0@+,%3\n\t"  /* get *cs */
	 "cmpb %1@+,%3\n\t"      /* compare a byte */
	 "jne 3f\n\t"            /* not equal, break out */
	 "tstb %3\n\t"           /* at end of cs? */
	 "jeq 4f\n\t"            /* yes, all done */
	 "subql #1,%2\n\t"       /* no, adjust count */
	 "jne 1b\n\t"            /* more to do, keep going */
	 "2:\tmoveq #0,%3\n\t"   /* strings are equal */
	 "jra 4f\n\t"
	 "3:\tsubb %1@-,%3\n\t"  /* *cs - *ct */
	 "4:"
	 : "=a" (cs), "=a" (ct), "=d" (count), "=d" (__res)
	 : "0" (cs), "1" (ct), "2" (count));
	return __res;
}
| 174 | |
| 175 | #define __HAVE_ARCH_MEMSET |
| 176 | /* |
| 177 | * This is really ugly, but its highly optimizatiable by the |
| 178 | * compiler and is meant as compensation for gcc's missing |
| 179 | * __builtin_memset(). For the 680[23]0 it might be worth considering |
| 180 | * the optimal number of misaligned writes compared to the number of |
| 181 | * tests'n'branches needed to align the destination address. The |
| 182 | * 680[46]0 doesn't really care due to their copy-back caches. |
| 183 | * 10/09/96 - Jes Sorensen |
| 184 | */ |
| 185 | static inline void * __memset_g(void * s, int c, size_t count) |
| 186 | { |
| 187 | void *xs = s; |
| 188 | size_t temp; |
| 189 | |
| 190 | if (!count) |
| 191 | return xs; |
| 192 | |
| 193 | c &= 0xff; |
| 194 | c |= c << 8; |
| 195 | c |= c << 16; |
| 196 | |
| 197 | if (count < 36){ |
| 198 | long *ls = s; |
| 199 | |
| 200 | switch(count){ |
| 201 | case 32: case 33: case 34: case 35: |
| 202 | *ls++ = c; |
| 203 | case 28: case 29: case 30: case 31: |
| 204 | *ls++ = c; |
| 205 | case 24: case 25: case 26: case 27: |
| 206 | *ls++ = c; |
| 207 | case 20: case 21: case 22: case 23: |
| 208 | *ls++ = c; |
| 209 | case 16: case 17: case 18: case 19: |
| 210 | *ls++ = c; |
| 211 | case 12: case 13: case 14: case 15: |
| 212 | *ls++ = c; |
| 213 | case 8: case 9: case 10: case 11: |
| 214 | *ls++ = c; |
| 215 | case 4: case 5: case 6: case 7: |
| 216 | *ls++ = c; |
| 217 | break; |
| 218 | default: |
| 219 | break; |
| 220 | } |
| 221 | s = ls; |
| 222 | if (count & 0x02){ |
| 223 | short *ss = s; |
| 224 | *ss++ = c; |
| 225 | s = ss; |
| 226 | } |
| 227 | if (count & 0x01){ |
| 228 | char *cs = s; |
| 229 | *cs++ = c; |
| 230 | s = cs; |
| 231 | } |
| 232 | return xs; |
| 233 | } |
| 234 | |
| 235 | if ((long) s & 1) |
| 236 | { |
| 237 | char *cs = s; |
| 238 | *cs++ = c; |
| 239 | s = cs; |
| 240 | count--; |
| 241 | } |
| 242 | if (count > 2 && (long) s & 2) |
| 243 | { |
| 244 | short *ss = s; |
| 245 | *ss++ = c; |
| 246 | s = ss; |
| 247 | count -= 2; |
| 248 | } |
| 249 | temp = count >> 2; |
| 250 | if (temp) |
| 251 | { |
| 252 | long *ls = s; |
| 253 | temp--; |
| 254 | do |
| 255 | *ls++ = c; |
| 256 | while (temp--); |
| 257 | s = ls; |
| 258 | } |
| 259 | if (count & 2) |
| 260 | { |
| 261 | short *ss = s; |
| 262 | *ss++ = c; |
| 263 | s = ss; |
| 264 | } |
| 265 | if (count & 1) |
| 266 | { |
| 267 | char *cs = s; |
| 268 | *cs = c; |
| 269 | } |
| 270 | return xs; |
| 271 | } |
| 272 | |
/*
 * __memset_page assumes that data is longword aligned. Most, if not
 * all, of these page sized memsets are performed on page aligned
 * areas, thus we do not need to check if the destination is longword
 * aligned. Of course we suffer a serious performance loss if this is
 * not the case but I think the risk of this ever happening is
 * extremely small. We spend a lot of time clearing pages in
 * get_empty_page() so I think it is worth it anyway. Besides, the
 * 680[46]0 do not really care about misaligned writes due to their
 * copy-back cache.
 *
 * The optimized case for the 680[46]0 is implemented using the move16
 * instruction. My tests showed that this implementation is 35-45%
 * faster than the original implementation using movel, the only
 * caveat is that the destination address must be 16-byte aligned.
 * 01/09/96 - Jes Sorensen
 */
/*
 * Fill the page-sized area at @s with the byte value of @c; returns
 * @s.  NOTE(review): only reached from __memset_const() with
 * count == PAGE_SIZE, so @count is assumed to be a non-zero multiple
 * of 32 — the "count / 32 - 1" dbra seed would misbehave otherwise.
 */
static inline void * __memset_page(void * s,int c,size_t count)
{
	unsigned long data, tmp;	/* fill pattern / dbra counter */
	void *xs = s;			/* saved for the return value */

	/* replicate the fill byte into all four bytes of a longword */
	c = c & 255;
	data = c | (c << 8);
	data |= data << 16;

#ifdef CPU_M68040_OR_M68060_ONLY

	/* move16 needs 16-byte alignment; fall back when unaligned */
	if (((unsigned long) s) & 0x0f)
		__memset_g(s, c, count);
	else{
		unsigned long *sp = s;
		/* seed the first 16 bytes so move16 can propagate them */
		*sp++ = data;
		*sp++ = data;
		*sp++ = data;
		*sp++ = data;

		/* the two subqw pull the source back 16 bytes each
		 * iteration, so move16 keeps re-copying the seed */
		__asm__ __volatile__("1:\t"
				     ".chip 68040\n\t"
				     "move16 %2@+,%0@+\n\t"
				     ".chip 68k\n\t"
				     "subqw #8,%2\n\t"
				     "subqw #8,%2\n\t"
				     "dbra %1,1b\n\t"
				     : "=a" (sp), "=d" (tmp)
				     : "a" (s), "0" (sp), "1" ((count - 16) / 16 - 1)
				     );
	}

#else
	/* plain CPUs: unrolled loop of eight longword stores (32 bytes
	 * per dbra iteration) */
	__asm__ __volatile__("1:\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "movel %2,%0@+\n\t"
			     "dbra %1,1b\n\t"
			     : "=a" (s), "=d" (tmp)
			     : "d" (data), "0" (s), "1" (count / 32 - 1)
			     );
#endif

	return xs;
}
| 340 | |
/* Prototype for the out-of-line memset(); most calls are routed to
 * the inline variants by the macro below. */
extern void *memset(void *,int,__kernel_size_t);

/* Compile-time-constant lengths: a whole page goes to the optimized
 * __memset_page(), any other constant size to the generic
 * __memset_g(). */
#define __memset_const(s,c,count) \
((count==PAGE_SIZE) ? \
  __memset_page((s),(c),(count)) : \
  __memset_g((s),(c),(count)))

/* memset(): dispatch on whether @count is known at compile time;
 * non-constant lengths also use the generic inline __memset_g(). */
#define memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __memset_const((s),(c),(count)) : \
 __memset_g((s),(c),(count)))
| 352 | |
#define __HAVE_ARCH_MEMCPY
extern void * memcpy(void *, const void *, size_t );
/*
 * __builtin_memcpy() does not handle page-sized memcpys very well,
 * thus following the same assumptions as for page-sized memsets, this
 * function copies page-sized areas using an unrolled loop, without
 * considering alignment.
 *
 * For the 680[46]0 only kernels we use the move16 instruction instead
 * as it writes through the data-cache, invalidating the cache-lines
 * touched. In this way we do not use up the entire data-cache (well,
 * half of it on the 68060) by copying a page. An unrolled loop of two
 * move16 instructions seem to the fastest. The only caveat is that
 * both source and destination must be 16-byte aligned, if not we fall
 * back to the generic memcpy function. - Jes
 */
/*
 * NOTE(review): only reached from __memcpy_const() with n == PAGE_SIZE,
 * so @count is assumed to be a non-zero multiple of 32; the
 * "count / 32 - 1" dbra seed would misbehave otherwise.
 */
static inline void * __memcpy_page(void * to, const void * from, size_t count)
{
	unsigned long tmp;	/* dbra loop counter */
	void *xto = to;		/* saved for the return value */

#ifdef CPU_M68040_OR_M68060_ONLY

	/* move16 needs both pointers 16-byte aligned; else fall back */
	if (((unsigned long) to | (unsigned long) from) & 0x0f)
		return memcpy(to, from, count);

	/* two move16s = 32 bytes per dbra iteration */
	__asm__ __volatile__("1:\t"
			     ".chip 68040\n\t"
			     "move16 %1@+,%0@+\n\t"
			     "move16 %1@+,%0@+\n\t"
			     ".chip 68k\n\t"
			     "dbra %2,1b\n\t"
			     : "=a" (to), "=a" (from), "=d" (tmp)
			     : "0" (to), "1" (from) , "2" (count / 32 - 1)
			     );
#else
	/* eight longword moves = 32 bytes per dbra iteration */
	__asm__ __volatile__("1:\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "movel %1@+,%0@+\n\t"
			     "dbra %2,1b\n\t"
			     : "=a" (to), "=a" (from), "=d" (tmp)
			     : "0" (to), "1" (from) , "2" (count / 32 - 1)
			     );
#endif
	return xto;
}
| 405 | |
/* Compile-time-constant lengths: a whole page uses the unrolled
 * __memcpy_page(); any other constant size goes to the compiler's
 * __builtin_memcpy(). */
#define __memcpy_const(to, from, n) \
((n==PAGE_SIZE) ? \
  __memcpy_page((to),(from),(n)) : \
  __builtin_memcpy((to),(from),(n)))

/* memcpy(): dispatch on whether @n is known at compile time;
 * non-constant lengths call the out-of-line memcpy() declared above. */
#define memcpy(to, from, n) \
(__builtin_constant_p(n) ? \
 __memcpy_const((to),(from),(n)) : \
 memcpy((to),(from),(n)))
| 415 | |
#define __HAVE_ARCH_MEMMOVE
/*
 * Copy @n bytes from @src to @dest, handling overlapping areas
 * correctly; returns @dest.
 *
 * When @dest lies below @src the copy runs forwards, otherwise it runs
 * backwards from the end of both areas, so the not-yet-copied part of
 * an overlap is never overwritten.  In either direction the
 * destination is first brought to longword alignment (one byte, then
 * one short), the bulk is moved as longwords, and the remaining 0-3
 * bytes are moved as a short and/or a byte.
 *
 * NOTE(review): the n>>2 / n&2 / n&1 accounting assumes a 32-bit
 * 'long' (true on m68k).
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
	void *xdest = dest;	/* saved for the return value */
	size_t temp;		/* number of whole longwords to move */

	if (!n)
		return xdest;

	if (dest < src)
	{
		/* forward copy */
		if ((long) dest & 1)
		{
			/* one byte to reach short alignment */
			char *cdest = dest;
			const char *csrc = src;
			*cdest++ = *csrc++;
			dest = cdest;
			src = csrc;
			n--;
		}
		if (n > 2 && (long) dest & 2)
		{
			/* one short to reach longword alignment */
			short *sdest = dest;
			const short *ssrc = src;
			*sdest++ = *ssrc++;
			dest = sdest;
			src = ssrc;
			n -= 2;
		}
		temp = n >> 2;
		if (temp)
		{
			/* bulk longword copy */
			long *ldest = dest;
			const long *lsrc = src;
			temp--;
			do
				*ldest++ = *lsrc++;
			while (temp--);
			dest = ldest;
			src = lsrc;
		}
		if (n & 2)
		{
			/* trailing short */
			short *sdest = dest;
			const short *ssrc = src;
			*sdest++ = *ssrc++;
			dest = sdest;
			src = ssrc;
		}
		if (n & 1)
		{
			/* trailing byte */
			char *cdest = dest;
			const char *csrc = src;
			*cdest = *csrc;
		}
	}
	else
	{
		/* backward copy: start just past the end of both areas */
		dest = (char *) dest + n;
		src = (const char *) src + n;
		if ((long) dest & 1)
		{
			/* one byte to reach short alignment */
			char *cdest = dest;
			const char *csrc = src;
			*--cdest = *--csrc;
			dest = cdest;
			src = csrc;
			n--;
		}
		if (n > 2 && (long) dest & 2)
		{
			/* one short to reach longword alignment */
			short *sdest = dest;
			const short *ssrc = src;
			*--sdest = *--ssrc;
			dest = sdest;
			src = ssrc;
			n -= 2;
		}
		temp = n >> 2;
		if (temp)
		{
			/* bulk longword copy */
			long *ldest = dest;
			const long *lsrc = src;
			temp--;
			do
				*--ldest = *--lsrc;
			while (temp--);
			dest = ldest;
			src = lsrc;
		}
		if (n & 2)
		{
			/* trailing short */
			short *sdest = dest;
			const short *ssrc = src;
			*--sdest = *--ssrc;
			dest = sdest;
			src = ssrc;
		}
		if (n & 1)
		{
			/* trailing byte */
			char *cdest = dest;
			const char *csrc = src;
			*--cdest = *--csrc;
		}
	}
	return xdest;
}
| 523 | |
#define __HAVE_ARCH_MEMCMP
/* Out-of-line memcmp(), used when the length is not a compile-time
 * constant; constant lengths are handed to the compiler builtin by
 * the macro below. */
extern int memcmp(const void * ,const void * ,size_t );
#define memcmp(cs, ct, n) \
(__builtin_constant_p(n) ? \
 __builtin_memcmp((cs),(ct),(n)) : \
 memcmp((cs),(ct),(n)))
| 530 | |
#define __HAVE_ARCH_MEMCHR
/*
 * Scan the first @count bytes of @cs for the byte value of @c.
 * Returns a pointer to the first matching byte, or NULL if it is not
 * found.
 *
 * Fix: per the C standard, @c must be converted to unsigned char
 * before the comparison.  The previous code compared the raw int, so
 * e.g. memchr(s, -1, n) could never find a 0xff byte and out-of-range
 * values of @c were not reduced modulo 256.
 */
static inline void *memchr(const void *cs, int c, size_t count)
{
	const unsigned char *p = cs;
	unsigned char byte = (unsigned char)c;	/* ISO C conversion */

	while (count--) {
		if (*p == byte)
			return (void *)p;
		p++;
	}
	return NULL;
}
| 541 | |
| 542 | #endif /* _M68K_STRING_H_ */ |