blob: b9277361954b795ed539fe013385240cd8c26eca [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001#ifndef _I386_STRING_H_
2#define _I386_STRING_H_
3
4#ifdef __KERNEL__
/*
 * On a 486 or Pentium, we are better off not using the
 * byte string operations (doing it by hand is MUCH faster
 * on a Pentium). But on a 386 or a PPro the byte string
 * ops are faster than doing it by hand.
 */
11
/*
 * This string-include defines all string functions as inline
 * functions. Use gcc. It also assumes ds=es=data space, which should be
 * normal. Most of the string functions are rather heavily hand-optimized,
 * see especially strsep, strstr, str[c]spn. They should work, but are not
 * very easy to understand. Everything is done entirely within the register
 * set, making the functions fast and clean. String instructions have been
 * used throughout, making for "slightly" unclear code :-)
 *
 * NO Copyright (C) 1991, 1992 Linus Torvalds,
 * consider these trivial functions to be PD.
 */
24
/*
 * AK: in fact I bet it would be better to move all of this stuff out of line.
 */
27
#define __HAVE_ARCH_STRCPY
/*
 * strcpy - copy the NUL-terminated string @src, terminator included,
 * to @dest and return @dest.
 *
 * Relies on the string instructions walking forward (direction flag
 * clear) and on ds == es, as stated at the top of this header.
 */
static inline char *strcpy(char *dest, const char *src)
{
	/* Dummy outputs: they tell gcc that esi, edi and eax are modified. */
	int d0, d1, d2;

	__asm__ __volatile__(
		"1:\tlodsb\n\t"			/* al = *src++ */
		"stosb\n\t"			/* *dest++ = al */
		"testb %%al,%%al\n\t"
		"jne 1b"			/* stop once the NUL has been stored */
		: "=&S" (d0), "=&D" (d1), "=&a" (d2)
		: "0" (src), "1" (dest)
		: "memory");
	return dest;
}
41
#define __HAVE_ARCH_STRNCPY
/*
 * strncpy - copy at most @count bytes of the string @src to @dest.
 *
 * When @src ends before @count bytes have been written, the remainder
 * of the @count bytes is filled with NUL. When it does not, @dest is
 * left without a terminator. Returns @dest.
 *
 * The remaining count is checked with a sign test after each decrement,
 * so a value whose decrement has the sign bit set stops the copy at once.
 */
static inline char *strncpy(char *dest, const char *src, size_t count)
{
	/* Dummy outputs: esi, edi, ecx and eax are all modified. */
	int d0, d1, d2, d3;

	__asm__ __volatile__(
		"1:\tdecl %2\n\t"		/* one byte fewer to go */
		"js 2f\n\t"			/* count exhausted: done */
		"lodsb\n\t"
		"stosb\n\t"
		"testb %%al,%%al\n\t"
		"jne 1b\n\t"
		"rep\n\t"			/* al is 0 here: pad with ecx NULs */
		"stosb\n"
		"2:"
		: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
		: "0" (src), "1" (dest), "2" (count)
		: "memory");
	return dest;
}
60
61#define __HAVE_ARCH_STRCAT
62static inline char * strcat(char * dest,const char * src)
63{
64int d0, d1, d2, d3;
65__asm__ __volatile__(
66 "repne\n\t"
67 "scasb\n\t"
68 "decl %1\n"
69 "1:\tlodsb\n\t"
70 "stosb\n\t"
71 "testb %%al,%%al\n\t"
72 "jne 1b"
73 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
74 : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu):"memory");
75return dest;
76}
77
78#define __HAVE_ARCH_STRNCAT
79static inline char * strncat(char * dest,const char * src,size_t count)
80{
81int d0, d1, d2, d3;
82__asm__ __volatile__(
83 "repne\n\t"
84 "scasb\n\t"
85 "decl %1\n\t"
86 "movl %8,%3\n"
87 "1:\tdecl %3\n\t"
88 "js 2f\n\t"
89 "lodsb\n\t"
90 "stosb\n\t"
91 "testb %%al,%%al\n\t"
92 "jne 1b\n"
93 "2:\txorl %2,%2\n\t"
94 "stosb"
95 : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
96 : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
97 : "memory");
98return dest;
99}
100
#define __HAVE_ARCH_STRCMP
/*
 * strcmp - compare the strings @cs and @ct byte by byte.
 *
 * Returns 0 when they are equal, -1 when the first differing byte of
 * @cs is below that of @ct (unsigned comparison), and 1 otherwise.
 */
static inline int strcmp(const char *cs, const char *ct)
{
	int d0, d1;	/* dummy outputs: esi and edi are modified */
	int __res;

	__asm__ __volatile__(
		"1:\tlodsb\n\t"			/* al = *cs++ */
		"scasb\n\t"			/* flags = al - *ct++ */
		"jne 2f\n\t"
		"testb %%al,%%al\n\t"
		"jne 1b\n\t"
		"xorl %%eax,%%eax\n\t"		/* both ended together: equal */
		"jmp 3f\n"
		"2:\tsbbl %%eax,%%eax\n\t"	/* eax = -1 if borrow, else 0 */
		"orb $1,%%al\n"			/* -1 stays -1, 0 becomes 1 */
		"3:"
		: "=a" (__res), "=&S" (d0), "=&D" (d1)
		: "1" (cs), "2" (ct)
		: "memory");
	return __res;
}
122
#define __HAVE_ARCH_STRNCMP
/*
 * strncmp - compare at most @count bytes of the strings @cs and @ct.
 *
 * Returns 0 when no difference is found within @count bytes or before
 * the common terminator, -1 when the first differing byte of @cs is
 * below that of @ct (unsigned comparison), and 1 otherwise.
 */
static inline int strncmp(const char *cs, const char *ct, size_t count)
{
	int __res;
	int d0, d1, d2;	/* dummy outputs: esi, edi and ecx are modified */

	__asm__ __volatile__(
		"1:\tdecl %3\n\t"
		"js 2f\n\t"			/* count exhausted: equal so far */
		"lodsb\n\t"
		"scasb\n\t"
		"jne 3f\n\t"
		"testb %%al,%%al\n\t"
		"jne 1b\n"
		"2:\txorl %%eax,%%eax\n\t"
		"jmp 4f\n"
		"3:\tsbbl %%eax,%%eax\n\t"	/* eax = -1 if borrow, else 0 */
		"orb $1,%%al\n"			/* -1 stays -1, 0 becomes 1 */
		"4:"
		: "=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
		: "1" (cs), "2" (ct), "3" (count)
		: "memory");
	return __res;
}
146
147#define __HAVE_ARCH_STRCHR
148static inline char * strchr(const char * s, int c)
149{
150int d0;
151register char * __res;
152__asm__ __volatile__(
153 "movb %%al,%%ah\n"
154 "1:\tlodsb\n\t"
155 "cmpb %%ah,%%al\n\t"
156 "je 2f\n\t"
157 "testb %%al,%%al\n\t"
158 "jne 1b\n\t"
159 "movl $1,%1\n"
160 "2:\tmovl %1,%0\n\t"
161 "decl %0"
Linus Torvalds793ae772005-06-24 10:39:17 -0700162 :"=a" (__res), "=&S" (d0)
163 :"1" (s),"0" (c)
164 :"memory");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165return __res;
166}
167
168#define __HAVE_ARCH_STRRCHR
169static inline char * strrchr(const char * s, int c)
170{
171int d0, d1;
172register char * __res;
173__asm__ __volatile__(
174 "movb %%al,%%ah\n"
175 "1:\tlodsb\n\t"
176 "cmpb %%ah,%%al\n\t"
177 "jne 2f\n\t"
178 "leal -1(%%esi),%0\n"
179 "2:\ttestb %%al,%%al\n\t"
180 "jne 1b"
Linus Torvalds793ae772005-06-24 10:39:17 -0700181 :"=g" (__res), "=&S" (d0), "=&a" (d1)
182 :"0" (0),"1" (s),"2" (c)
183 :"memory");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184return __res;
185}
186
#define __HAVE_ARCH_STRLEN
/*
 * strlen - return the number of bytes in the string @s before its
 * terminating NUL.
 */
static inline size_t strlen(const char *s)
{
	int d0;		/* dummy output: edi is modified */
	int __res;

	__asm__ __volatile__(
		"repne\n\t"			/* al = 0, ecx = ~0: scan for the NUL */
		"scasb\n\t"
		"notl %0\n\t"			/* ~ecx = bytes scanned, NUL included */
		"decl %0"			/* do not count the NUL */
		: "=c" (__res), "=&D" (d0)
		: "1" (s), "a" (0), "0" (0xffffffffu)
		: "memory");
	return __res;
}
202
Ingo Molnar652050a2006-01-14 13:21:30 -0800203static __always_inline void * __memcpy(void * to, const void * from, size_t n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204{
205int d0, d1, d2;
206__asm__ __volatile__(
207 "rep ; movsl\n\t"
Denis Vlasenkod5b63d72005-05-01 08:58:48 -0700208 "movl %4,%%ecx\n\t"
209 "andl $3,%%ecx\n\t"
210#if 1 /* want to pay 2 byte penalty for a chance to skip microcoded rep? */
211 "jz 1f\n\t"
212#endif
213 "rep ; movsb\n\t"
214 "1:"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 : "=&c" (d0), "=&D" (d1), "=&S" (d2)
Denis Vlasenkod5b63d72005-05-01 08:58:48 -0700216 : "0" (n/4), "g" (n), "1" ((long) to), "2" ((long) from)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 : "memory");
218return (to);
219}
220
221/*
Denis Vlasenkod5b63d72005-05-01 08:58:48 -0700222 * This looks ugly, but the compiler can optimize it totally,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 * as the count is constant.
224 */
Ingo Molnar652050a2006-01-14 13:21:30 -0800225static __always_inline void * __constant_memcpy(void * to, const void * from, size_t n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226{
Denis Vlasenkod5b63d72005-05-01 08:58:48 -0700227 long esi, edi;
228 if (!n) return to;
229#if 1 /* want to do small copies with non-string ops? */
230 switch (n) {
231 case 1: *(char*)to = *(char*)from; return to;
232 case 2: *(short*)to = *(short*)from; return to;
233 case 4: *(int*)to = *(int*)from; return to;
234#if 1 /* including those doable with two moves? */
235 case 3: *(short*)to = *(short*)from;
236 *((char*)to+2) = *((char*)from+2); return to;
237 case 5: *(int*)to = *(int*)from;
238 *((char*)to+4) = *((char*)from+4); return to;
239 case 6: *(int*)to = *(int*)from;
240 *((short*)to+2) = *((short*)from+2); return to;
241 case 8: *(int*)to = *(int*)from;
242 *((int*)to+1) = *((int*)from+1); return to;
243#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 }
Denis Vlasenkod5b63d72005-05-01 08:58:48 -0700245#endif
246 esi = (long) from;
247 edi = (long) to;
248 if (n >= 5*4) {
249 /* large block: use rep prefix */
250 int ecx;
251 __asm__ __volatile__(
252 "rep ; movsl"
253 : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
254 : "0" (n/4), "1" (edi),"2" (esi)
255 : "memory"
256 );
257 } else {
258 /* small block: don't clobber ecx + smaller code */
259 if (n >= 4*4) __asm__ __volatile__("movsl"
260 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
261 if (n >= 3*4) __asm__ __volatile__("movsl"
262 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
263 if (n >= 2*4) __asm__ __volatile__("movsl"
264 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
265 if (n >= 1*4) __asm__ __volatile__("movsl"
266 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
267 }
268 switch (n % 4) {
269 /* tail */
270 case 0: return to;
271 case 1: __asm__ __volatile__("movsb"
272 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
273 return to;
274 case 2: __asm__ __volatile__("movsw"
275 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
276 return to;
277 default: __asm__ __volatile__("movsw\n\tmovsb"
278 :"=&D"(edi),"=&S"(esi):"0"(edi),"1"(esi):"memory");
279 return to;
280 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281}
282
#define __HAVE_ARCH_MEMCPY

#ifdef CONFIG_X86_USE_3DNOW

#include <asm/mmx.h>

/*
 * This CPU favours 3DNow strongly (eg AMD Athlon)
 */

/*
 * Constant-size copy: below 512 bytes use __constant_memcpy(),
 * otherwise hand the copy to _mmx_memcpy().
 */
static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
{
	if (len < 512)
		return __constant_memcpy(to, from, len);
	return _mmx_memcpy(to, from, len);
}

/*
 * Run-time-size copy: below 512 bytes use __memcpy(),
 * otherwise hand the copy to _mmx_memcpy().
 */
static inline void *__memcpy3d(void *to, const void *from, size_t len)
{
	if (len < 512)
		return __memcpy(to, from, len);
	return _mmx_memcpy(to, from, len);
}

/* Pick the constant-size or the run-time-size variant at compile time. */
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy3d((t),(f),(n)) : \
 __memcpy3d((t),(f),(n)))

#else

/*
 * No 3D Now!
 */

/* Pick the constant-size or the run-time-size variant at compile time. */
#define memcpy(t, f, n) \
(__builtin_constant_p(n) ? \
 __constant_memcpy((t),(f),(n)) : \
 __memcpy((t),(f),(n)))

#endif
324
#define __HAVE_ARCH_MEMMOVE
/* Only declared here; the definition is not part of this header. */
void *memmove(void *dest, const void *src, size_t n);

/* memcmp() is left to the compiler's builtin. */
#define memcmp __builtin_memcmp
329
330#define __HAVE_ARCH_MEMCHR
331static inline void * memchr(const void * cs,int c,size_t count)
332{
333int d0;
334register void * __res;
335if (!count)
336 return NULL;
337__asm__ __volatile__(
338 "repne\n\t"
339 "scasb\n\t"
340 "je 1f\n\t"
341 "movl $1,%0\n"
342 "1:\tdecl %0"
Linus Torvalds793ae772005-06-24 10:39:17 -0700343 :"=D" (__res), "=&c" (d0)
344 :"a" (c),"0" (cs),"1" (count)
345 :"memory");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346return __res;
347}
348
/*
 * __memset_generic - store the byte @c into the @count bytes starting
 * at @s, one byte at a time with "rep stosb". Returns @s.
 */
static inline void *__memset_generic(void *s, char c, size_t count)
{
	int d0, d1;	/* dummy outputs: ecx and edi are modified */

	__asm__ __volatile__(
		"rep\n\t"
		"stosb"
		: "=&c" (d0), "=&D" (d1)
		: "a" (c), "1" (s), "0" (count)
		: "memory");
	return s;
}

/* we might want to write optimized versions of these later */
#define __constant_count_memset(s,c,count) __memset_generic((s),(c),(count))
363
364/*
365 * memset(x,0,y) is a reasonably common thing to do, so we want to fill
366 * things 32 bits at a time even when we don't know the size of the
367 * area at compile-time..
368 */
Ingo Molnar652050a2006-01-14 13:21:30 -0800369static __always_inline void * __constant_c_memset(void * s, unsigned long c, size_t count)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370{
371int d0, d1;
372__asm__ __volatile__(
373 "rep ; stosl\n\t"
374 "testb $2,%b3\n\t"
375 "je 1f\n\t"
376 "stosw\n"
377 "1:\ttestb $1,%b3\n\t"
378 "je 2f\n\t"
379 "stosb\n"
380 "2:"
Linus Torvalds793ae772005-06-24 10:39:17 -0700381 :"=&c" (d0), "=&D" (d1)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 :"a" (c), "q" (count), "0" (count/4), "1" ((long) s)
383 :"memory");
384return (s);
385}
386
387/* Added by Gertjan van Wingerde to make minix and sysv module work */
388#define __HAVE_ARCH_STRNLEN
389static inline size_t strnlen(const char * s, size_t count)
390{
391int d0;
392register int __res;
393__asm__ __volatile__(
394 "movl %2,%0\n\t"
395 "jmp 2f\n"
396 "1:\tcmpb $0,(%0)\n\t"
397 "je 3f\n\t"
398 "incl %0\n"
399 "2:\tdecl %1\n\t"
400 "cmpl $-1,%1\n\t"
401 "jne 1b\n"
402 "3:\tsubl %2,%0"
403 :"=a" (__res), "=&d" (d0)
Linus Torvalds793ae772005-06-24 10:39:17 -0700404 :"c" (s),"1" (count)
405 :"memory");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406return __res;
407}
408/* end of additional stuff */
409
#define __HAVE_ARCH_STRSTR

/* Only declared here; the definition is not part of this header. */
extern char *strstr(const char *cs, const char *ct);
413
414/*
415 * This looks horribly ugly, but the compiler can optimize it totally,
416 * as we by now know that both pattern and count is constant..
417 */
Ingo Molnar652050a2006-01-14 13:21:30 -0800418static __always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419{
420 switch (count) {
421 case 0:
422 return s;
423 case 1:
424 *(unsigned char *)s = pattern;
425 return s;
426 case 2:
427 *(unsigned short *)s = pattern;
428 return s;
429 case 3:
430 *(unsigned short *)s = pattern;
431 *(2+(unsigned char *)s) = pattern;
432 return s;
433 case 4:
434 *(unsigned long *)s = pattern;
435 return s;
436 }
437#define COMMON(x) \
438__asm__ __volatile__( \
439 "rep ; stosl" \
440 x \
441 : "=&c" (d0), "=&D" (d1) \
442 : "a" (pattern),"0" (count/4),"1" ((long) s) \
443 : "memory")
444{
445 int d0, d1;
446 switch (count % 4) {
447 case 0: COMMON(""); return s;
448 case 1: COMMON("\n\tstosb"); return s;
449 case 2: COMMON("\n\tstosw"); return s;
450 default: COMMON("\n\tstosw\n\tstosb"); return s;
451 }
452}
453
454#undef COMMON
455}
456
/* Constant fill byte: choose by whether the count is constant too. */
#define __constant_c_x_memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_c_and_count_memset((s),(c),(count)) : \
 __constant_c_memset((s),(c),(count)))

/* Run-time fill byte: choose by whether the count is constant. */
#define __memset(s, c, count) \
(__builtin_constant_p(count) ? \
 __constant_count_memset((s),(c),(count)) : \
 __memset_generic((s),(c),(count)))

#define __HAVE_ARCH_MEMSET
/*
 * memset() entry point. A constant fill byte is replicated into all
 * four bytes of a 32-bit pattern (0x01010101 * byte) so the fill can
 * proceed a longword at a time.
 */
#define memset(s, c, count) \
(__builtin_constant_p(c) ? \
 __constant_c_x_memset((s),(0x01010101UL*(unsigned char)(c)),(count)) : \
 __memset((s),(c),(count)))
472
473/*
474 * find the first occurrence of byte 'c', or 1 past the area if none
475 */
476#define __HAVE_ARCH_MEMSCAN
477static inline void * memscan(void * addr, int c, size_t size)
478{
479 if (!size)
480 return addr;
481 __asm__("repnz; scasb\n\t"
482 "jnz 1f\n\t"
483 "dec %%edi\n"
484 "1:"
485 : "=D" (addr), "=c" (size)
Linus Torvalds793ae772005-06-24 10:39:17 -0700486 : "0" (addr), "1" (size), "a" (c)
487 : "memory");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488 return addr;
489}
490
491#endif /* __KERNEL__ */
492
493#endif