blob: 577505b692ae0643b046223187d071de9106f618 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
2 *
3 * Copyright(C) 1995 Linus Torvalds
4 * Copyright(C) 1996 David S. Miller
5 * Copyright(C) 1996 Eddie C. Dost
6 * Copyright(C) 1996,1998 Jakub Jelinek
7 *
8 * derived from:
9 * e-mail between David and Eddie.
10 *
11 * Returns 0 if successful, otherwise count of bytes not copied yet
12 */
13
14#include <asm/ptrace.h>
15#include <asm/asmmacro.h>
16#include <asm/page.h>
17
18/* Work around cpp -rob */
/* ALLOC and EXECINSTR expand to the section-flag tokens #alloc and
 * #execinstr; the .section directives inside the EX, EX2, EXO2 and EXT
 * macros below use them instead of writing the '#' tokens directly.
 */
19#define ALLOC #alloc
20#define EXECINSTR #execinstr
/* EX(x,y,a,b): emit the instruction "x,y" at local label 98 and give it a
 * private fixup stub.
 *
 * The stub (local label 99, placed in .fixup) branches to fixupretl with
 * "a, b, %g3" in the branch delay slot, so %g3 is computed before fixupretl
 * runs; fixupretl hands %g3 back as the function result.  An __ex_table
 * entry pairs the address of 98 with the address of 99.  Assembly then
 * continues in .text.
 */
21#define EX(x,y,a,b) \
2298: x,y; \
23 .section .fixup,ALLOC,EXECINSTR; \
24 .align 4; \
2599: ba fixupretl; \
26 a, b, %g3; \
27 .section __ex_table,ALLOC; \
28 .align 4; \
29 .word 98b, 99b; \
30 .text; \
31 .align 4
32
/* EX2(x,y,c,d,e,a,b): same as EX, but the fixup stub first executes the
 * extra instruction "c, d, e" at label 99, then branches to fixupretl with
 * "a, b, %g3" in the delay slot.  This allows a two-step computation of the
 * value left in %g3.
 */
33#define EX2(x,y,c,d,e,a,b) \
3498: x,y; \
35 .section .fixup,ALLOC,EXECINSTR; \
36 .align 4; \
3799: c, d, e; \
38 ba fixupretl; \
39 a, b, %g3; \
40 .section __ex_table,ALLOC; \
41 .align 4; \
42 .word 98b, 99b; \
43 .text; \
44 .align 4
45
/* EXO2(x,y): emit the instruction "x, y" at local label 98 and an __ex_table
 * entry pairing it with the forward local label 97.  No per-site stub is
 * generated; label 97 is the shared entry in the .fixup code of this file
 * that copies %o2 into %g3 before falling into fixupretl, so this macro is
 * only suitable where %o2 still holds the number of bytes left to copy.
 */
46#define EXO2(x,y) \
4798: x, y; \
48 .section __ex_table,ALLOC; \
49 .align 4; \
50 .word 98b, 97f; \
51 .text; \
52 .align 4
53
/* EXT(start,end,handler): emit four words into __ex_table: start, 0, end,
 * handler.  No instruction is emitted into .text.
 *
 * NOTE(review): presumably the fault code treats the zero second word as a
 * marker for a range entry covering start..end and, per the note above the
 * handlers in .fixup, enters `handler` with
 * %g2 = (faulting insn - start) >> 2.  The consumer of the table is not in
 * this file -- confirm against the exception-table lookup code.
 */
54#define EXT(start,end,handler) \
55 .section __ex_table,ALLOC; \
56 .align 4; \
57 .word start, 0, end, handler; \
58 .text; \
59 .align 4
60
61/* Please do not change following macros unless you change logic used
62 * in .fixup at the end of this file as well
63 */
64
65/* Both these macros have to start with exactly the same insn */
/* MOVE_BIGCHUNK: move 32 bytes from [%src + offset] to [%dst + offset].
 *
 * Exactly 12 instructions: 4 doubleword loads (ldd) into the register pairs
 * t0/t1, t2/t3, t4/t5, t6/t7, followed by 8 single-word stores (st).
 * Fixup handler 50 decodes the faulting position from this 4-load/8-store
 * layout, so the instruction count and order must not change.
 */
66#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
67 ldd [%src + (offset) + 0x00], %t0; \
68 ldd [%src + (offset) + 0x08], %t2; \
69 ldd [%src + (offset) + 0x10], %t4; \
70 ldd [%src + (offset) + 0x18], %t6; \
71 st %t0, [%dst + (offset) + 0x00]; \
72 st %t1, [%dst + (offset) + 0x04]; \
73 st %t2, [%dst + (offset) + 0x08]; \
74 st %t3, [%dst + (offset) + 0x0c]; \
75 st %t4, [%dst + (offset) + 0x10]; \
76 st %t5, [%dst + (offset) + 0x14]; \
77 st %t6, [%dst + (offset) + 0x18]; \
78 st %t7, [%dst + (offset) + 0x1c];
79
/* MOVE_BIGALIGNCHUNK: move 32 bytes from [%src + offset] to [%dst + offset]
 * using doubleword accesses on both sides.
 *
 * Exactly 8 instructions: 4 ldd followed by 4 std.  Its first instruction is
 * identical to the first instruction of MOVE_BIGCHUNK; __copy_user relies on
 * that when it enters the ldd_std loop at "ldd_std + 4".  Fixup handler 52
 * decodes the faulting position from this 4-load/4-store layout.
 */
80#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
81 ldd [%src + (offset) + 0x00], %t0; \
82 ldd [%src + (offset) + 0x08], %t2; \
83 ldd [%src + (offset) + 0x10], %t4; \
84 ldd [%src + (offset) + 0x18], %t6; \
85 std %t0, [%dst + (offset) + 0x00]; \
86 std %t2, [%dst + (offset) + 0x08]; \
87 std %t4, [%dst + (offset) + 0x10]; \
88 std %t6, [%dst + (offset) + 0x18];
89
/* MOVE_LASTCHUNK: move the 16 bytes that end `offset` bytes before %src to
 * the 16 bytes that end `offset` bytes before %dst (addresses are
 * reg - offset - 0x10 .. reg - offset - 1).
 *
 * Exactly 6 instructions: 2 ldd followed by 4 st.  The computed jump into
 * copy_user_table and fixup handler 51 both depend on this size.
 */
90#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
91 ldd [%src - (offset) - 0x10], %t0; \
92 ldd [%src - (offset) - 0x08], %t2; \
93 st %t0, [%dst - (offset) - 0x10]; \
94 st %t1, [%dst - (offset) - 0x0c]; \
95 st %t2, [%dst - (offset) - 0x08]; \
96 st %t3, [%dst - (offset) - 0x04];
97
/* MOVE_HALFCHUNK: move 8 bytes from [%src + offset] to [%dst + offset] as
 * four halfwords.
 *
 * Exactly 8 instructions: 4 lduh followed by 4 sth.  Fixup handler 53
 * decodes the faulting position from this layout.
 */
98#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
99 lduh [%src + (offset) + 0x00], %t0; \
100 lduh [%src + (offset) + 0x02], %t1; \
101 lduh [%src + (offset) + 0x04], %t2; \
102 lduh [%src + (offset) + 0x06], %t3; \
103 sth %t0, [%dst + (offset) + 0x00]; \
104 sth %t1, [%dst + (offset) + 0x02]; \
105 sth %t2, [%dst + (offset) + 0x04]; \
106 sth %t3, [%dst + (offset) + 0x06];
107
/* MOVE_SHORTCHUNK: move the 2 bytes that end `offset` bytes before %src to
 * the 2 bytes that end `offset` bytes before %dst (addresses are
 * reg - offset - 2 and reg - offset - 1).  A negative `offset` therefore
 * addresses bytes at and after the register value.
 *
 * Exactly 4 instructions: 2 ldub followed by 2 stb.  The computed jump in
 * short_end and fixup handlers 54 and 55 depend on this size.
 */
108#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
109 ldub [%src - (offset) - 0x02], %t0; \
110 ldub [%src - (offset) - 0x01], %t1; \
111 stb %t0, [%dst - (offset) - 0x02]; \
112 stb %t1, [%dst - (offset) - 0x01];
113
114 .text
115 .align 4
116
/* Global label marking the start of the copy code in .text. */
117 .globl __copy_user_begin
118__copy_user_begin:
119
120 .globl __copy_user
/* dword_align: reached from __copy_user when (%o1 & 3) != 0.
 *
 * Copies one byte and/or one halfword so that %o1 becomes a multiple of 4,
 * advancing %o0/%o1 and decreasing %o2 by the same amount, then continues at
 * the next local label 3 (inside __copy_user).  All accesses use EXO2, whose
 * fixup reports %o2 as the residual count.
 */
121dword_align:
122 andcc %o1, 1, %g0
/* bit 0 clear: only a halfword is needed, handled at 4 below */
123 be 4f
/* delay slot: test bit 1 of the original %o1.  None of the instructions up
 * to "bne 3f" modify the condition codes, so that branch acts on this test. */
124 andcc %o1, 2, %g0
125
/* %o1 is odd: move one byte */
126 EXO2(ldub [%o1], %g2)
127 add %o1, 1, %o1
128 EXO2(stb %g2, [%o0])
129 sub %o2, 1, %o2
/* bit 1 of the original %o1 was set: the single byte already aligned %o1 */
130 bne 3f
131 add %o0, 1, %o0
132
/* otherwise a halfword is still needed after the byte */
133 EXO2(lduh [%o1], %g2)
134 add %o1, 2, %o1
135 EXO2(sth %g2, [%o0])
136 sub %o2, 2, %o2
137 b 3f
138 add %o0, 2, %o0
/* %o1 is even but not a multiple of 4: move one halfword */
1394:
140 EXO2(lduh [%o1], %g2)
141 add %o1, 2, %o1
142 EXO2(sth %g2, [%o0])
143 sub %o2, 2, %o2
144 b 3f
145 add %o0, 2, %o0
146
/* __copy_user: main entry point.
 *
 * In:   %o0 = dst, %o1 = src, %o2 = len
 * Out:  %o0 = 0 on the paths that end in "retl; clr %o0".  On a fault the
 *       .fixup code returns %g3, which the file header describes as the
 *       count of bytes not copied yet.
 * Uses: %o2-%o5 and %g1-%g5, %g7 as scratch (see the MOVE_* invocations).
 *
 * Dispatch:
 *   (dst ^ src) & 3 != 0      -> cannot_optimize
 *   len <= 15                 -> short_aligned_end
 *   src & 3 != 0              -> dword_align, which comes back to label 3
 * After that, %g1 holds the byte count and the copy proceeds in 128-byte
 * passes, then 16-byte chunks via a computed jump into copy_user_table, then
 * an 8/4/2/1 byte tail selected by the low bits of %g1.
 */
147__copy_user: /* %o0=dst %o1=src %o2=len */
148 xor %o0, %o1, %o4
1491:
/* %o5 = (dst ^ src) & 3; cannot_optimize reads it later */
150 andcc %o4, 3, %o5
1512:
152 bne cannot_optimize
/* delay slot: compare len with 15; consumed by the bleu below, or by the
 * bleu at cannot_optimize when the branch above is taken */
153 cmp %o2, 15
154
155 bleu short_aligned_end
/* delay slot: is src word aligned?  Consumed by "bne dword_align" below or
 * by the bne at short_aligned_end */
156 andcc %o1, 3, %g0
157
158 bne dword_align
/* label 3 is both the delay slot of the branch above and the point where
 * dword_align re-enters */
1593:
160 andcc %o1, 4, %g0
161
/* src already a multiple of 8: skip the single-word copy */
162 be 2f
/* delay slot: %g1 = number of bytes to copy from here on */
163 mov %o2, %g1
164
/* move one word so that src becomes a multiple of 8 */
165 EXO2(ld [%o1], %o4)
166 sub %g1, 4, %g1
167 EXO2(st %o4, [%o0])
168 add %o1, 4, %o1
169 add %o0, 4, %o0
1702:
/* %g7 = byte count of the whole 128-byte passes */
171 andcc %g1, 0xffffff80, %g7
172 be 3f
/* delay slot: test bit 2 of dst */
173 andcc %o0, 4, %g0
174
/* dst bit 2 clear: use the ldd/std loop.  The delay slot of this branch is
 * the first ldd of the MOVE_BIGCHUNK at label 5, so the target is
 * "ldd_std + 4", skipping the identical first ldd of MOVE_BIGALIGNCHUNK. */
175 be ldd_std + 4
/* 128-byte loop using ldd loads and word stores */
1765:
177 MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
178 MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
179 MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
180 MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
18180:
/* table entry only: associates the code between 5 and 80 with handler 50 */
182 EXT(5b, 80b, 50f)
183 subcc %g7, 128, %g7
184 add %o1, 128, %o1
185 bne 5b
186 add %o0, 128, %o0
1873:
/* %g7 = byte count of the remaining whole 16-byte chunks (0..0x70) */
188 andcc %g1, 0x70, %g7
189 be copy_user_table_end
/* delay slot: test bit 3 of the count.  Nothing up to the "be" at
 * copy_user_table_end modifies the condition codes, so that branch acts on
 * this test on both the direct and the computed-jump path. */
190 andcc %g1, 8, %g0
191
/* Computed jump into copy_user_table.  Each MOVE_LASTCHUNK is 6 instructions
 * (24 bytes of code at 4 bytes per instruction) and moves 16 bytes, so the
 * entry point lies %g7 * 3/2 bytes before copy_user_table_end.  src and dst
 * are advanced by %g7 first; the chunks address backwards from them. */
192 sethi %hi(copy_user_table_end), %o5
193 srl %g7, 1, %o4
194 add %g7, %o4, %o4
195 add %o1, %g7, %o1
196 sub %o5, %o4, %o5
197 jmpl %o5 + %lo(copy_user_table_end), %g0
198 add %o0, %g7, %o0
199
200copy_user_table:
201 MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
202 MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
203 MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
204 MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
205 MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
206 MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
207 MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
208copy_user_table_end:
/* table entry only: associates copy_user_table with handler 51 */
209 EXT(copy_user_table, copy_user_table_end, 51f)
/* bit 3 of the count clear: no 8-byte piece */
210 be copy_user_last7
/* delay slot: test bit 2; consumed by the be at copy_user_last7 */
211 andcc %g1, 4, %g0
212
/* move 8 bytes; each fixup reports the bytes left from the low 4 bits of
 * %g1, and the last store's fixup subtracts the 4 bytes already written */
213 EX(ldd [%o1], %g2, and %g1, 0xf)
214 add %o0, 8, %o0
215 add %o1, 8, %o1
216 EX(st %g2, [%o0 - 0x08], and %g1, 0xf)
217 EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
/* copy_user_last7: finish the last 0..7 bytes according to bits 2..0 of %g1.
 * Entered with the condition codes reflecting "%g1 & 4" (also reached from
 * short_aligned_end, which sets %g1 and the codes itself). */
218copy_user_last7:
219 be 1f
220 andcc %g1, 2, %g0
221
222 EX(ld [%o1], %g2, and %g1, 7)
223 add %o1, 4, %o1
224 EX(st %g2, [%o0], and %g1, 7)
225 add %o0, 4, %o0
2261:
227 be 1f
228 andcc %g1, 1, %g0
229
230 EX(lduh [%o1], %g2, and %g1, 3)
231 add %o1, 2, %o1
232 EX(sth %g2, [%o0], and %g1, 3)
233 add %o0, 2, %o0
2341:
235 be 1f
236 nop
237
/* final odd byte; its fixup reports 1 (%g0 + 1) */
238 EX(ldub [%o1], %g2, add %g0, 1)
239 EX(stb %g2, [%o0], add %g0, 1)
2401:
/* success: return 0 */
241 retl
242 clr %o0
243
/* ldd_std: 128-byte loop that uses doubleword accesses for both loads and
 * stores.  Same register roles as the MOVE_BIGCHUNK loop in __copy_user:
 * %g7 = bytes left for whole 128-byte passes, %g1 = byte count, %o1/%o0 =
 * src/dst.  First entered at "ldd_std + 4" from __copy_user (the first ldd
 * has already been executed in that branch's delay slot); later passes
 * start at ldd_std itself.
 */
244ldd_std:
245 MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
246 MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
247 MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
248 MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
24981:
/* table entry only: associates the code between ldd_std and 81 with
 * handler 52 */
250 EXT(ldd_std, 81b, 52f)
251 subcc %g7, 128, %g7
252 add %o1, 128, %o1
253 bne ldd_std
254 add %o0, 128, %o0
255
/* From here on this is a copy of the sequence at label 3 in __copy_user:
 * %g7 = bytes in whole 16-byte chunks, then a computed jump into
 * copy_user_table (entry point %g7 * 3/2 bytes before copy_user_table_end),
 * with the "%g1 & 8" test left in the condition codes. */
256 andcc %g1, 0x70, %g7
257 be copy_user_table_end
258 andcc %g1, 8, %g0
259
260 sethi %hi(copy_user_table_end), %o5
261 srl %g7, 1, %o4
262 add %g7, %o4, %o4
263 add %o1, %g7, %o1
264 sub %o5, %o4, %o5
265 jmpl %o5 + %lo(copy_user_table_end), %g0
266 add %o0, %g7, %o0
267
/* cannot_optimize: reached from __copy_user when (dst ^ src) & 3 != 0.
 *
 * On entry: condition codes hold "cmp %o2, 15", %o5 = (dst ^ src) & 3,
 * %o0/%o1/%o2 = dst/src/len.
 *   len <= 15  -> short_end
 *   %o5 != 2   -> byte_chunk (byte-by-byte 16-byte passes)
 *   %o5 == 2   -> halfword passes below, after moving one byte if src is odd
 * %o3 holds the byte count of the whole 16-byte passes.
 */
268cannot_optimize:
269 bleu short_end
/* delay slot: consumed by "bne byte_chunk" */
270 cmp %o5, 2
271
272 bne byte_chunk
/* delay slot: %o3 = len rounded down to a multiple of 16 */
273 and %o2, 0xfffffff0, %o3
274
275 andcc %o1, 1, %g0
276 be 10f
277 nop
278
/* src is odd: move one byte, then recompute %o3 from the reduced length */
279 EXO2(ldub [%o1], %g2)
280 add %o1, 1, %o1
281 EXO2(stb %g2, [%o0])
282 sub %o2, 1, %o2
283 andcc %o2, 0xfffffff0, %o3
/* fewer than 16 bytes left after the byte: finish in short_end */
284 be short_end
285 add %o0, 1, %o0
/* 16 bytes per pass as eight halfwords */
28610:
287 MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
288 MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
28982:
/* table entry only: associates the code between 10 and 82 with handler 53 */
290 EXT(10b, 82b, 53f)
291 subcc %o3, 0x10, %o3
292 add %o1, 0x10, %o1
293 bne 10b
294 add %o0, 0x10, %o0
/* continue at local label 2 inside short_end; the delay slot performs the
 * same "%o3 = %o2 & 0xe" that short_end does on its first line */
295 b 2f
296 and %o2, 0xe, %o3
297
/* byte_chunk: 16 bytes per pass, one byte at a time.
 *
 * The negative offsets make MOVE_SHORTCHUNK address %o1 + 0 .. %o1 + 15
 * (and the same for %o0).  %o3 counts the bytes left for whole passes and
 * reaches 0 when the loop is done; execution then falls through into
 * short_end.
 */
298byte_chunk:
299 MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
300 MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
301 MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
302 MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
303 MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
304 MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
305 MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
306 MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
30783:
/* table entry only: associates the code between byte_chunk and 83 with
 * handler 54 */
308 EXT(byte_chunk, 83b, 54f)
309 subcc %o3, 0x10, %o3
310 add %o1, 0x10, %o1
311 bne byte_chunk
312 add %o0, 0x10, %o0
313
/* short_end: copy the last (%o2 & 15) bytes one byte at a time.
 *
 * %o3 = %o2 & 0xe is the even part.  src and dst are advanced by %o3 and a
 * computed jump enters the table at 84 so that exactly %o3 / 2
 * MOVE_SHORTCHUNKs run; they address backwards from the advanced pointers.
 * Each MOVE_SHORTCHUNK is 4 instructions (16 bytes of code at 4 bytes per
 * instruction) for 2 data bytes, hence the entry point lies %o3 << 3 bytes
 * before short_table_end.  A final odd byte is moved if %o2 & 1.
 * Returns 0 in %o0.
 */
314short_end:
315 and %o2, 0xe, %o3
/* local label 2: also entered from the halfword loop in cannot_optimize */
3162:
317 sethi %hi(short_table_end), %o5
318 sll %o3, 3, %o4
319 add %o0, %o3, %o0
320 sub %o5, %o4, %o5
321 add %o1, %o3, %o1
322 jmpl %o5 + %lo(short_table_end), %g0
/* delay slot: test the odd bit of len; consumed by the be at
 * short_table_end (the table itself does not modify the condition codes) */
323 andcc %o2, 1, %g0
32484:
325 MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
326 MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
327 MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
328 MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
329 MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
330 MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
331 MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
332short_table_end:
/* table entry only: associates the code between 84 and short_table_end with
 * handler 55 */
333 EXT(84b, short_table_end, 55f)
334 be 1f
335 nop
/* final odd byte; its fixup reports 1 (%g0 + 1) */
336 EX(ldub [%o1], %g2, add %g0, 1)
337 EX(stb %g2, [%o0], add %g0, 1)
3381:
/* success: return 0 */
339 retl
340 clr %o0
341
/* short_aligned_end: reached from __copy_user when len <= 15 and
 * (dst ^ src) & 3 == 0.
 *
 * On entry the condition codes hold "%o1 & 3".  If src is not word aligned
 * the copy is done by short_end.  Otherwise an 8-byte piece is moved here
 * when bit 3 of len is set, and the remaining 0..7 bytes are handled by
 * copy_user_last7 with %g1 = %o2 and the condition codes holding "%o2 & 4".
 */
342short_aligned_end:
343 bne short_end
/* delay slot: test bit 3 of len */
344 andcc %o2, 8, %g0
345
346 be 1f
/* delay slot: test bit 2 of len; none of the instructions below modify the
 * condition codes, so copy_user_last7 branches on this test */
347 andcc %o2, 4, %g0
348
/* move 8 bytes as two words.  %o2 is not decreased here; the first three
 * accesses report %o2 unchanged via EXO2 and the last store reports
 * %o2 - 4. */
349 EXO2(ld [%o1 + 0x00], %g2)
350 EXO2(ld [%o1 + 0x04], %g3)
351 add %o1, 8, %o1
352 EXO2(st %g2, [%o0 + 0x00])
353 EX(st %g3, [%o0 + 0x04], sub %o2, 4)
354 add %o0, 8, %o0
3551:
356 b copy_user_last7
/* delay slot: copy_user_last7 reads the byte count from %g1 */
357 mov %o2, %g1
358
359 .section .fixup,#alloc,#execinstr
360 .align 4
/* 97: shared fixup target of every EXO2 site; the residual count is simply
 * the current %o2.  Falls through into fixupretl. */
36197:
362 mov %o2, %g3
/* fixupretl: common exit of all fixup paths.
 *
 * In:  %g3 = value to return, %o0 = dst, %o1 = src (as left by the fixup).
 * Out: %o0 = %g3.
 *
 * If dst >= %hi(PAGE_OFFSET) and src < %hi(PAGE_OFFSET), __bzero is first
 * called with (dst, %g3) in a fresh register window; in every other case the
 * function just returns.
 * NOTE(review): presumably this is the "kernel destination, user source"
 * direction, clearing the part of the destination that was not filled --
 * confirm against the callers.  The value returned after the call is read
 * from %g3, so this relies on __bzero leaving %g3 intact; that cannot be
 * verified from this file.
 */
363fixupretl:
364 sethi %hi(PAGE_OFFSET), %g1
365 cmp %o0, %g1
/* dst below the boundary: no clearing */
366 blu 1f
367 cmp %o1, %g1
/* src at or above the boundary: no clearing */
368 bgeu 1f
369 nop
370 save %sp, -64, %sp
/* after save the caller's %o0 (dst) is visible as %i0 */
371 mov %i0, %o0
372 call __bzero
373 mov %g3, %o1
374 restore
3751: retl
376 mov %g3, %o0
377
378/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
/* 50: handler for the four-MOVE_BIGCHUNK loop (range 5..80, 48
 * instructions, 12 per chunk).
 *
 * In:  %g2 = instruction index inside the range, %g7 = bytes left for whole
 *      128-byte passes (current pass included), %g1 = byte count,
 *      %o0 = dst at the start of the current pass.
 * Out: %g3 = (%g1 & 0x7f) + %g7 - 32 * (completed chunks) - (bytes already
 *      stored by the faulting chunk); %o0 advanced by 32 per completed
 *      chunk.  Continues at fixupretl.
 * Note that the code uses %g1 & 0x7f where the formula below writes g1.
 */
37950:
380/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
381 * happens. This is derived from the amount ldd reads, st stores, etc.
382 * x = g2 % 12;
383 * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
384 * o0 += (g2 / 12) * 32;
385 */
/* Reduce %g2 modulo 12 by falling through up to three subtract pairs, taking
 * 32 off %g7 for every completed chunk.  %o0 gets the original %g7 added
 * here and the reduced %g7 subtracted at label 60, which nets
 * 32 * (completed chunks). */
386 cmp %g2, 12
387 add %o0, %g7, %o0
388 bcs 1f
389 cmp %g2, 24
390 bcs 2f
391 cmp %g2, 36
392 bcs 3f
393 nop
394 sub %g2, 12, %g2
395 sub %g7, 32, %g7
3963: sub %g2, 12, %g2
397 sub %g7, 32, %g7
3982: sub %g2, 12, %g2
399 sub %g7, 32, %g7
/* index 0..3 is one of the four ldd: nothing of this chunk was stored, so
 * %g2 becomes 0 (the annulled delay slot runs only when the branch is
 * taken).  Otherwise (index - 4) stores completed, 4 bytes each. */
4001: cmp %g2, 4
401 bcs,a 60f
402 clr %g2
403 sub %g2, 4, %g2
404 sll %g2, 2, %g2
/* 60: shared tail, also used by handler 52.  Expects %g2 = bytes already
 * stored by the faulting chunk and %g7 = bytes left for whole chunks. */
40560: and %g1, 0x7f, %g3
406 sub %o0, %g7, %o0
407 add %g3, %g7, %g3
408 ba fixupretl
409 sub %g3, %g2, %g3
/* 51: handler for copy_user_table (42 instructions, 6 per MOVE_LASTCHUNK).
 *
 * In:  %g2 = instruction index inside the table, %g1 = byte count,
 *      %o0 = dst already advanced past all 16-byte chunks.
 * Out: %g3 = (%g1 & 15) + 16 per chunk not yet started + the unstored bytes
 *      of the faulting chunk (16 for a fault in either ldd, otherwise 4 per
 *      store not yet completed); %o0 moved by (%g1 & 15) - %g3.
 *      Continues at fixupretl.
 */
41051:
411/* i = 41 - g2; j = i % 6;
412 * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
413 * o0 -= (i / 6) * 16 + 16;
414 */
415 neg %g2
416 and %g1, 0xf, %g1
/* %g2 = i = 41 - index: counts instructions from the end of the table */
417 add %g2, 41, %g2
418 add %o0, %g1, %o0
/* loop: while i >= 6, add 16 to %g1 and take 6 off i.  On exit the annulled
 * delay slot has compared j with 4 for the bcc,a below. */
4191: cmp %g2, 6
420 bcs,a 2f
421 cmp %g2, 4
422 add %g1, 16, %g1
423 b 1b
424 sub %g2, 6, %g2
/* j >= 4 (fault in one of the two ldd): whole chunk outstanding, 16 bytes;
 * otherwise (j + 1) * 4 bytes outstanding */
4252: bcc,a 2f
426 mov 16, %g2
427 inc %g2
428 sll %g2, 2, %g2
4292: add %g1, %g2, %g3
430 ba fixupretl
431 sub %o0, %g3, %o0
/* 52: handler for the four-MOVE_BIGALIGNCHUNK loop (range ldd_std..81, 32
 * instructions, 8 per chunk: 4 ldd then 4 std).
 *
 * In:  %g2 = instruction index inside the range, %g7 = bytes left for whole
 *      128-byte passes (current pass included), %g1 = byte count,
 *      %o0 = dst at the start of the current pass.
 * Finishes through label 60 of handler 50 with %g7 reduced by 32 per
 * completed chunk and %g2 = bytes already stored by the faulting chunk
 * (8 per completed std, 0 for a fault in an ldd).
 */
43252:
433/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
434 o0 += (g2 / 8) * 32 */
/* %g4 = 8 * (completed chunks) */
435 andn %g2, 7, %g4
436 add %o0, %g7, %o0
/* bit 2 of the index set: the fault is in one of the std instructions */
437 andcc %g2, 4, %g0
438 and %g2, 3, %g2
/* %g4 = 32 * (completed chunks); %g2 = 8 * (completed stores) */
439 sll %g4, 2, %g4
440 sll %g2, 3, %g2
441 bne 60b
442 sub %g7, %g4, %g7
/* fault in an ldd: nothing of this chunk was stored */
443 ba 60b
444 clr %g2
/* 53: handler for the two-MOVE_HALFCHUNK loop in cannot_optimize (range
 * 10..82, 16 instructions, 8 per chunk: 4 lduh then 4 sth).
 *
 * In:  %g2 = instruction index inside the range, %o3 = bytes left for whole
 *      16-byte passes (current pass included), %o2 = length,
 *      %o0 = dst at the start of the current pass.
 * Out: %g3 = (%o2 & 15) + %o3 - (8 if the first chunk completed)
 *      - (2 per completed sth of the faulting chunk); %o0 advanced by 8 if
 *      the first chunk completed.  Continues at fixupretl.
 */
44553:
446/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
447 o0 += (g2 & 8) */
448 and %g2, 3, %g4
/* bit 2 of the index set: the fault is in one of the sth instructions */
449 andcc %g2, 4, %g0
450 and %g2, 8, %g2
451 sll %g4, 1, %g4
452 be 1f
/* delay slot (not annulled, so always executed): skip a completed first
 * chunk in dst */
453 add %o0, %g2, %o0
454 add %g2, %g4, %g2
4551: and %o2, 0xf, %g3
456 add %g3, %o3, %g3
457 ba fixupretl
458 sub %g3, %g2, %g3
/* 54: handler for the byte_chunk loop (range byte_chunk..83, 32
 * instructions, 4 per MOVE_SHORTCHUNK: 2 ldub then 2 stb).
 *
 * In:  %g2 = instruction index inside the range, %o3 = bytes left for whole
 *      16-byte passes (current pass included), %o2 = length,
 *      %o0 = dst at the start of the current pass.
 * Out: %g3 = (%o2 & 15) + %o3 - 2 * (completed chunks) - (1 if the fault is
 *      in the second stb of a chunk); %o0 advanced by 2 per completed
 *      chunk.  Overwrites %o2, %o3, %o4 and %o5.  Continues at fixupretl.
 */
45954:
460/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
461 o0 += (g2 / 4) * 2 */
462 srl %g2, 2, %o4
463 and %g2, 1, %o5
464 srl %g2, 1, %g2
/* %o4 = 2 * (completed chunks) */
465 add %o4, %o4, %o4
/* %o5 = bit 0 AND bit 1 of the index: 1 only for the fourth instruction of a
 * chunk, i.e. when the first stb already wrote one byte */
466 and %o5, %g2, %o5
467 and %o2, 0xf, %o2
468 add %o0, %o4, %o0
469 sub %o3, %o5, %o3
470 sub %o2, %o4, %o2
471 ba fixupretl
472 add %o2, %o3, %g3
/* 55: handler for the short_end table (range 84..short_table_end, 28
 * instructions, 4 per MOVE_SHORTCHUNK: 2 ldub then 2 stb).
 *
 * In:  %g2 = instruction index inside the table, %o2 = length,
 *      %o0 = dst already advanced past the whole table.
 * Out: %g3 = (%o2 & 1) + 2 per chunk not yet started + the unstored bytes
 *      of the faulting chunk; %o0 moved back to the first unstored byte.
 *      Overwrites %o2 and %o5.  Continues at fixupretl.
 *
 * With i = 27 - index, i / 4 chunks have not been started.  The faulting
 * chunk still has both bytes outstanding unless the fault is in its second
 * stb ((i & 3) == 0), in which case one byte is outstanding.  The previous
 * code added 1 and 0 for these two cases, which under-reported the residual
 * by one byte and returned 0 (success) for a fault on the very last stb of
 * an even-length copy.
 */
47355:
474/* i = 27 - g2;
475 g3 = (o2 & 1) + i / 4 * 2 + ((i & 3) ? 2 : 1);
476 o0 -= i / 4 * 2 + ((i & 3) ? 2 : 1) */
477 neg %g2
478 and %o2, 1, %o2
479 add %g2, 27, %g2
480 srl %g2, 2, %o5
481 andcc %g2, 3, %g0
/* default: both bytes of the faulting chunk are outstanding */
482 mov 2, %g2
483 add %o5, %o5, %o5
/* annulled delay slot runs only when (i & 3) == 0: the first stb of the
 * chunk already completed, one byte outstanding */
484 be,a 1f
485 mov 1, %g2
4861: add %g2, %o5, %g3
487 sub %o0, %g3, %o0
488 ba fixupretl
489 add %g3, %o2, %g3
490
/* Global label marking the end of this file's code; counterpart of
 * __copy_user_begin.  Note that it is emitted while the .fixup section is
 * the current section. */
491 .globl __copy_user_end
492__copy_user_end: