blob: 25ec5378afa454232704d48c604e968efdd33cc0 [file] [log] [blame]
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
9#include <asm/processor.h>
10#include <asm/ppc_asm.h>
11
/*
 * __copy_tofrom_user
 *
 * Copies r5 bytes from the address in r4 to the address in r3.
 * Returns in r3 the number of bytes that were NOT copied (0 when the
 * whole copy completed).
 *
 * The incoming r3/r4/r5 are stashed at -24/-16/-8(r1) so that the fault
 * fixup code can work out how far the copy got.  A numeric label on a
 * load or store marks an instruction listed in __ex_table; label N is
 * paired there with fixup label 1N (e.g. 20 -> 120).
 *
 * cr7 holds the low 4 bits of the length: cr7*4+0/1/2/3 mean that an
 * 8/4/2/1-byte piece still has to be copied.
 */
	.align	7
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16		/* cr1: length against 16, unsigned */
	cmpdi	cr6,r5,4096		/* cr6.eq: length is exactly 4096 */
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095		/* cr0.eq: both pointers 4096-aligned */
	std	r3,-24(r1)		/* saved for the fixup handlers */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* eq = aligned AND length == 4096 */
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7			/* r6 = bytes to reach 8-byte dest bdry */
	PPC_MTOCRF	0x01,r5		/* low 4 bits of the length -> cr7 */
	blt	cr1,.Lshort_copy	/* fewer than 16 bytes */
	bne	.Ldst_unaligned		/* r6 != 0 */
.Ldst_aligned:
	andi.	r0,r4,7			/* r0 = source offset within a doubleword */
	addi	r3,r3,-16
	bne	.Lsrc_unaligned
	srdi	r7,r5,4			/* r7 = number of 16-byte chunks */
20:	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7			/* r5 = trailing bytes; cr0 kept for beq+ */
	bf	cr7*4+0,22f		/* no odd doubleword: join loop at 22 */
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f			/* no trailing bytes: finished */
	addi	r3,r3,16
23:	ld	r9,8(r4)		/* doubleword holding the trailing bytes */
.Ldo_tail:
	/* store the remaining <8 bytes from r9 as 4-, 2- and 1-byte pieces */
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
75:	stb	r9,0(r3)
3:	li	r3,0			/* success: nothing left uncopied */
	blr
67
/*
 * Destination is 8-byte aligned but the source is not.
 *
 * On entry r0 = source offset within its doubleword (from the andi. in
 * .Ldst_aligned).  The source pointer is rounded down by r0, aligned
 * doublewords are loaded, and every stored doubleword is built from two
 * neighbouring loads as (earlier << r10) | (later >> r11), with
 * r10 = 8 * r0 and r11 = 64 - r10.
 */
.Lsrc_unaligned:
	srdi	r6,r5,3			/* r6 = number of whole doublewords */
	addi	r5,r5,-16
	subf	r4,r0,r4		/* round the source pointer down */
	srdi	r7,r5,4
	sldi	r10,r0,3		/* r10 = left-shift count in bits */
	cmpldi	cr6,r6,3
	andi.	r5,r5,7			/* cr0.eq: no trailing bytes (used at bne 6f) */
	mtctr	r7
	subfic	r11,r10,64		/* r11 = right-shift count in bits */
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

	/* main loop: two merged doublewords stored per iteration */
1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f			/* trailing bytes remain */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8		/* r5 = trailing bytes + source offset */
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,.Ldo_tail		/* tail already fully in r9 */
34:	ld	r0,8(r4)		/* tail spills into the next doubleword */
	srd	r7,r0,r11
	or	r9,r7,r9
	b	.Ldo_tail
136
/*
 * Destination is not 8-byte aligned.
 *
 * r6 holds the number of bytes (1-7) needed to reach the next 8-byte
 * destination boundary.  They are copied as a 1-, 2- and/or 4-byte
 * piece selected by the bits of r6, with r7 as the running offset from
 * r3/r4.  The length is reduced by r6, cr1 and cr7 are recomputed for
 * the new length, both pointers are advanced by r6, and the aligned
 * path is rejoined.
 */
.Ldst_unaligned:
	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16		/* CR field 1, as tested by .Ldst_aligned */
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5		/* cr7 = low 4 bits of the new length */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned
157
/*
 * Fewer than 16 bytes to copy.  cr7 holds the low 4 bits of the length;
 * copy an 8-byte (as two words), 4-byte, 2-byte and 1-byte piece for
 * each bit that is set, then return 0.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr
181
/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 *
 * Labels 120-142 are the fixup targets for faulting loads 20-42.  The
 * labels are stacked so that falling through the addi/add instructions
 * below adjusts r3 by the right amount for each faulting instruction
 * before the common code at 1: is reached.
 */

136:
137:
	add	r3,r3,r7	/* r7 = offset used by the indexed loads */
	b	1f
130:
131:
	addi	r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
123:
134:
135:
138:
139:
140:
141:
142:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)	/* original destination */
	ld	r4,-16(r1)	/* original source */
	ld	r5,-8(r1)	/* original length */
	subf	r6,r6,r3	/* r6 = bytes already copied */
	add	r4,r4,r6	/* r4 = matching source position */
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0	/* huh?  all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 *
 * Bytes are zeroed singly until r4 is 8-byte aligned, then by
 * doublewords, then singly for what is left.  r3 already holds the
 * return value throughout.
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3		/* r9 = whole doublewords to clear */
	andi.	r5,r5,7		/* r5 = bytes left after the doublewords */
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr			/* cr0 from the andi. above: no bytes left */
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr
268
/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 *
 * Labels 170-189 are the fixup targets for faulting stores 70-89.  As
 * with the load fixups, the stacked labels fall through adjustments
 * that bring r3 to the first destination byte that was not written.
 */
182:
183:
	add	r3,r3,r7	/* r7 = offset used by the indexed stores */
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
1:
	ld	r6,-24(r1)	/* original destination */
	ld	r5,-8(r1)	/* original length */
	add	r6,r6,r5	/* r6 = end of the destination */
	subf	r3,r3,r6	/* #bytes not copied */
	/*
	 * 190-192: a store faulted while zeroing the destination; r3 was
	 * already set to the return value before the zeroing began.
	 */
190:
191:
192:
	blr			/* #bytes not copied in r3 */
308
/*
 * Exception table for the generic copy path.  Each entry pairs the
 * address of a load or store that may fault with the address of its
 * fixup code (label N -> label 1N).
 */
	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	74b,174b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text
360
/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 *
 * Reached only when the length is 4096 and both pointers are
 * 4096-byte aligned.  r20-r31 are saved below the stack pointer and
 * restored before returning.  The outer loop (0:) works on six streams
 * 128 bytes apart (offsets 0, 128, ..., 640); r5 counts 32-byte units,
 * starting at 4096/32 - 1 and dropping by 24 per outer pass.  Once r5
 * is below 24, the 32-byte loop at 3: runs r5 times and the tail at 4:
 * copies the last 32 bytes.
 *
 * Every numbered load/store here has an __ex_table entry pointing at
 * fixup label 100.
 */
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5			/* inner-loop trip count */
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24			/* cr0 is consumed by "bge 0b" below */
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b			/* r5 >= 24: another outer pass */
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0			/* whole page copied */
	blr
479
/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 *
 * The saved non-volatile registers are restored, r3/r4 are reloaded
 * from the values stashed at entry and r5 is set back to 4096.
 * .Ldst_aligned tests cr7 (low 4 bits of the length), but the page-copy
 * path branches away before the entry code loads cr7, so it is loaded
 * here; otherwise a stale cr7 could make the generic loop copy one
 * doubleword too many.  cr1 still holds the entry comparison of the
 * length against 16, since the page-copy code only writes cr0.
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	PPC_MTOCRF	0x01,r5		/* cr7 = low 4 bits of the length (all 0) */
	b	.Ldst_aligned
500
/*
 * Exception table for .Lcopy_page_4K: every numbered instruction in the
 * page-copy routine resumes at fixup label 100.  The "NNb" references
 * bind to the nearest preceding definition of each numeric label, i.e.
 * the ones inside .Lcopy_page_4K.
 */
	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b