/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

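/*
 * __copy_tofrom_user(to, from, n): r3 = destination, r4 = source,
 * r5 = byte count.  Returns the number of bytes NOT copied in r3
 * (0 on complete success), which is what the exception fixups
 * below compute.
 */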
	.align	7
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF	0x01,r5
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
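/*
 * Destination is 8-byte aligned (and the source either is too, or the
 * CPU handles unaligned ld/std well): copy 16 bytes per iteration,
 * software-pipelined so the load pair (21/22) runs one step ahead of
 * the matching store pair (70/71).
 */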
	srdi	r7,r5,4
20:	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,22f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f
	addi	r3,r3,16
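/*
 * Tail: copy the final 0-7 bytes.  cr7 still holds the low bits of
 * the count, so each bit selects a word, halfword or byte copy.
 */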
.Ldo_tail:
	bf	cr7*4+1,1f
23:	lwz	r9,8(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,8(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,8(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

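/*
 * Source not 8-byte aligned: read aligned doublewords and merge each
 * destination doubleword from two of them with sld/srd, shifting by
 * r10 = 8 * (source offset) bits and r11 = 64 - r10 bits.
 */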
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
94:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
95:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
96:	stb	r9,0(r3)
3:	li	r3,0
	blr

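/*
 * Destination not 8-byte aligned: copy 1-7 bytes (byte, halfword and
 * word as needed) to reach an 8-byte boundary, then rejoin .Ldst_aligned.
 */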
.Ldst_unaligned:
	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

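/*
 * Short copy: fewer than 16 bytes.  cr7 holds the low four bits of
 * the count, so copy 8, 4, 2 and 1 bytes as the bits dictate.
 */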
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */
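/*
 * Convention: fixup label 1NN below handles a fault at load/store
 * label NN in the copy code above (see the __ex_table entries).
 */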

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
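/* clear a byte at a time until 8-byte aligned, then by doublewords, then the remainder */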
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr		/* #bytes not copied in r3 */

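/*
 * Exception table: each entry pairs the address of a faulting
 * load/store above with the address of its fixup handler.
 */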
	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	44b,144b
	.llong	74b,174b
	.llong	45b,145b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	94b,194b
	.llong	95b,195b
	.llong	96b,196b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
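/*
 * The loop below saves r20-r31 in the stack redzone, then streams
 * loads and stores at offsets 128 bytes apart so that several
 * cache lines are in flight at once.
 */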
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

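/* every load and store in the page-copy loop above bails out to label 100 */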
	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b