/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

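/*
 * The errN macros pair the user-access instruction that follows them with
 * a fixup handler via the kernel exception table: the local label
 * (100:, 200:, ...) marks the address that may fault, and the __ex_table
 * entry sends a fault at that address to the matching .Ldo_errN path below.
 */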
	.macro err1
100:
	.section __ex_table,"a"
	.align	3
	.llong	100b,.Ldo_err1
	.previous
	.endm

	.macro err2
200:
	.section __ex_table,"a"
	.align	3
	.llong	200b,.Ldo_err2
	.previous
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	.section __ex_table,"a"
	.align	3
	.llong	300b,.Ldo_err3
	.previous
	.endm

	.macro err4
400:
	.section __ex_table,"a"
	.align	3
	.llong	400b,.Ldo_err4
	.previous
	.endm


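/*
 * Fault recovery: the deeper into the copy we are, the more state has to
 * be unwound.  err4/err3 restore the saved non-volatile GPRs and leave
 * VMX, err2 restores the GPRs saved for the unrolled integer loop, and
 * all paths reload the original dest/src/len and fall back to
 * __copy_tofrom_user_base, which works out how many bytes were left
 * uncopied.
 */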
56.Ldo_err4:
Michael Neulingc75df6f2012-06-25 13:33:10 +000057 ld r16,STK_REG(R16)(r1)
58 ld r15,STK_REG(R15)(r1)
59 ld r14,STK_REG(R14)(r1)
Anton Blancharda66086b2011-12-07 20:11:45 +000060.Ldo_err3:
Anton Blanchard6f7839e2012-05-29 19:31:24 +000061 bl .exit_vmx_usercopy
Anton Blancharda66086b2011-12-07 20:11:45 +000062 ld r0,STACKFRAMESIZE+16(r1)
63 mtlr r0
64 b .Lexit
65#endif /* CONFIG_ALTIVEC */
66
67.Ldo_err2:
Michael Neulingc75df6f2012-06-25 13:33:10 +000068 ld r22,STK_REG(R22)(r1)
69 ld r21,STK_REG(R21)(r1)
70 ld r20,STK_REG(R20)(r1)
71 ld r19,STK_REG(R19)(r1)
72 ld r18,STK_REG(R18)(r1)
73 ld r17,STK_REG(R17)(r1)
74 ld r16,STK_REG(R16)(r1)
75 ld r15,STK_REG(R15)(r1)
76 ld r14,STK_REG(R14)(r1)
Anton Blancharda66086b2011-12-07 20:11:45 +000077.Lexit:
78 addi r1,r1,STACKFRAMESIZE
79.Ldo_err1:
80 ld r3,48(r1)
81 ld r4,56(r1)
82 ld r5,64(r1)
83 b __copy_tofrom_user_base
84
85
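/*
 * __copy_tofrom_user_power7(to=r3, from=r4, n=r5): POWER7-optimised user
 * copy.  Returns 0 on success; on a fault it retries via
 * __copy_tofrom_user_base, so the caller still gets the usual
 * "bytes not copied" result.
 */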
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,4096

	std	r3,48(r1)
	std	r4,56(r1)
	std	r5,64(r1)

	blt	.Lshort_copy
	bgt	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,48(r1)
	std	r4,56(r1)
	std	r5,64(r1)

	blt	.Lshort_copy
#endif

.Lnonvmx_copy:
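	/*
	 * Throughout, mtocrf 0x01 copies the low 4 bits of a count (or of
	 * the negated address when aligning) into cr7, and bf on the
	 * individual cr7 bits selects which power-of-two sized chunks to
	 * copy, avoiding a compare and branch per size.
	 */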
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	.enter_vmx_usercopy
	cmpwi	r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STACKFRAMESIZE+48(r1)
	ld	r4,STACKFRAMESIZE+56(r1)
	ld	r5,STACKFRAMESIZE+64(r1)
	mtlr	r0

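	/*
	 * The block below programs the hardware prefetcher with the
	 * enhanced dcbt/dcbtst forms: one touch describes the start of the
	 * stream (stream ID in the low bits of the address), a second
	 * supplies the cacheline count and depth, and a final dcbt with
	 * the GO bit set starts the streams after the eieio.
	 */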
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	cr1,r7,0x3FF
	ble	cr1,1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

.machine push
.machine "power4"
	dcbt	r0,r6,0b01000
	dcbt	r0,r7,0b01010
	dcbtst	r0,r9,0b01000
	dcbtst	r0,r10,0b01010
	eieio
	dcbt	r0,r8,0b01010	/* GO */
.machine pop


	beq	.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
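	/*
	 * xor of the two addresses followed by rldicl. isolates the low
	 * four bits: non-zero means source and destination sit at
	 * different offsets within a 16B vector, so they can never both be
	 * aligned at once and the vperm path is needed.
	 */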
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
err4;	lvx	vr6,r4,r9
err4;	lvx	vr5,r4,r10
err4;	lvx	vr4,r4,r11
err4;	lvx	vr3,r4,r12
err4;	lvx	vr2,r4,r14
err4;	lvx	vr1,r4,r15
err4;	lvx	vr0,r4,r16
	addi	r4,r4,128
err4;	stvx	vr7,r0,r3
err4;	stvx	vr6,r3,r9
err4;	stvx	vr5,r3,r10
err4;	stvx	vr4,r3,r11
err4;	stvx	vr3,r3,r12
err4;	stvx	vr2,r3,r14
err4;	stvx	vr1,r3,r15
err4;	stvx	vr0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	.exit_vmx_usercopy	/* tail call optimise */

.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

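	/*
	 * Unaligned path: lvsl builds a permute control vector from the
	 * source misalignment.  Each step keeps the previous aligned 16B
	 * quadword in a register and uses vperm to splice it with the next
	 * one, so every store is a full, aligned vector.
	 */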
	lvsl	vr16,0,r4	/* Setup permute control vector */
err3;	lvx	vr0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16
	vor	vr0,vr1,vr1

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
err3;	lvx	vr0,r4,r9
	vperm	vr9,vr1,vr0,vr16
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
	vperm	vr8,vr0,vr3,vr16
err3;	lvx	vr2,r4,r9
	vperm	vr9,vr3,vr2,vr16
err3;	lvx	vr1,r4,r10
	vperm	vr10,vr2,vr1,vr16
err3;	lvx	vr0,r4,r11
	vperm	vr11,vr1,vr0,vr16
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
	vperm	vr8,vr0,vr7,vr16
err4;	lvx	vr6,r4,r9
	vperm	vr9,vr7,vr6,vr16
err4;	lvx	vr5,r4,r10
	vperm	vr10,vr6,vr5,vr16
err4;	lvx	vr4,r4,r11
	vperm	vr11,vr5,vr4,vr16
err4;	lvx	vr3,r4,r12
	vperm	vr12,vr4,vr3,vr16
err4;	lvx	vr2,r4,r14
	vperm	vr13,vr3,vr2,vr16
err4;	lvx	vr1,r4,r15
	vperm	vr14,vr2,vr1,vr16
err4;	lvx	vr0,r4,r16
	vperm	vr15,vr1,vr0,vr16
	addi	r4,r4,128
err4;	stvx	vr8,r0,r3
err4;	stvx	vr9,r3,r9
err4;	stvx	vr10,r3,r10
err4;	stvx	vr11,r3,r11
err4;	stvx	vr12,r3,r12
err4;	stvx	vr13,r3,r14
err4;	stvx	vr14,r3,r15
err4;	stvx	vr15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
	vperm	vr8,vr0,vr3,vr16
err3;	lvx	vr2,r4,r9
	vperm	vr9,vr3,vr2,vr16
err3;	lvx	vr1,r4,r10
	vperm	vr10,vr2,vr1,vr16
err3;	lvx	vr0,r4,r11
	vperm	vr11,vr1,vr0,vr16
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
err3;	lvx	vr0,r4,r9
	vperm	vr9,vr1,vr0,vr16
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	vperm	vr8,vr0,vr1,vr16
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	.exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */