| /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ |
| |
| /* Don't use streaming store because it's better when the target |
| ends up in cache. */ |
| |
| /* Could vary the prefetch distance based on SMP/UP */ |
| |
/*
 * copy_page - copy one 4096-byte page in 64-byte chunks.
 *
 * In:    %rdi = destination address
 *        %rsi = source address
 * Out:   no return value is set up; %rdi and %rsi are both left
 *        advanced by 4096.
 * Clobb: %rax, %rcx, %rdx, %r8-%r11, flags.
 *        %rbx and %r12 are used as extra scratch and are preserved
 *        via the stack.
 * Stack: 16 bytes (two saved registers); makes no calls.
 *
 * NOTE(review): presumably called from C as copy_page(to, from) under
 * the SysV AMD64 convention -- confirm against the C prototype.
 *
 * Eight registers are needed to hold one 64-byte chunk.  Only six
 * caller-saved registers are free (%rcx is the loop counter), so %rbx
 * and %r12 are borrowed and restored before returning.
 */
	.globl	copy_page
	.p2align 4
copy_page:
	subq	$2*8, %rsp
	movq	%rbx, (%rsp)
	movq	%r12, 1*8(%rsp)

	/*
	 * Main loop: every chunk except the last five.  Each iteration
	 * prefetches the source five chunks (5*64 bytes) ahead, so
	 * stopping five chunks early keeps the prefetch address inside
	 * the source page.
	 */
	movl	$(4096/64)-5, %ecx
	.p2align 4
.Loop64:
	/*
	 * The counter is decremented first; the mov, prefetch and lea
	 * instructions below leave the flags untouched, so ZF is still
	 * valid when the jnz at the bottom of the loop tests it.
	 */
	decl	%ecx

	movq	  (%rsi), %rax
	movq	 8(%rsi), %rbx
	movq	16(%rsi), %rdx
	movq	24(%rsi), %r8
	movq	32(%rsi), %r9
	movq	40(%rsi), %r10
	movq	48(%rsi), %r11
	movq	56(%rsi), %r12

	prefetcht0 5*64(%rsi)

	movq	%rax,   (%rdi)
	movq	%rbx,  8(%rdi)
	movq	%rdx, 16(%rdi)
	movq	%r8,  24(%rdi)
	movq	%r9,  32(%rdi)
	movq	%r10, 40(%rdi)
	movq	%r11, 48(%rdi)
	movq	%r12, 56(%rdi)

	/* lea rather than add: advances the pointers without changing ZF */
	leaq	64(%rsi), %rsi
	leaq	64(%rdi), %rdi

	jnz	.Loop64

	/* Tail loop: the final five chunks, copied without prefetching. */
	movl	$5, %ecx
	.p2align 4
.Loop2:
	decl	%ecx

	movq	  (%rsi), %rax
	movq	 8(%rsi), %rbx
	movq	16(%rsi), %rdx
	movq	24(%rsi), %r8
	movq	32(%rsi), %r9
	movq	40(%rsi), %r10
	movq	48(%rsi), %r11
	movq	56(%rsi), %r12

	movq	%rax,   (%rdi)
	movq	%rbx,  8(%rdi)
	movq	%rdx, 16(%rdi)
	movq	%r8,  24(%rdi)
	movq	%r9,  32(%rdi)
	movq	%r10, 40(%rdi)
	movq	%r11, 48(%rdi)
	movq	%r12, 56(%rdi)

	leaq	64(%rdi), %rdi
	leaq	64(%rsi), %rsi

	jnz	.Loop2

	movq	(%rsp), %rbx
	movq	1*8(%rsp), %r12
	addq	$2*8, %rsp
	ret
| |
/* C-stepping K8 CPUs run faster using the string copy instructions.
   That version is also a lot simpler. Use it when possible. */
| |
| #include <asm/cpufeature.h> |
| |
/*
 * Alternative-instruction record pairing copy_page with copy_page_c.
 * Emitted fields, in order:
 *   - address of copy_page
 *   - address of copy_page_c
 *   - the X86_FEATURE_K8_C feature number
 *   - two length bytes, both set to the size of copy_page_c
 * NOTE(review): presumably this matches the kernel's alternatives
 * record layout (original, replacement, feature, original length,
 * replacement length) -- that structure is not visible in this file,
 * so confirm against its definition.
 */
	.pushsection .altinstructions,"a"
	.balign	8
	.quad	copy_page
	.quad	copy_page_c
	.byte	X86_FEATURE_K8_C
	.byte	copy_page_c_end-copy_page_c
	.byte	copy_page_c_end-copy_page_c
	.popsection
| |
/*
 * copy_page_c - string-instruction variant of the page copy.
 *
 * In:    %rdi = destination address, %rsi = source address
 *        (the implicit operands of movsq)
 * Clobb: %rcx, %rsi, %rdi
 *
 * Copies 4096/8 quadwords with a single rep movsq.  copy_page_c_end
 * marks the end of the code so its length can be computed elsewhere.
 * NOTE(review): assumes the direction flag is clear on entry; nothing
 * in this file establishes that -- confirm against the calling
 * convention in use.
 */
	.pushsection .altinstr_replacement,"ax"
copy_page_c:
	movl	$4096/8, %ecx		/* quadword count; movl zero-extends into %rcx */
	rep movsq
	ret
copy_page_c_end:
	.popsection