| /* |
| * Copyright 2002,2003 Andi Kleen, SuSE Labs. |
| * |
| * This file is subject to the terms and conditions of the GNU General Public |
| * License. See the file COPYING in the main directory of this archive |
| * for more details. No warranty for anything given at all. |
| */ |
| #include <linux/linkage.h> |
| #include <asm/dwarf2.h> |
| #include <asm/errno.h> |
| |
| /* |
 * Checksum copy with exception handling.
 * On an exception the int pointed to by src_err_ptr or dst_err_ptr is set
 * to -EFAULT and the destination is zeroed.
| * |
| * Input |
| * rdi source |
| * rsi destination |
| * edx len (32bit) |
| * ecx sum (32bit) |
| * r8 src_err_ptr (int) |
| * r9 dst_err_ptr (int) |
| * |
| * Output |
 *	eax  32bit folded sum (undefined in case of an exception)
| * |
 * Wrappers need to take care of producing a valid sum and of zeroing the
 * destination on exceptions. They should also align the source or the
 * destination to 8 bytes.
| */ |
| |
| .macro source |
| 10: |
| .section __ex_table,"a" |
| .align 8 |
| .quad 10b,.Lbad_source |
| .previous |
| .endm |
| |
| .macro dest |
| 20: |
| .section __ex_table,"a" |
| .align 8 |
| .quad 20b,.Lbad_dest |
| .previous |
| .endm |
| |
| .macro ignore L=.Lignore |
| 30: |
| .section __ex_table,"a" |
| .align 8 |
| .quad 30b,\L |
| .previous |
| .endm |
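/*
 * Usage example: a sequence such as
 *
 *	source
 *	movq (%rdi),%rbx
 *
 * emits the load at the local label 10 and records a "10b, .Lbad_source"
 * pair in the __ex_table section, so a fault on that load is fixed up by
 * jumping to .Lbad_source instead of oopsing. "dest" does the same for
 * stores via .Lbad_dest, and "ignore" lets the caller pick the fixup label
 * (it is used below to simply skip a faulting prefetch).
 */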
| |
| |
| ENTRY(csum_partial_copy_generic) |
| CFI_STARTPROC |
	cmpl $3*64,%edx
	jle .Lignore		/* note: falls through to .Lignore either way */
| |
| .Lignore: |
| subq $7*8,%rsp |
| CFI_ADJUST_CFA_OFFSET 7*8 |
| movq %rbx,2*8(%rsp) |
| CFI_REL_OFFSET rbx, 2*8 |
| movq %r12,3*8(%rsp) |
| CFI_REL_OFFSET r12, 3*8 |
| movq %r14,4*8(%rsp) |
| CFI_REL_OFFSET r14, 4*8 |
| movq %r13,5*8(%rsp) |
| CFI_REL_OFFSET r13, 5*8 |
| movq %rbp,6*8(%rsp) |
| CFI_REL_OFFSET rbp, 6*8 |
| |
| movq %r8,(%rsp) |
| movq %r9,1*8(%rsp) |
| |
	movl %ecx,%eax		/* eax = initial sum argument */
	movl %edx,%ecx		/* ecx = length */
| |
	xorl %r9d,%r9d		/* r9 = 0, used below to add carries back in */
| movq %rcx,%r12 |
| |
| shrq $6,%r12 |
| jz .Lhandle_tail /* < 64 */ |
| |
| clc |
| |
	/* main loop: checksum and copy in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12: loopcnt */
	/* r10: temp5, rbp: temp6, r14: temp7, r13: temp8 */
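	/*
	 * Rough C model of one iteration of the loop below (a sketch only;
	 * the variable names are illustrative):
	 *
	 *	const u64 *s = src; u64 *d = dst;
	 *	u64 carries = 0;
	 *	for (i = 0; i < 8; i++) {
	 *		u64 v = s[i];
	 *		sum += v;
	 *		carries += (sum < v);	// count the wrap-arounds
	 *		d[i] = v;
	 *	}
	 *	sum += carries;			// fold them back into the sum
	 *
	 * The asm keeps the running carry in CF instead: each adcq adds the
	 * carry of the previous addition, and the final "adcq %r9,%rax"
	 * after the loop (r9 is zero) folds the last carry back in.
	 */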
| .p2align 4 |
| .Lloop: |
| source |
| movq (%rdi),%rbx |
| source |
| movq 8(%rdi),%r8 |
| source |
| movq 16(%rdi),%r11 |
| source |
| movq 24(%rdi),%rdx |
| |
| source |
| movq 32(%rdi),%r10 |
| source |
| movq 40(%rdi),%rbp |
| source |
| movq 48(%rdi),%r14 |
| source |
| movq 56(%rdi),%r13 |
| |
	ignore 2f
	prefetcht0 5*64(%rdi)		/* a faulting prefetch is simply skipped */
| 2: |
| adcq %rbx,%rax |
| adcq %r8,%rax |
| adcq %r11,%rax |
| adcq %rdx,%rax |
| adcq %r10,%rax |
| adcq %rbp,%rax |
| adcq %r14,%rax |
| adcq %r13,%rax |
| |
	decl %r12d		/* leaves CF untouched, preserving the adc carry chain */
| |
| dest |
| movq %rbx,(%rsi) |
| dest |
| movq %r8,8(%rsi) |
| dest |
| movq %r11,16(%rsi) |
| dest |
| movq %rdx,24(%rsi) |
| |
| dest |
| movq %r10,32(%rsi) |
| dest |
| movq %rbp,40(%rsi) |
| dest |
| movq %r14,48(%rsi) |
| dest |
| movq %r13,56(%rsi) |
| |
| 3: |
| |
	leaq 64(%rdi),%rdi	/* lea does not change flags */
	leaq 64(%rsi),%rsi

	jnz .Lloop		/* tests the ZF left by the decl above */
| |
	adcq %r9,%rax		/* add in the final carry (r9 is zero) */
| |
	/* handle the last up to 56 bytes, 8 bytes at a time */
| .Lhandle_tail: |
| /* ecx: count */ |
| movl %ecx,%r10d |
| andl $63,%ecx |
| shrl $3,%ecx |
| jz .Lfold |
| clc |
| .p2align 4 |
| .Lloop_8: |
| source |
| movq (%rdi),%rbx |
| adcq %rbx,%rax |
| decl %ecx |
| dest |
| movq %rbx,(%rsi) |
| leaq 8(%rsi),%rsi /* preserve carry */ |
| leaq 8(%rdi),%rdi |
| jnz .Lloop_8 |
| adcq %r9,%rax /* add in carry */ |
| |
| .Lfold: |
| /* reduce checksum to 32bits */ |
| movl %eax,%ebx |
| shrq $32,%rax |
| addl %ebx,%eax |
| adcl %r9d,%eax |
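	/*
	 * Worked example of the fold above: with rax = 0x00000002fffffffe
	 * the low half 0xfffffffe and the high half 0x00000002 are added;
	 * the addl wraps to 0 and sets CF, and the adcl with the zero in
	 * %r9d brings that carry back in, leaving eax = 0x00000001.
	 */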
| |
	/* handle the last up to 6 bytes, 2 bytes at a time */
| .Lhandle_7: |
| movl %r10d,%ecx |
| andl $7,%ecx |
| shrl $1,%ecx |
| jz .Lhandle_1 |
| movl $2,%edx |
| xorl %ebx,%ebx |
| clc |
| .p2align 4 |
| .Lloop_1: |
| source |
| movw (%rdi),%bx |
| adcl %ebx,%eax |
| decl %ecx |
| dest |
| movw %bx,(%rsi) |
| leaq 2(%rdi),%rdi |
| leaq 2(%rsi),%rsi |
| jnz .Lloop_1 |
| adcl %r9d,%eax /* add in carry */ |
| |
| /* handle last odd byte */ |
| .Lhandle_1: |
| testl $1,%r10d |
| jz .Lende |
| xorl %ebx,%ebx |
| source |
| movb (%rdi),%bl |
| dest |
| movb %bl,(%rsi) |
| addl %ebx,%eax |
| adcl %r9d,%eax /* carry */ |
| |
| CFI_REMEMBER_STATE |
| .Lende: |
| movq 2*8(%rsp),%rbx |
| CFI_RESTORE rbx |
| movq 3*8(%rsp),%r12 |
| CFI_RESTORE r12 |
| movq 4*8(%rsp),%r14 |
| CFI_RESTORE r14 |
| movq 5*8(%rsp),%r13 |
| CFI_RESTORE r13 |
| movq 6*8(%rsp),%rbp |
| CFI_RESTORE rbp |
| addq $7*8,%rsp |
| CFI_ADJUST_CFA_OFFSET -7*8 |
| ret |
| CFI_RESTORE_STATE |
| |
	/* Exception handlers. Very simple; zeroing of the destination is done
	   in the C wrappers (sketched at the end of this file). */
| .Lbad_source: |
| movq (%rsp),%rax |
| testq %rax,%rax |
| jz .Lende |
| movl $-EFAULT,(%rax) |
| jmp .Lende |
| |
| .Lbad_dest: |
| movq 8(%rsp),%rax |
| testq %rax,%rax |
| jz .Lende |
| movl $-EFAULT,(%rax) |
| jmp .Lende |
| CFI_ENDPROC |
| ENDPROC(csum_partial_copy_generic) |
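/*
 * For context, a rough sketch of the contract the C wrappers (see
 * csum-wrappers_64.c) implement around this routine. The function name and
 * error handling are illustrative only, not the real kernel API:
 *
 *	__wsum copy_and_checksum_sketch(const void *src, void *dst,
 *					int len, __wsum sum)
 *	{
 *		int err = 0;
 *
 *		sum = csum_partial_copy_generic(src, dst, len, sum,
 *						&err, NULL);
 *		if (err) {
 *			// the fixup above only stores -EFAULT through the
 *			// error pointer; the wrapper zeroes the destination
 *			// so no partially copied data reaches the caller
 *			memset(dst, 0, len);
 *		}
 *		return sum;
 *	}
 */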